IPv6 Classify Forwarding Graph errors
[vpp.git] / vnet / vnet / ip / ip6_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip6_forward.c: IP v6 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
44 #include <vppinfra/cache.h>
45 #include <vnet/fib/fib_table.h>
46 #include <vnet/fib/ip6_fib.h>
47 #include <vnet/dpo/load_balance.h>
48 #include <vnet/dpo/classify_dpo.h>
49
50 #include <vppinfra/bihash_template.c>
51
52 /**
53  * @file
54  * @brief IPv6 Forwarding.
55  *
56  * This file contains the source code for IPv6 forwarding.
57  */
58
59 void
60 ip6_forward_next_trace (vlib_main_t * vm,
61                         vlib_node_runtime_t * node,
62                         vlib_frame_t * frame,
63                         vlib_rx_or_tx_t which_adj_index);
64
65 always_inline uword
66 ip6_lookup_inline (vlib_main_t * vm,
67                    vlib_node_runtime_t * node,
68                    vlib_frame_t * frame)
69 {
70   ip6_main_t * im = &ip6_main;
71   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
72   u32 n_left_from, n_left_to_next, * from, * to_next;
73   ip_lookup_next_t next;
74   u32 cpu_index = os_get_cpu_number();
75
76   from = vlib_frame_vector_args (frame);
77   n_left_from = frame->n_vectors;
78   next = node->cached_next_index;
79
80   while (n_left_from > 0)
81     {
82       vlib_get_next_frame (vm, node, next,
83                            to_next, n_left_to_next);
84
85       while (n_left_from >= 4 && n_left_to_next >= 2)
86         {
87           vlib_buffer_t * p0, * p1;
88           u32 pi0, pi1, lbi0, lbi1, wrong_next;
89           ip_lookup_next_t next0, next1;
90           ip6_header_t * ip0, * ip1;
91           ip6_address_t * dst_addr0, * dst_addr1;
92           u32 fib_index0, fib_index1;
93           u32 flow_hash_config0, flow_hash_config1;
94           const dpo_id_t *dpo0, *dpo1;
95           const load_balance_t *lb0, *lb1;
96
97           /* Prefetch next iteration. */
98           {
99             vlib_buffer_t * p2, * p3;
100
101             p2 = vlib_get_buffer (vm, from[2]);
102             p3 = vlib_get_buffer (vm, from[3]);
103
104             vlib_prefetch_buffer_header (p2, LOAD);
105             vlib_prefetch_buffer_header (p3, LOAD);
106             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
107             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
108           }
109
110           pi0 = to_next[0] = from[0];
111           pi1 = to_next[1] = from[1];
112
113           p0 = vlib_get_buffer (vm, pi0);
114           p1 = vlib_get_buffer (vm, pi1);
115
116           ip0 = vlib_buffer_get_current (p0);
117           ip1 = vlib_buffer_get_current (p1);
118
119           dst_addr0 = &ip0->dst_address;
120           dst_addr1 = &ip1->dst_address;
121
122           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
123           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
124
125           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
126             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
127           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
128             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
129
130           lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0);
131           lbi1 = ip6_fib_table_fwding_lookup (im, fib_index1, dst_addr1);
132
133           lb0 = load_balance_get (lbi0);
134           lb1 = load_balance_get (lbi1);
135
136           vnet_buffer (p0)->ip.flow_hash =
137             vnet_buffer(p1)->ip.flow_hash = 0;
138
139           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
140             {
141               flow_hash_config0 = lb0->lb_hash_config;
142               vnet_buffer (p0)->ip.flow_hash =
143                 ip6_compute_flow_hash (ip0, flow_hash_config0);
144             }
145           if (PREDICT_FALSE(lb1->lb_n_buckets > 1))
146             {
147               flow_hash_config1 = lb1->lb_hash_config;
148               vnet_buffer (p1)->ip.flow_hash =
149                 ip6_compute_flow_hash (ip1, flow_hash_config1);
150             }
151
152           ASSERT (lb0->lb_n_buckets > 0);
153           ASSERT (lb1->lb_n_buckets > 0);
154           ASSERT (is_pow2 (lb0->lb_n_buckets));
155           ASSERT (is_pow2 (lb1->lb_n_buckets));
156           dpo0 = load_balance_get_bucket_i(lb0,
157                                            (vnet_buffer (p0)->ip.flow_hash &
158                                             lb0->lb_n_buckets_minus_1));
159           dpo1 = load_balance_get_bucket_i(lb1,
160                                            (vnet_buffer (p1)->ip.flow_hash &
161                                             lb1->lb_n_buckets_minus_1));
162
163           next0 = dpo0->dpoi_next_node;
164           next1 = dpo1->dpoi_next_node;
165
166           /* Only process the HBH Option Header if explicitly configured to do so */
167           next0 = ((ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) &&
168                    im->hbh_enabled) ?
169             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP :
170             next0;
171           next1 = ((ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) &&
172                    im->hbh_enabled) ?
173             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP :
174             next1;
175
176           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
177           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
178
179           vlib_increment_combined_counter
180               (cm, cpu_index, lbi0, 1,
181                vlib_buffer_length_in_chain (vm, p0));
182           vlib_increment_combined_counter
183               (cm, cpu_index, lbi1, 1,
184                vlib_buffer_length_in_chain (vm, p1));
185
186           from += 2;
187           to_next += 2;
188           n_left_to_next -= 2;
189           n_left_from -= 2;
190
191           wrong_next = (next0 != next) + 2*(next1 != next);
192           if (PREDICT_FALSE (wrong_next != 0))
193             {
194               switch (wrong_next)
195                 {
196                 case 1:
197                   /* A B A */
198                   to_next[-2] = pi1;
199                   to_next -= 1;
200                   n_left_to_next += 1;
201                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
202                   break;
203
204                 case 2:
205                   /* A A B */
206                   to_next -= 1;
207                   n_left_to_next += 1;
208                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
209                   break;
210
211                 case 3:
212                   /* A B C */
213                   to_next -= 2;
214                   n_left_to_next += 2;
215                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
216                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
217                   if (next0 == next1)
218                     {
219                       /* A B B */
220                       vlib_put_next_frame (vm, node, next, n_left_to_next);
221                       next = next1;
222                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
223                     }
224                 }
225             }
226         }
227
228       while (n_left_from > 0 && n_left_to_next > 0)
229         {
230           vlib_buffer_t * p0;
231           ip6_header_t * ip0;
232           u32 pi0, lbi0;
233           ip_lookup_next_t next0;
234           load_balance_t * lb0;
235           ip6_address_t * dst_addr0;
236           u32 fib_index0, flow_hash_config0;
237           const dpo_id_t *dpo0;
238
239           pi0 = from[0];
240           to_next[0] = pi0;
241
242           p0 = vlib_get_buffer (vm, pi0);
243
244           ip0 = vlib_buffer_get_current (p0);
245
246           dst_addr0 = &ip0->dst_address;
247
248           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
249           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
250             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
251
252           flow_hash_config0 =
253               ip6_fib_get (fib_index0)->flow_hash_config;
254
255           lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0);
256
257           lb0 = load_balance_get (lbi0);
258
259           vnet_buffer (p0)->ip.flow_hash = 0;
260
261           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
262             {
263               flow_hash_config0 = lb0->lb_hash_config;
264               vnet_buffer (p0)->ip.flow_hash =
265                 ip6_compute_flow_hash (ip0, flow_hash_config0);
266             }
267
268           ASSERT (lb0->lb_n_buckets > 0);
269           ASSERT (is_pow2 (lb0->lb_n_buckets));
270           dpo0 = load_balance_get_bucket_i(lb0,
271                                            (vnet_buffer (p0)->ip.flow_hash &
272                                             lb0->lb_n_buckets_minus_1));
273           next0 = dpo0->dpoi_next_node;
274           /* Only process the HBH Option Header if explicitly configured to do so */
275           next0 = ((ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) &&
276                    im->hbh_enabled) ?
277             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP :
278             next0;
279
280           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
281
282           vlib_increment_combined_counter
283               (cm, cpu_index, lbi0, 1,
284                vlib_buffer_length_in_chain (vm, p0));
285
286           from += 1;
287           to_next += 1;
288           n_left_to_next -= 1;
289           n_left_from -= 1;
290
291           if (PREDICT_FALSE (next0 != next))
292             {
293               n_left_to_next += 1;
294               vlib_put_next_frame (vm, node, next, n_left_to_next);
295               next = next0;
296               vlib_get_next_frame (vm, node, next,
297                                    to_next, n_left_to_next);
298               to_next[0] = pi0;
299               to_next += 1;
300               n_left_to_next -= 1;
301             }
302         }
303
304       vlib_put_next_frame (vm, node, next, n_left_to_next);
305     }
306
307   if (node->flags & VLIB_NODE_FLAG_TRACE)
308     ip6_forward_next_trace(vm, node, frame, VLIB_TX);
309
310   return frame->n_vectors;
311 }
312
313 static void
314 ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
315                           ip6_main_t * im, u32 fib_index,
316                           ip_interface_address_t * a)
317 {
318   ip_lookup_main_t * lm = &im->lookup_main;
319   ip6_address_t * address = ip_interface_address_get_address (lm, a);
320   fib_prefix_t pfx = {
321       .fp_len = a->address_length,
322       .fp_proto = FIB_PROTOCOL_IP6,
323       .fp_addr.ip6 = *address,
324   };
325
326   a->neighbor_probe_adj_index = ~0;
327   if (a->address_length < 128)
328   {
329       fib_node_index_t fei;
330
331       fei = fib_table_entry_update_one_path(fib_index,
332                                             &pfx,
333                                             FIB_SOURCE_INTERFACE,
334                                             (FIB_ENTRY_FLAG_CONNECTED |
335                                              FIB_ENTRY_FLAG_ATTACHED),
336                                             FIB_PROTOCOL_IP6,
337                                             NULL, /* No next-hop address */
338                                             sw_if_index,
339                                             ~0, // invalid FIB index
340                                             1,
341                                             MPLS_LABEL_INVALID,
342                                             FIB_ROUTE_PATH_FLAG_NONE);
343       a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
344   }
345
346   pfx.fp_len = 128;
347   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
348   {
349       u32 classify_table_index =
350           lm->classify_table_index_by_sw_if_index [sw_if_index];
351       if (classify_table_index != (u32) ~0)
352       {
353           dpo_id_t dpo = DPO_INVALID;
354
355           dpo_set(&dpo,
356                   DPO_CLASSIFY,
357                   DPO_PROTO_IP6,
358                   classify_dpo_create(DPO_PROTO_IP6, classify_table_index));
359
360           fib_table_entry_special_dpo_add(fib_index,
361                                           &pfx,
362                                           FIB_SOURCE_CLASSIFY,
363                                           FIB_ENTRY_FLAG_NONE,
364                                           &dpo);
365           dpo_reset(&dpo);
366       }
367   }
368
369   fib_table_entry_update_one_path(fib_index,
370                                   &pfx,
371                                   FIB_SOURCE_INTERFACE,
372                                   (FIB_ENTRY_FLAG_CONNECTED |
373                                    FIB_ENTRY_FLAG_LOCAL),
374                                   FIB_PROTOCOL_IP6,
375                                   &pfx.fp_addr,
376                                   sw_if_index,
377                                   ~0, // invalid FIB index
378                                   1,
379                                   MPLS_LABEL_INVALID,
380                                   FIB_ROUTE_PATH_FLAG_NONE);
381 }
382
383 static void
384 ip6_del_interface_routes (ip6_main_t * im,
385                           u32 fib_index,
386                           ip6_address_t * address,
387                           u32 address_length)
388 {
389     fib_prefix_t pfx = {
390         .fp_len = address_length,
391         .fp_proto = FIB_PROTOCOL_IP6,
392         .fp_addr.ip6 = *address,
393     };
394
395     if (pfx.fp_len < 128)
396     {
397         fib_table_entry_delete(fib_index,
398                                &pfx,
399                                FIB_SOURCE_INTERFACE);
400
401     }
402
403     pfx.fp_len = 128;
404     fib_table_entry_delete(fib_index,
405                            &pfx,
406                            FIB_SOURCE_INTERFACE);
407 }
408
409 void
410 ip6_sw_interface_enable_disable (u32 sw_if_index,
411                                  u32 is_enable)
412 {
413   vlib_main_t * vm = vlib_get_main();
414   ip6_main_t * im = &ip6_main;
415   ip_lookup_main_t * lm = &im->lookup_main;
416   u32 ci, cast;
417   u32 lookup_feature_index;
418
419   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
420
421   /*
422    * enable/disable only on the 1<->0 transition
423    */
424   if (is_enable)
425     {
426       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
427         return;
428     }
429   else
430     {
431       ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
432       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
433         return;
434     }
435
436   for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++)
437     {
438       ip_config_main_t * cm = &lm->feature_config_mains[cast];
439       vnet_config_main_t * vcm = &cm->config_main;
440
441       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
442       ci = cm->config_index_by_sw_if_index[sw_if_index];
443
444       if (cast == VNET_IP_RX_UNICAST_FEAT)
445         lookup_feature_index = im->ip6_unicast_rx_feature_lookup;
446       else
447         lookup_feature_index = im->ip6_multicast_rx_feature_lookup;
448
449       if (is_enable)
450         ci = vnet_config_add_feature (vm, vcm,
451                                       ci,
452                                       lookup_feature_index,
453                                       /* config data */ 0,
454                                       /* # bytes of config data */ 0);
455       else
456         ci = vnet_config_del_feature (vm, vcm,
457                                       ci,
458                                       lookup_feature_index,
459                                       /* config data */ 0,
460                                       /* # bytes of config data */ 0);
461
462       cm->config_index_by_sw_if_index[sw_if_index] = ci;
463     }
464 }
465
466 /* get first interface address */
467 ip6_address_t *
468 ip6_interface_first_address (ip6_main_t * im,
469                              u32 sw_if_index,
470                              ip_interface_address_t ** result_ia)
471 {
472   ip_lookup_main_t * lm = &im->lookup_main;
473   ip_interface_address_t * ia = 0;
474   ip6_address_t * result = 0;
475
476   foreach_ip_interface_address (lm, ia, sw_if_index,
477                                 1 /* honor unnumbered */,
478   ({
479     ip6_address_t * a = ip_interface_address_get_address (lm, ia);
480     result = a;
481     break;
482   }));
483   if (result_ia)
484     *result_ia = result ? ia : 0;
485   return result;
486 }
487
488 clib_error_t *
489 ip6_add_del_interface_address (vlib_main_t * vm,
490                                u32 sw_if_index,
491                                ip6_address_t * address,
492                                u32 address_length,
493                                u32 is_del)
494 {
495   vnet_main_t * vnm = vnet_get_main();
496   ip6_main_t * im = &ip6_main;
497   ip_lookup_main_t * lm = &im->lookup_main;
498   clib_error_t * error;
499   u32 if_address_index;
500   ip6_address_fib_t ip6_af, * addr_fib = 0;
501
502   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
503   ip6_addr_fib_init (&ip6_af, address,
504                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
505   vec_add1 (addr_fib, ip6_af);
506
507   {
508     uword elts_before = pool_elts (lm->if_address_pool);
509
510     error = ip_interface_address_add_del
511       (lm,
512        sw_if_index,
513        addr_fib,
514        address_length,
515        is_del,
516        &if_address_index);
517     if (error)
518       goto done;
519
520     /* Pool did not grow: add duplicate address. */
521     if (elts_before == pool_elts (lm->if_address_pool))
522       goto done;
523   }
524
525   if (is_del)
526       ip6_del_interface_routes (im, ip6_af.fib_index, address,
527                                 address_length);
528   else
529       ip6_add_interface_routes (vnm, sw_if_index,
530                                 im, ip6_af.fib_index,
531                                 pool_elt_at_index (lm->if_address_pool, if_address_index));
532
533   {
534     ip6_add_del_interface_address_callback_t * cb;
535     vec_foreach (cb, im->add_del_interface_address_callbacks)
536       cb->function (im, cb->function_opaque, sw_if_index,
537                     address, address_length,
538                     if_address_index,
539                     is_del);
540   }
541
542  done:
543   vec_free (addr_fib);
544   return error;
545 }
546
547 clib_error_t *
548 ip6_sw_interface_admin_up_down (vnet_main_t * vnm,
549                                 u32 sw_if_index,
550                                 u32 flags)
551 {
552   ip6_main_t * im = &ip6_main;
553   ip_interface_address_t * ia;
554   ip6_address_t * a;
555   u32 is_admin_up, fib_index;
556
557   /* Fill in lookup tables with default table (0). */
558   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
559
560   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
561
562   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
563
564   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
565
566   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
567                                 0 /* honor unnumbered */,
568   ({
569     a = ip_interface_address_get_address (&im->lookup_main, ia);
570     if (is_admin_up)
571       ip6_add_interface_routes (vnm, sw_if_index,
572                                 im, fib_index,
573                                 ia);
574     else
575       ip6_del_interface_routes (im, fib_index,
576                                 a, ia->address_length);
577   }));
578
579   return 0;
580 }
581
582 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
583
584 /* Built-in ip6 unicast rx feature path definition */
585 VNET_IP6_UNICAST_FEATURE_INIT (ip6_flow_classify, static) = {
586   .node_name = "ip6-flow-classify",
587   .runs_before = ORDER_CONSTRAINTS {"ip6-inacl", 0},
588   .feature_index = &ip6_main.ip6_unicast_rx_feature_flow_classify,
589 };
590
591 VNET_IP6_UNICAST_FEATURE_INIT (ip6_inacl, static) = {
592   .node_name = "ip6-inacl",
593   .runs_before = ORDER_CONSTRAINTS {"ip6-policer-classify", 0},
594   .feature_index = &ip6_main.ip6_unicast_rx_feature_check_access,
595 };
596
597 VNET_IP6_UNICAST_FEATURE_INIT (ip6_policer_classify, static) = {
598   .node_name = "ip6-policer-classify",
599   .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip6", 0},
600   .feature_index = &ip6_main.ip6_unicast_rx_feature_policer_classify,
601 };
602
603 VNET_IP6_UNICAST_FEATURE_INIT (ip6_ipsec, static) = {
604   .node_name = "ipsec-input-ip6",
605   .runs_before = ORDER_CONSTRAINTS {"l2tp-decap", 0},
606   .feature_index = &ip6_main.ip6_unicast_rx_feature_ipsec,
607 };
608
609 VNET_IP6_UNICAST_FEATURE_INIT (ip6_l2tp, static) = {
610   .node_name = "l2tp-decap",
611   .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip6", 0},
612   .feature_index = &ip6_main.ip6_unicast_rx_feature_l2tp_decap,
613 };
614
615 VNET_IP6_UNICAST_FEATURE_INIT (ip6_vpath, static) = {
616   .node_name = "vpath-input-ip6",
617   .runs_before = ORDER_CONSTRAINTS {"ip6-lookup", 0},
618   .feature_index = &ip6_main.ip6_unicast_rx_feature_vpath,
619 };
620
621 VNET_IP6_UNICAST_FEATURE_INIT (ip6_lookup, static) = {
622   .node_name = "ip6-lookup",
623   .runs_before = ORDER_CONSTRAINTS {"ip6-drop", 0},
624   .feature_index = &ip6_main.ip6_unicast_rx_feature_lookup,
625 };
626
627 VNET_IP6_UNICAST_FEATURE_INIT (ip6_drop, static) = {
628   .node_name = "ip6-drop",
629   .runs_before = 0,  /*last feature*/
630   .feature_index = &ip6_main.ip6_unicast_rx_feature_drop,
631 };
632
633 /* Built-in ip6 multicast rx feature path definition (none now) */
634 VNET_IP6_MULTICAST_FEATURE_INIT (ip6_vpath_mc, static) = {
635   .node_name = "vpath-input-ip6",
636   .runs_before = ORDER_CONSTRAINTS {"ip6-lookup", 0},
637   .feature_index = &ip6_main.ip6_multicast_rx_feature_vpath,
638 };
639
640 VNET_IP6_MULTICAST_FEATURE_INIT (ip6_lookup, static) = {
641   .node_name = "ip6-lookup",
642   .runs_before = ORDER_CONSTRAINTS {"ip6-drop", 0},
643   .feature_index = &ip6_main.ip6_multicast_rx_feature_lookup,
644 };
645
646 VNET_IP6_MULTICAST_FEATURE_INIT (ip6_drop_mc, static) = {
647   .node_name = "ip6-drop",
648   .runs_before = 0, /* last feature */
649   .feature_index = &ip6_main.ip6_multicast_rx_feature_drop,
650 };
651
/* Start nodes handed to the feature arc setup for the RX casts. */
static char * rx_feature_start_nodes[] = {
  "ip6-input",
};

/* Start nodes handed to the feature arc setup for the TX cast. */
static char * tx_feature_start_nodes[] = {
  "ip6-rewrite",
  "ip6-midchain",
};
660
661 /* Built-in ip4 tx feature path definition */
662 VNET_IP6_TX_FEATURE_INIT (interface_output, static) = {
663   .node_name = "interface-output",
664   .runs_before = 0, /* not before any other features */
665   .feature_index = &ip6_main.ip6_tx_feature_interface_output,
666 };
667
668 static clib_error_t *
669 ip6_feature_init (vlib_main_t * vm, ip6_main_t * im)
670 {
671   ip_lookup_main_t * lm = &im->lookup_main;
672   clib_error_t * error;
673   vnet_cast_t cast;
674   ip_config_main_t * cm;
675   vnet_config_main_t * vcm;
676   char **feature_start_nodes;
677   int feature_start_len;
678
679   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
680     {
681       cm = &lm->feature_config_mains[cast];
682       vcm = &cm->config_main;
683
684       if (cast < VNET_IP_TX_FEAT)
685         {
686           feature_start_nodes = rx_feature_start_nodes;
687           feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
688         }
689       else
690         {
691           feature_start_nodes = tx_feature_start_nodes;
692           feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
693         }
694
695       if ((error = vnet_feature_arc_init (vm, vcm,
696                                          feature_start_nodes,
697                                          feature_start_len,
698                                          im->next_feature[cast],
699                                          &im->feature_nodes[cast])))
700         return error;
701     }
702   return 0;
703 }
704
705 clib_error_t *
706 ip6_sw_interface_add_del (vnet_main_t * vnm,
707                           u32 sw_if_index,
708                           u32 is_add)
709 {
710   vlib_main_t * vm = vnm->vlib_main;
711   ip6_main_t * im = &ip6_main;
712   ip_lookup_main_t * lm = &im->lookup_main;
713   u32 ci, cast;
714   u32 feature_index;
715
716   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
717     {
718       ip_config_main_t * cm = &lm->feature_config_mains[cast];
719       vnet_config_main_t * vcm = &cm->config_main;
720
721       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
722       ci = cm->config_index_by_sw_if_index[sw_if_index];
723
724       if (cast == VNET_IP_RX_UNICAST_FEAT)
725         feature_index = im->ip6_unicast_rx_feature_drop;
726       else if (cast == VNET_IP_RX_MULTICAST_FEAT)
727         feature_index = im->ip6_multicast_rx_feature_drop;
728       else
729         feature_index = im->ip6_tx_feature_interface_output;
730
731       if (is_add)
732         ci = vnet_config_add_feature (vm, vcm,
733                                       ci,
734                                       feature_index,
735                                       /* config data */ 0,
736                                       /* # bytes of config data */ 0);
737       else
738         {
739           ci = vnet_config_del_feature (vm, vcm, ci,
740                                         feature_index,
741                                         /* config data */ 0,
742                                         /* # bytes of config data */ 0);
743           if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index)
744               im->ip_enabled_by_sw_if_index[sw_if_index] = 0;
745         }
746       cm->config_index_by_sw_if_index[sw_if_index] = ci;
747       /*
748        * note: do not update the tx feature count here.
749        */
750     }
751   return /* no error */ 0;
752 }
753
754 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del);
755
756 static uword
757 ip6_lookup (vlib_main_t * vm,
758             vlib_node_runtime_t * node,
759             vlib_frame_t * frame)
760 {
761   return ip6_lookup_inline (vm, node, frame);
762 }
763
764 static u8 * format_ip6_lookup_trace (u8 * s, va_list * args);
765
766 VLIB_REGISTER_NODE (ip6_lookup_node) = {
767   .function = ip6_lookup,
768   .name = "ip6-lookup",
769   .vector_size = sizeof (u32),
770
771   .format_trace = format_ip6_lookup_trace,
772
773   .n_next_nodes = IP6_LOOKUP_N_NEXT,
774   .next_nodes = IP6_LOOKUP_NEXT_NODES,
775 };
776
777 VLIB_NODE_FUNCTION_MULTIARCH (ip6_lookup_node, ip6_lookup)
778
779 always_inline uword
780 ip6_load_balance (vlib_main_t * vm,
781                   vlib_node_runtime_t * node,
782                   vlib_frame_t * frame)
783 {
784   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
785   u32 n_left_from, n_left_to_next, * from, * to_next;
786   ip_lookup_next_t next;
787   u32 cpu_index = os_get_cpu_number();
788
789   from = vlib_frame_vector_args (frame);
790   n_left_from = frame->n_vectors;
791   next = node->cached_next_index;
792
793   if (node->flags & VLIB_NODE_FLAG_TRACE)
794       ip6_forward_next_trace(vm, node, frame, VLIB_TX);
795
796   while (n_left_from > 0)
797     {
798       vlib_get_next_frame (vm, node, next,
799                            to_next, n_left_to_next);
800
801
802       while (n_left_from > 0 && n_left_to_next > 0)
803         {
804           ip_lookup_next_t next0;
805           const load_balance_t *lb0;
806           vlib_buffer_t * p0;
807           u32 pi0, lbi0, hc0;
808           const ip6_header_t *ip0;
809           const dpo_id_t *dpo0;
810
811           pi0 = from[0];
812           to_next[0] = pi0;
813
814           p0 = vlib_get_buffer (vm, pi0);
815
816           ip0 = vlib_buffer_get_current (p0);
817           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
818
819           lb0 = load_balance_get(lbi0);
820           hc0 = lb0->lb_hash_config;
821           vnet_buffer(p0)->ip.flow_hash = ip6_compute_flow_hash(ip0, hc0);
822
823           dpo0 = load_balance_get_bucket_i(lb0,
824                                            vnet_buffer(p0)->ip.flow_hash &
825                                            (lb0->lb_n_buckets - 1));
826
827           next0 = dpo0->dpoi_next_node;
828           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
829
830           vlib_increment_combined_counter
831               (cm, cpu_index, lbi0, 1,
832                vlib_buffer_length_in_chain (vm, p0));
833
834           from += 1;
835           to_next += 1;
836           n_left_to_next -= 1;
837           n_left_from -= 1;
838
839           if (PREDICT_FALSE (next0 != next))
840             {
841               n_left_to_next += 1;
842               vlib_put_next_frame (vm, node, next, n_left_to_next);
843               next = next0;
844               vlib_get_next_frame (vm, node, next,
845                                    to_next, n_left_to_next);
846               to_next[0] = pi0;
847               to_next += 1;
848               n_left_to_next -= 1;
849             }
850         }
851
852       vlib_put_next_frame (vm, node, next, n_left_to_next);
853     }
854
855   return frame->n_vectors;
856 }
857
/*
 * Graph node registration for "ip6-load-balance".
 * The node runs ip6_load_balance, renders its traces with
 * format_ip6_lookup_trace and declares no next nodes of its own
 * (n_next_nodes = 0); it names "ip6-lookup" as its sibling.
 * NOTE(review): presumably sibling_of makes this node share the
 * next-node arcs of ip6-lookup -- confirm against the vlib node docs.
 */
858 VLIB_REGISTER_NODE (ip6_load_balance_node) = {
859   .function = ip6_load_balance,
860   .name = "ip6-load-balance",
861   .vector_size = sizeof (u32),
862   .sibling_of = "ip6-lookup",
863   .format_trace = format_ip6_lookup_trace,
864   .n_next_nodes = 0,
865 };
866
/* NOTE(review): presumably emits per-CPU-architecture variants of the
 * node function -- confirm against the macro definition. */
867 VLIB_NODE_FUNCTION_MULTIARCH (ip6_load_balance_node, ip6_load_balance)
868
/*
 * Per-packet trace record filled in by ip6_forward_next_trace() and
 * rendered by the format_ip6_*_trace functions in this file.
 */
869 typedef struct {
870   /* Adjacency taken. */
      /* Copied from vnet_buffer(b)->ip.adj_index[which_adj_index]. */
871   u32 adj_index;
      /* Copied from vnet_buffer(b)->ip.flow_hash. */
872   u32 flow_hash;
      /* Holds sw_if_index[VLIB_TX] when that is not ~0; otherwise the FIB
       * index looked up for sw_if_index[VLIB_RX]. */
873   u32 fib_index;
874
875   /* Packet data, possibly *after* rewrite. */
      /* Sized as 128 bytes minus one u32. */
876   u8 packet_data[128 - 1*sizeof(u32)];
877 } ip6_forward_next_trace_t;
878
/*
 * Trace formatter: appends the captured packet bytes, formatted as an
 * IPv6 header, to the vector S at the current indent.
 * Varargs consumed: vlib_main_t * (unused), vlib_node_t * (unused),
 * ip6_forward_next_trace_t *.
 * Returns the (possibly reallocated) format vector.
 */
879 static u8 * format_ip6_forward_next_trace (u8 * s, va_list * args)
880 {
881   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
882   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
883   ip6_forward_next_trace_t * t = va_arg (*args, ip6_forward_next_trace_t *);
884   uword indent = format_get_indent (s);
885
886   s = format(s, "%U%U",
887              format_white_space, indent,
888              format_ip6_header, t->packet_data, sizeof (t->packet_data));
889   return s;
890 }
891
/*
 * Trace formatter for lookup-style nodes: prints the recorded fib index,
 * dpo index (the adj_index field) and flow hash on one line, then the
 * captured packet bytes formatted as an IPv6 header on the next line.
 * Varargs consumed: vlib_main_t * (unused), vlib_node_t * (unused),
 * ip6_forward_next_trace_t *.
 * Returns the (possibly reallocated) format vector.
 */
892 static u8 * format_ip6_lookup_trace (u8 * s, va_list * args)
893 {
894   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
895   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
896   ip6_forward_next_trace_t * t = va_arg (*args, ip6_forward_next_trace_t *);
897   uword indent = format_get_indent (s);
898
899   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
900               t->fib_index, t->adj_index, t->flow_hash);
901   s = format(s, "\n%U%U",
902              format_white_space, indent,
903              format_ip6_header, t->packet_data, sizeof (t->packet_data));
904   return s;
905 }
906
907
/*
 * Trace formatter for rewrite-style nodes: prints the TX interface index,
 * the adjacency index together with the formatted adjacency, and the flow
 * hash; then prints the captured packet bytes via
 * format_ip_adjacency_packet_data for that adjacency.
 * Varargs consumed: vlib_main_t * (unused), vlib_node_t * (unused),
 * ip6_forward_next_trace_t *.
 * Returns the (possibly reallocated) format vector.
 */
908 static u8 * format_ip6_rewrite_trace (u8 * s, va_list * args)
909 {
910   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
911   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
912   ip6_forward_next_trace_t * t = va_arg (*args, ip6_forward_next_trace_t *);
913   vnet_main_t * vnm = vnet_get_main();
914   uword indent = format_get_indent (s);
915
      /* The value printed as "tx_sw_if_index" is the trace's fib_index
       * field; ip6_forward_next_trace() stores sw_if_index[VLIB_TX] there
       * whenever that value is not ~0. */
916   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
917               t->fib_index, t->adj_index, format_ip_adjacency,
918               t->adj_index, FORMAT_IP_ADJACENCY_NONE,
919               t->flow_hash);
920   s = format (s, "\n%U%U",
921               format_white_space, indent,
922               format_ip_adjacency_packet_data,
923               vnm, t->adj_index,
924               t->packet_data, sizeof (t->packet_data));
925   return s;
926 }
927
928 /* Common trace function for all ip6-forward next nodes. */
/*
 * Walks every buffer index in FRAME and, for each buffer that has
 * VLIB_BUFFER_IS_TRACED set, adds an ip6_forward_next_trace_t record:
 *   - adj_index from ip.adj_index[which_adj_index],
 *   - flow_hash from ip.flow_hash,
 *   - fib_index: sw_if_index[VLIB_TX] when it is not ~0, otherwise the
 *     entry of im->fib_index_by_sw_if_index for sw_if_index[VLIB_RX],
 *   - packet_data: sizeof (packet_data) bytes copied from the buffer's
 *     current data pointer.
 * NOTE(review): the copy length is fixed and does not consult the
 * buffer's current_length -- confirm that over-reading short packets is
 * acceptable for trace purposes.
 */
929 void
930 ip6_forward_next_trace (vlib_main_t * vm,
931                         vlib_node_runtime_t * node,
932                         vlib_frame_t * frame,
933                         vlib_rx_or_tx_t which_adj_index)
934 {
935   u32 * from, n_left;
936   ip6_main_t * im = &ip6_main;
937
938   n_left = frame->n_vectors;
939   from = vlib_frame_vector_args (frame);
940
      /* Two buffers per iteration; at least four must remain because
       * from[2] and from[3] are prefetched while from[0]/from[1] are
       * handled. */
941   while (n_left >= 4)
942     {
943       u32 bi0, bi1;
944       vlib_buffer_t * b0, * b1;
945       ip6_forward_next_trace_t * t0, * t1;
946
947       /* Prefetch next iteration. */
948       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
949       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
950
951       bi0 = from[0];
952       bi1 = from[1];
953
954       b0 = vlib_get_buffer (vm, bi0);
955       b1 = vlib_get_buffer (vm, bi1);
956
957       if (b0->flags & VLIB_BUFFER_IS_TRACED)
958         {
959           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
960           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
961           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
962           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
963               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
964               vec_elt (im->fib_index_by_sw_if_index,
965                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
966
967           clib_memcpy (t0->packet_data,
968                   vlib_buffer_get_current (b0),
969                   sizeof (t0->packet_data));
970         }
971       if (b1->flags & VLIB_BUFFER_IS_TRACED)
972         {
973           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
974           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
975           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
976           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
977               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
978               vec_elt (im->fib_index_by_sw_if_index,
979                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
980
981           clib_memcpy (t1->packet_data,
982                   vlib_buffer_get_current (b1),
983                   sizeof (t1->packet_data));
984         }
985       from += 2;
986       n_left -= 2;
987     }
988
      /* Remaining buffers (up to three after the dual loop), one at a
       * time, with the same record layout as above. */
989   while (n_left >= 1)
990     {
991       u32 bi0;
992       vlib_buffer_t * b0;
993       ip6_forward_next_trace_t * t0;
994
995       bi0 = from[0];
996
997       b0 = vlib_get_buffer (vm, bi0);
998
999       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1000         {
1001           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1002           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1003           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1004           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1005               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1006               vec_elt (im->fib_index_by_sw_if_index,
1007                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1008
1009           clib_memcpy (t0->packet_data,
1010                   vlib_buffer_get_current (b0),
1011                   sizeof (t0->packet_data));
1012         }
1013       from += 1;
1014       n_left -= 1;
1015     }
1016 }
1017
/*
 * Shared body of the ip6-drop, ip6-punt and ip6-multicast nodes.
 * Hands every buffer in FRAME to vlib_error_drop_buffers(), sending them
 * to next index 0 with ERROR_CODE attributed to ip6_input_node, and adds
 * trace records (using the VLIB_TX adjacency index) when tracing is
 * enabled on the node.
 * Returns the number of packets in the frame.
 */
1018 static uword
1019 ip6_drop_or_punt (vlib_main_t * vm,
1020                   vlib_node_runtime_t * node,
1021                   vlib_frame_t * frame,
1022                   ip6_error_t error_code)
1023 {
1024   u32 * buffers = vlib_frame_vector_args (frame);
1025   uword n_packets = frame->n_vectors;
1026
1027   vlib_error_drop_buffers (vm, node,
1028                            buffers,
1029                            /* stride */ 1,
1030                            n_packets,
1031                            /* next */ 0,
1032                            ip6_input_node.index,
1033                            error_code);
1034
1035   if (node->flags & VLIB_NODE_FLAG_TRACE)
1036     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1037
1038   return n_packets;
1039 }
1040
/* Node function for "ip6-drop" (also used by "ip6-multicast"): forwards
 * the whole frame to ip6_drop_or_punt with IP6_ERROR_ADJACENCY_DROP and
 * returns its packet count. */
1041 static uword
1042 ip6_drop (vlib_main_t * vm,
1043           vlib_node_runtime_t * node,
1044           vlib_frame_t * frame)
1045 { return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_DROP); }
1046
/* Node function for "ip6-punt": forwards the whole frame to
 * ip6_drop_or_punt with IP6_ERROR_ADJACENCY_PUNT and returns its packet
 * count. */
1047 static uword
1048 ip6_punt (vlib_main_t * vm,
1049           vlib_node_runtime_t * node,
1050           vlib_frame_t * frame)
1051 { return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_PUNT); }
1052
/*
 * Graph node registration for "ip6-drop": runs ip6_drop, formats traces
 * with format_ip6_forward_next_trace, and has a single next node,
 * "error-drop", at index 0 (the index ip6_drop_or_punt enqueues to).
 */
1053 VLIB_REGISTER_NODE (ip6_drop_node,static) = {
1054   .function = ip6_drop,
1055   .name = "ip6-drop",
1056   .vector_size = sizeof (u32),
1057
1058   .format_trace = format_ip6_forward_next_trace,
1059
1060   .n_next_nodes = 1,
1061   .next_nodes = {
1062     [0] = "error-drop",
1063   },
1064 };
1065
1066 VLIB_NODE_FUNCTION_MULTIARCH (ip6_drop_node, ip6_drop)
1067
/*
 * Graph node registration for "ip6-punt": runs ip6_punt, formats traces
 * with format_ip6_forward_next_trace, and has a single next node,
 * "error-punt", at index 0 (the index ip6_drop_or_punt enqueues to).
 */
1068 VLIB_REGISTER_NODE (ip6_punt_node,static) = {
1069   .function = ip6_punt,
1070   .name = "ip6-punt",
1071   .vector_size = sizeof (u32),
1072
1073   .format_trace = format_ip6_forward_next_trace,
1074
1075   .n_next_nodes = 1,
1076   .next_nodes = {
1077     [0] = "error-punt",
1078   },
1079 };
1080
1081 VLIB_NODE_FUNCTION_MULTIARCH (ip6_punt_node, ip6_punt)
1082
/*
 * Graph node registration for "ip6-multicast". It reuses ip6_drop as its
 * node function, so every packet reaching it is sent to "error-drop"
 * with IP6_ERROR_ADJACENCY_DROP.
 */
1083 VLIB_REGISTER_NODE (ip6_multicast_node,static) = {
1084   .function = ip6_drop,
1085   .name = "ip6-multicast",
1086   .vector_size = sizeof (u32),
1087
1088   .format_trace = format_ip6_forward_next_trace,
1089
1090   .n_next_nodes = 1,
1091   .next_nodes = {
1092     [0] = "error-drop",
1093   },
1094 };
1095
1096 /* Compute TCP/UDP/ICMP6 checksum in software. */
/*
 * Sums the pseudo-header terms (payload length, protocol, source and
 * destination addresses) and then the payload bytes that follow the IPv6
 * header, walking the buffer chain when the payload does not fit in the
 * first buffer.
 *
 *   vm             used to resolve chained buffers (non-DPDK build).
 *   p0             buffer holding the packet; may be 0 (the neighbor
 *                  discovery code in this file passes 0), in which case
 *                  the whole payload is read contiguously after ip0.
 *   ip0            the IPv6 header; the payload is taken to start right
 *                  after it (or after one hop-by-hop extension header).
 *   bogus_lengthp  must be non-NULL; set to 0 on entry and to 1 when the
 *                  buffer chain ends before payload_length bytes have
 *                  been summed.
 *
 * Returns the one's-complement of the folded sum, or 0xfefe when
 * *bogus_lengthp is set to 1.
 */
1097 u16 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, ip6_header_t * ip0, int *bogus_lengthp)
1098 {
1099   ip_csum_t sum0;
1100   u16 sum16, payload_length_host_byte_order;
1101   u32 i, n_this_buffer, n_bytes_left;
1102   u32 headers_size = sizeof(ip0[0]);
1103   void * data_this_buffer;
1104
1105   ASSERT(bogus_lengthp);
1106   *bogus_lengthp = 0;
1107
1108   /* Initialize checksum with ip header. */
       /* payload_length is added as stored (network byte order); the
        * one-byte protocol is converted so both terms share that order. */
1109   sum0 = ip0->payload_length + clib_host_to_net_u16 (ip0->protocol);
1110   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
1111   data_this_buffer = (void *) (ip0 + 1);
1112
       /* Add both addresses one machine word at a time. */
1113   for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++)
1114     {
1115       sum0 = ip_csum_with_carry (sum0,
1116                                  clib_mem_unaligned (&ip0->src_address.as_uword[i], uword));
1117       sum0 = ip_csum_with_carry (sum0,
1118                                  clib_mem_unaligned (&ip0->dst_address.as_uword[i], uword));
1119     }
1120
1121   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
1122   if (PREDICT_FALSE (ip0->protocol ==  IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
1123     {
1124       u32  skip_bytes;
1125       ip6_hop_by_hop_ext_t *ext_hdr = (ip6_hop_by_hop_ext_t  *)data_this_buffer;
1126
1127       /* validate really icmp6 next */
1128       ASSERT(ext_hdr->next_hdr == IP_PROTOCOL_ICMP6);
1129
           /* Extension header length: 8 bytes times (1 + n_data_u64s). */
1130       skip_bytes = 8* (1 + ext_hdr->n_data_u64s);
1131       data_this_buffer  = (void *)((u8 *)data_this_buffer + skip_bytes);
1132
           /* NOTE(review): sum0 was already seeded above from the base
            * header's payload_length and protocol, before this adjustment;
            * confirm that is the intended pseudo-header for hop-by-hop
            * packets. Also nothing here checks skip_bytes against the
            * payload length before subtracting. */
1133       payload_length_host_byte_order  -= skip_bytes;
1134       headers_size += skip_bytes;
1135    }
1136
1137   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1138 #if DPDK > 0
       /* DPDK build: when the payload extends past the first buffer, walk
        * the rte_mbuf segment chain. */
1139   if (p0 && n_this_buffer + headers_size  > p0->current_length)
1140   {
1141     struct rte_mbuf *mb = rte_mbuf_from_vlib_buffer(p0);
1142     u8 nb_segs = mb->nb_segs;
1143
1144     n_this_buffer = (p0->current_length > headers_size ?
1145                      p0->current_length - headers_size : 0);
1146     while (n_bytes_left)
1147       {
1148         sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1149         n_bytes_left -= n_this_buffer;
1150
1151         mb = mb->next;
1152         nb_segs--;
1153         if ((nb_segs == 0) || (mb == 0))
1154           break;
1155
1156         data_this_buffer = rte_ctrlmbuf_data(mb);
1157         n_this_buffer = mb->data_len;
1158       }
         /* Payload bytes or segments left over: report a bogus length. */
1159     if (n_bytes_left || nb_segs)
1160       {
1161         *bogus_lengthp = 1;
1162         return 0xfefe;
1163       }
1164   }
1165   else sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1166 #else
       /* Non-DPDK build: clamp the first chunk to what the first buffer
        * holds, then follow next_buffer links until the payload is
        * consumed. */
1167   if (p0 && n_this_buffer + headers_size  > p0->current_length)
1168     n_this_buffer = p0->current_length > headers_size  ? p0->current_length - headers_size  : 0;
1169   while (1)
1170     {
1171       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1172       n_bytes_left -= n_this_buffer;
1173       if (n_bytes_left == 0)
1174         break;
1175
           /* NOTE(review): p0 is dereferenced here; with p0 == 0 the
            * first pass always consumes the full payload, so the loop
            * exits above before reaching this line. */
1176       if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
1177         {
1178           *bogus_lengthp = 1;
1179           return 0xfefe;
1180         }
1181       p0 = vlib_get_buffer (vm, p0->next_buffer);
1182       data_this_buffer = vlib_buffer_get_current (p0);
1183       n_this_buffer = p0->current_length;
1184     }
1185 #endif /* DPDK */
1186
1187   sum16 = ~ ip_csum_fold (sum0);
1188
1189   return sum16;
1190 }
1191
/*
 * Validates the L4 checksum of the IPv6 packet at the current data
 * pointer of P0 and records the verdict in the buffer flags.
 * A UDP packet whose checksum field is 0 is marked computed and correct
 * without further work. Otherwise the checksum is recomputed over the
 * packet as received; IP_BUFFER_L4_CHECKSUM_COMPUTED is always set and
 * IP_BUFFER_L4_CHECKSUM_CORRECT is set when the result is 0.
 * Returns the updated p0->flags.
 * NOTE(review): bogus_length reported by the compute routine is not
 * inspected here; the 0xfefe it returns in that case is non-zero, so such
 * packets end up marked as not correct.
 */
1192 u32 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1193 {
1194   ip6_header_t * ip0 = vlib_buffer_get_current (p0);
1195   udp_header_t * udp0;
1196   u16 sum16;
1197   int bogus_length;
1198
1199   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
1200   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1201           || ip0->protocol == IP_PROTOCOL_ICMP6
1202           || ip0->protocol == IP_PROTOCOL_UDP
1203           || ip0->protocol ==  IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS);
1204
1205   udp0 = (void *) (ip0 + 1);
1206   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1207     {
1208       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1209                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1210       return p0->flags;
1211     }
1212
1213   sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, &bogus_length);
1214
       /* (sum16 == 0) is shifted into the CHECKSUM_CORRECT bit position. */
1215   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1216                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1217
1218   return p0->flags;
1219 }
1220
/*
 * Node function for "ip6-local": dispatches locally-destined packets to
 * the next node registered for their IP protocol.
 *
 * For each packet it:
 *   - looks up the builtin protocol type and the next index by
 *     ip0->protocol,
 *   - treats the L4 checksum as good when the buffer flag says so, when
 *     the packet is UDP with a zero checksum field, or when the builtin
 *     type is unknown; otherwise validates it in software if it has not
 *     been computed yet,
 *   - for UDP, flags IP6_ERROR_UDP_LENGTH when the UDP length exceeds
 *     the IPv6 payload length,
 *   - performs a source lookup unless the packet is ICMP or has a
 *     link-local source, flagging IP6_ERROR_SRC_LOOKUP_MISS on failure,
 *   - sends the packet to IP_LOCAL_NEXT_DROP when any error was raised.
 *
 * IP6_ERROR_UNKNOWN_PROTOCOL doubles as the "no error found" value: it is
 * the initial error and the only value that leaves next0 untouched.
 * Errors are taken from ip6_input_node's error table.
 * Returns the number of packets in the frame.
 */
1221 static uword
1222 ip6_local (vlib_main_t * vm,
1223            vlib_node_runtime_t * node,
1224            vlib_frame_t * frame)
1225 {
1226   ip6_main_t * im = &ip6_main;
1227   ip_lookup_main_t * lm = &im->lookup_main;
1228   ip_local_next_t next_index;
1229   u32 * from, * to_next, n_left_from, n_left_to_next;
1230   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_input_node.index);
1231
1232   from = vlib_frame_vector_args (frame);
1233   n_left_from = frame->n_vectors;
1234   next_index = node->cached_next_index;
1235
1236   if (node->flags & VLIB_NODE_FLAG_TRACE)
1237     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1238
1239   while (n_left_from > 0)
1240     {
1241       vlib_get_next_frame (vm, node, next_index,
1242                            to_next, n_left_to_next);
1243
           /* Dual loop: two packets per iteration. */
1244       while (n_left_from >= 4 && n_left_to_next >= 2)
1245         {
1246           vlib_buffer_t * p0, * p1;
1247           ip6_header_t * ip0, * ip1;
1248           udp_header_t * udp0, * udp1;
1249           u32 pi0, ip_len0, udp_len0, flags0, next0;
1250           u32 pi1, ip_len1, udp_len1, flags1, next1;
1251           i32 len_diff0, len_diff1;
1252           u8 error0, type0, good_l4_checksum0;
1253           u8 error1, type1, good_l4_checksum1;
1254
               /* Speculatively enqueue both packets to the current next
                * frame; the enqueue macro at the end of the iteration is
                * given the actual next0/next1. */
1255           pi0 = to_next[0] = from[0];
1256           pi1 = to_next[1] = from[1];
1257           from += 2;
1258           n_left_from -= 2;
1259           to_next += 2;
1260           n_left_to_next -= 2;
1261
1262           p0 = vlib_get_buffer (vm, pi0);
1263           p1 = vlib_get_buffer (vm, pi1);
1264
1265           ip0 = vlib_buffer_get_current (p0);
1266           ip1 = vlib_buffer_get_current (p1);
1267
1268           type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
1269           type1 = lm->builtin_protocol_by_ip_protocol[ip1->protocol];
1270
1271           next0 = lm->local_next_by_ip_protocol[ip0->protocol];
1272           next1 = lm->local_next_by_ip_protocol[ip1->protocol];
1273
1274           flags0 = p0->flags;
1275           flags1 = p1->flags;
1276
1277           good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1278           good_l4_checksum1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1279
               /* The header after the IPv6 header is read as UDP for every
                * packet; the values only take effect when the type is UDP. */
1280           udp0 = ip6_next_header (ip0);
1281           udp1 = ip6_next_header (ip1);
1282
1283           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1284           good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0;
1285           good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UDP && udp1->checksum == 0;
1286
1287           good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
1288           good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UNKNOWN;
1289
1290           /* Verify UDP length. */
1291           ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
1292           ip_len1 = clib_net_to_host_u16 (ip1->payload_length);
1293           udp_len0 = clib_net_to_host_u16 (udp0->length);
1294           udp_len1 = clib_net_to_host_u16 (udp1->length);
1295
1296           len_diff0 = ip_len0 - udp_len0;
1297           len_diff1 = ip_len1 - udp_len1;
1298
               /* The length difference is only meaningful for UDP. */
1299           len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
1300           len_diff1 = type1 == IP_BUILTIN_PROTOCOL_UDP ? len_diff1 : 0;
1301
               /* Fall back to a software checksum only when nothing has
                * already established the verdict. */
1302           if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
1303                              && ! good_l4_checksum0
1304                              && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED)))
1305             {
1306               flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0);
1307               good_l4_checksum0 =
1308                 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1309             }
1310           if (PREDICT_FALSE (type1 != IP_BUILTIN_PROTOCOL_UNKNOWN
1311                              && ! good_l4_checksum1
1312                              && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED)))
1313             {
1314               flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, p1);
1315               good_l4_checksum1 =
1316                 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1317             }
1318
               /* UNKNOWN_PROTOCOL is the "no error so far" value. */
1319           error0 = error1 = IP6_ERROR_UNKNOWN_PROTOCOL;
1320
1321           error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0;
1322           error1 = len_diff1 < 0 ? IP6_ERROR_UDP_LENGTH : error1;
1323
               /* The checksum error is selected by adding the builtin type
                * to IP6_ERROR_UDP_CHECKSUM; the asserts pin that layout. */
1324           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP == IP6_ERROR_UDP_CHECKSUM);
1325           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP == IP6_ERROR_ICMP_CHECKSUM);
1326           error0 = (! good_l4_checksum0
1327                     ? IP6_ERROR_UDP_CHECKSUM + type0
1328                     : error0);
1329           error1 = (! good_l4_checksum1
1330                     ? IP6_ERROR_UDP_CHECKSUM + type1
1331                     : error1);
1332
1333           /* Drop packets from unroutable hosts. */
1334           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1335           if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL &&
1336               type0 != IP_BUILTIN_PROTOCOL_ICMP &&
1337               !ip6_address_is_link_local_unicast(&ip0->src_address))
1338             {
1339               u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
1340               error0 = (ADJ_INDEX_INVALID == src_adj_index0
1341                         ? IP6_ERROR_SRC_LOOKUP_MISS
1342                         : error0);
1343             }
1344           if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL &&
1345               type1 != IP_BUILTIN_PROTOCOL_ICMP &&
1346               !ip6_address_is_link_local_unicast(&ip1->src_address))
1347             {
1348               u32 src_adj_index1 = ip6_src_lookup_for_packet (im, p1, ip1);
1349               error1 = (ADJ_INDEX_INVALID == src_adj_index1
1350                         ? IP6_ERROR_SRC_LOOKUP_MISS
1351                         : error1);
1352             }
1353
               /* Any real error overrides the per-protocol next node. */
1354           next0 = error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1355           next1 = error1 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1356
1357           p0->error = error_node->errors[error0];
1358           p1->error = error_node->errors[error1];
1359
1360           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1361                                            to_next, n_left_to_next,
1362                                            pi0, pi1, next0, next1);
1363         }
1364
           /* Single loop: same per-packet logic, one packet at a time. */
1365       while (n_left_from > 0 && n_left_to_next > 0)
1366         {
1367           vlib_buffer_t * p0;
1368           ip6_header_t * ip0;
1369           udp_header_t * udp0;
1370           u32 pi0, ip_len0, udp_len0, flags0, next0;
1371           i32 len_diff0;
1372           u8 error0, type0, good_l4_checksum0;
1373
1374           pi0 = to_next[0] = from[0];
1375           from += 1;
1376           n_left_from -= 1;
1377           to_next += 1;
1378           n_left_to_next -= 1;
1379
1380           p0 = vlib_get_buffer (vm, pi0);
1381
1382           ip0 = vlib_buffer_get_current (p0);
1383
1384           type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
1385           next0 = lm->local_next_by_ip_protocol[ip0->protocol];
1386
1387           flags0 = p0->flags;
1388
1389           good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1390
1391           udp0 = ip6_next_header (ip0);
1392
1393           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1394           good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0;
1395
1396           good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
1397
1398           /* Verify UDP length. */
1399           ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
1400           udp_len0 = clib_net_to_host_u16 (udp0->length);
1401
1402           len_diff0 = ip_len0 - udp_len0;
1403
1404           len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
1405
1406           if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
1407                              && ! good_l4_checksum0
1408                              && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED)))
1409             {
1410               flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0);
1411               good_l4_checksum0 =
1412                 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1413             }
1414
1415           error0 = IP6_ERROR_UNKNOWN_PROTOCOL;
1416
1417           error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0;
1418
1419           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP == IP6_ERROR_UDP_CHECKSUM);
1420           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP == IP6_ERROR_ICMP_CHECKSUM);
1421           error0 = (! good_l4_checksum0
1422                     ? IP6_ERROR_UDP_CHECKSUM + type0
1423                     : error0);
1424
1425           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1426           if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL &&
1427               type0 != IP_BUILTIN_PROTOCOL_ICMP &&
1428               !ip6_address_is_link_local_unicast(&ip0->src_address))
1429             {
1430               u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
1431               error0 = (ADJ_INDEX_INVALID == src_adj_index0
1432                         ? IP6_ERROR_SRC_LOOKUP_MISS
1433                         : error0);
1434             }
1435
1436           next0 = error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1437
1438           p0->error = error_node->errors[error0];
1439
1440           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1441                                            to_next, n_left_to_next,
1442                                            pi0, next0);
1443         }
1444
1445       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1446     }
1447
1448   return frame->n_vectors;
1449 }
1450
/*
 * Graph node registration for "ip6-local": runs ip6_local and starts with
 * the IP_LOCAL_N_NEXT next nodes listed below. ip6_register_protocol()
 * adds further next nodes to this node at run time.
 */
1451 VLIB_REGISTER_NODE (ip6_local_node,static) = {
1452   .function = ip6_local,
1453   .name = "ip6-local",
1454   .vector_size = sizeof (u32),
1455
1456   .format_trace = format_ip6_forward_next_trace,
1457
1458   .n_next_nodes = IP_LOCAL_N_NEXT,
1459   .next_nodes = {
1460     [IP_LOCAL_NEXT_DROP] = "error-drop",
1461     [IP_LOCAL_NEXT_PUNT] = "error-punt",
1462     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
1463     [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input",
1464   },
1465 };
1466
1467 VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_node, ip6_local)
1468
/*
 * Registers NODE_INDEX as the handler for locally-destined packets of IP
 * protocol PROTOCOL: the node is added as a next node of ip6-local and
 * the resulting next index is stored in local_next_by_ip_protocol, which
 * ip6_local consults per packet.
 * PROTOCOL must be a valid index into that table (checked by ASSERT
 * only).
 */
1469 void ip6_register_protocol (u32 protocol, u32 node_index)
1470 {
1471   vlib_main_t * vm = vlib_get_main();
1472   ip6_main_t * im = &ip6_main;
1473   ip_lookup_main_t * lm = &im->lookup_main;
1474
1475   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1476   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip6_local_node.index, node_index);
1477 }
1478
/* Next-node indices shared by the ip6-discover-neighbor and ip6-glean
 * nodes: DROP maps to "error-drop" and REPLY_TX to "interface-output" in
 * their registrations. */
1479 typedef enum {
1480   IP6_DISCOVER_NEIGHBOR_NEXT_DROP,
1481   IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX,
1482   IP6_DISCOVER_NEIGHBOR_N_NEXT,
1483 } ip6_discover_neighbor_next_t;
1484
/* Error/counter indices for the neighbor discovery nodes; each value
 * indexes ip6_discover_neighbor_error_strings. */
1485 typedef enum {
1486   IP6_DISCOVER_NEIGHBOR_ERROR_DROP,
1487   IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT,
1488   IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS,
1489 } ip6_discover_neighbor_error_t;
1490
/*
 * Shared worker for the "ip6-discover-neighbor" (is_glean == 0) and
 * "ip6-glean" (is_glean != 0) nodes.
 *
 * Every input buffer is enqueued to the DROP next. In addition, unless
 * the packet is throttled, the interface link is down, or the adjacency
 * has already become a rewrite, a neighbor solicitation is built from
 * im->discover_neighbor_packet_template and sent to the REPLY_TX next.
 * When is_glean is 0 the packet's destination address is first
 * overwritten with the adjacency's next-hop address, so the solicitation
 * targets the next hop.
 *
 * Throttling: (sw_if_index, destination address) is hashed with seeds
 * that are re-drawn, and a 256-bit "seen" bitmap that is cleared, once
 * more than 1e-3 seconds have passed since the last change. A packet
 * whose hash bit is already set is dropped without sending a
 * solicitation.
 *
 * Returns the number of packets in the frame.
 */
1491 static uword
1492 ip6_discover_neighbor_inline (vlib_main_t * vm,
1493                               vlib_node_runtime_t * node,
1494                               vlib_frame_t * frame,
1495                               int is_glean)
1496 {
1497   vnet_main_t * vnm = vnet_get_main();
1498   ip6_main_t * im = &ip6_main;
1499   ip_lookup_main_t * lm = &im->lookup_main;
1500   u32 * from, * to_next_drop;
1501   uword n_left_from, n_left_to_next_drop;
1502   static f64 time_last_seed_change = -1e100;
1503   static u32 hash_seeds[3];
1504   static uword hash_bitmap[256 / BITS (uword)];
1505   f64 time_now;
1506   int bogus_length;
1507
1508   if (node->flags & VLIB_NODE_FLAG_TRACE)
1509     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1510
1511   time_now = vlib_time_now (vm);
1512   if (time_now - time_last_seed_change > 1e-3)
1513     {
1514       uword i;
1515       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1516                                              sizeof (hash_seeds));
1517       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1518         hash_seeds[i] = r[i];
1519
1520       /* Mark all hash keys as been not-seen before. */
1521       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1522         hash_bitmap[i] = 0;
1523
1524       time_last_seed_change = time_now;
1525     }
1526
1527   from = vlib_frame_vector_args (frame);
1528   n_left_from = frame->n_vectors;
1529
1530   while (n_left_from > 0)
1531     {
1532       vlib_get_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP,
1533                            to_next_drop, n_left_to_next_drop);
1534
1535       while (n_left_from > 0 && n_left_to_next_drop > 0)
1536         {
1537           vlib_buffer_t * p0;
1538           ip6_header_t * ip0;
1539           u32 pi0, adj_index0, a0, b0, c0, sw_if_index0, drop0;
               /* m0 is a single-bit mask within one bitmap word, so it
                * must be as wide as the word itself. */
1540           uword bm0, m0;
1541           ip_adjacency_t * adj0;
1542           vnet_hw_interface_t * hw_if0;
1543           u32 next0;
1544
1545           pi0 = from[0];
1546
1547           p0 = vlib_get_buffer (vm, pi0);
1548
1549           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1550
1551           ip0 = vlib_buffer_get_current (p0);
1552
1553           adj0 = ip_get_adjacency (lm, adj_index0);
1554
               /* Non-glean: solicit for the adjacency's next hop rather
                * than the packet's own destination. */
1555           if (!is_glean)
1556             {
1557               ip0->dst_address.as_u64[0] = adj0->sub_type.nbr.next_hop.ip6.as_u64[0];
1558               ip0->dst_address.as_u64[1] = adj0->sub_type.nbr.next_hop.ip6.as_u64[1];
1559             }
1560
1561           a0 = hash_seeds[0];
1562           b0 = hash_seeds[1];
1563           c0 = hash_seeds[2];
1564
1565           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1566           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1567
1568           a0 ^= sw_if_index0;
1569           b0 ^= ip0->dst_address.as_u32[0];
1570           c0 ^= ip0->dst_address.as_u32[1];
1571
1572           hash_v3_mix32 (a0, b0, c0);
1573
1574           b0 ^= ip0->dst_address.as_u32[2];
1575           c0 ^= ip0->dst_address.as_u32[3];
1576
1577           hash_v3_finalize32 (a0, b0, c0);
1578
               /* Reduce the hash to a bit index in [0, BITS (hash_bitmap)),
                * then split it into a bit mask within a word (m0) and a
                * word index (c0). The mask has to be derived from the bit
                * index BEFORE c0 is turned into the word index; doing it
                * afterwards would confine every key to bit N of word N and
                * leave most of the bitmap unused. */
1579           c0 &= BITS (hash_bitmap) - 1;
1580           m0 = (uword) 1 << (c0 % BITS (uword));
1581           c0 = c0 / BITS (uword);
1582
1583           bm0 = hash_bitmap[c0];
1584           drop0 = (bm0 & m0) != 0;
1585
1586           /* Mark it as seen. */
1587           hash_bitmap[c0] = bm0 | m0;
1588
               /* The original packet always goes to the drop next. */
1589           from += 1;
1590           n_left_from -= 1;
1591           to_next_drop[0] = pi0;
1592           to_next_drop += 1;
1593           n_left_to_next_drop -= 1;
1594
1595           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1596
1597           /* If the interface is link-down, drop the pkt */
1598           if (!(hw_if0->flags & VNET_HW_INTERFACE_FLAG_LINK_UP))
1599             drop0 = 1;
1600
1601           p0->error =
1602             node->errors[drop0 ? IP6_DISCOVER_NEIGHBOR_ERROR_DROP
1603                          : IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT];
1604           if (drop0)
1605             continue;
1606
1607           /*
1608            * the adj has been updated to a rewrite but the node the DPO that got
1609            * us here hasn't - yet. no big deal. we'll drop while we wait.
1610            */
1611           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1612             continue;
1613
1614           {
1615             u32 bi0 = 0;
1616             icmp6_neighbor_solicitation_header_t * h0;
1617             vlib_buffer_t * b0;
1618
                 /* NOTE(review): h0 is used without a NULL check --
                  * confirm the template allocator cannot fail here. */
1619             h0 = vlib_packet_template_get_packet
1620               (vm, &im->discover_neighbor_packet_template, &bi0);
1621
1622             /*
1623              * Build ethernet header.
1624              * Choose source address based on destination lookup
1625              * adjacency.
1626              */
1627             if (ip6_src_address_for_packet (lm,
1628                                             sw_if_index0,
1629                                             &h0->ip.src_address))
1630               {
1631                 /* There is no address on the interface */
1632                 p0->error = node->errors[IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS];
1633                 vlib_buffer_free(vm, &bi0, 1);
1634                 continue;
1635               }
1636
1637             /*
1638              * Destination address is a solicited node multicast address.
1639              * We need to fill in
1640              * the low 24 bits with low 24 bits of target's address.
1641              */
1642             h0->ip.dst_address.as_u8[13] = ip0->dst_address.as_u8[13];
1643             h0->ip.dst_address.as_u8[14] = ip0->dst_address.as_u8[14];
1644             h0->ip.dst_address.as_u8[15] = ip0->dst_address.as_u8[15];
1645
1646             h0->neighbor.target_address = ip0->dst_address;
1647
1648             clib_memcpy (h0->link_layer_option.ethernet_address,
1649                     hw_if0->hw_address, vec_len (hw_if0->hw_address));
1650
1651             /* $$$$ appears we need this; why is the checksum non-zero? */
1652             h0->neighbor.icmp.checksum = 0;
                 /* No buffer is passed (p0 argument is 0): the template
                  * packet is summed as one contiguous region. */
1653             h0->neighbor.icmp.checksum =
1654               ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h0->ip,
1655                                                  &bogus_length);
1656
1657             ASSERT (bogus_length == 0);
1658
1659             vlib_buffer_copy_trace_flag (vm, p0, bi0);
1660             b0 = vlib_get_buffer (vm, bi0);
1661             vnet_buffer (b0)->sw_if_index[VLIB_TX]
1662               = vnet_buffer (p0)->sw_if_index[VLIB_TX];
1663
1664             /* Add rewrite/encap string. */
1665             vnet_rewrite_one_header (adj0[0], h0,
1666                                      sizeof (ethernet_header_t));
1667             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1668
1669             next0 = IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX;
1670
1671             vlib_set_next_frame_buffer (vm, node, next0, bi0);
1672           }
1673         }
1674
1675       vlib_put_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP,
1676                            n_left_to_next_drop);
1677     }
1678
1679   return frame->n_vectors;
1680 }
1681
/* Node function for "ip6-discover-neighbor": runs the shared worker with
 * is_glean = 0, so the solicitation targets the adjacency's next hop. */
1682 static uword
1683 ip6_discover_neighbor (vlib_main_t * vm,
1684                        vlib_node_runtime_t * node,
1685                        vlib_frame_t * frame)
1686 {
1687     return (ip6_discover_neighbor_inline(vm, node, frame, 0));
1688 }
1689
/* Node function for "ip6-glean": runs the shared worker with
 * is_glean = 1, so the solicitation targets the packet's own destination
 * address. */
1690 static uword
1691 ip6_glean (vlib_main_t * vm,
1692            vlib_node_runtime_t * node,
1693            vlib_frame_t * frame)
1694 {
1695     return (ip6_discover_neighbor_inline(vm, node, frame, 1));
1696 }
1697
/* Counter descriptions for the neighbor discovery nodes, indexed by
 * ip6_discover_neighbor_error_t. The DROP entry is the one charged when a
 * packet is throttled or the interface link is down. */
1698 static char * ip6_discover_neighbor_error_strings[] = {
1699   [IP6_DISCOVER_NEIGHBOR_ERROR_DROP] = "address overflow drops",
1700   [IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT]
1701   = "neighbor solicitations sent",
1702   [IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS]
1703     = "no source address for ND solicitation",
1704 };
1705
1706 VLIB_REGISTER_NODE (ip6_discover_neighbor_node) = {
1707   .function = ip6_discover_neighbor,
1708   .name = "ip6-discover-neighbor",
1709   .vector_size = sizeof (u32),
1710
1711   .format_trace = format_ip6_forward_next_trace,
1712
1713   .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings),
1714   .error_strings = ip6_discover_neighbor_error_strings,
1715
1716   .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT,
1717   .next_nodes = {
1718     [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop",
1719     [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output",
1720   },
1721 };
1722
1723 VLIB_REGISTER_NODE (ip6_glean_node) = {
1724   .function = ip6_glean,
1725   .name = "ip6-glean",
1726   .vector_size = sizeof (u32),
1727
1728   .format_trace = format_ip6_forward_next_trace,
1729
1730   .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings),
1731   .error_strings = ip6_discover_neighbor_error_strings,
1732
1733   .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT,
1734   .next_nodes = {
1735     [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop",
1736     [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output",
1737   },
1738 };
1739
1740 clib_error_t *
1741 ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index)
1742 {
1743   vnet_main_t * vnm = vnet_get_main();
1744   ip6_main_t * im = &ip6_main;
1745   icmp6_neighbor_solicitation_header_t * h;
1746   ip6_address_t * src;
1747   ip_interface_address_t * ia;
1748   ip_adjacency_t * adj;
1749   vnet_hw_interface_t * hi;
1750   vnet_sw_interface_t * si;
1751   vlib_buffer_t * b;
1752   u32 bi = 0;
1753   int bogus_length;
1754
1755   si = vnet_get_sw_interface (vnm, sw_if_index);
1756
1757   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
1758     {
1759       return clib_error_return (0, "%U: interface %U down",
1760                                 format_ip6_address, dst,
1761                                 format_vnet_sw_if_index_name, vnm,
1762                                 sw_if_index);
1763     }
1764
1765   src = ip6_interface_address_matching_destination (im, dst, sw_if_index, &ia);
1766   if (! src)
1767     {
1768       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
1769       return clib_error_return
1770         (0, "no matching interface address for destination %U (interface %U)",
1771          format_ip6_address, dst,
1772          format_vnet_sw_if_index_name, vnm, sw_if_index);
1773     }
1774
1775   h = vlib_packet_template_get_packet (vm, &im->discover_neighbor_packet_template, &bi);
1776
1777   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
1778
1779   /* Destination address is a solicited node multicast address.  We need to fill in
1780      the low 24 bits with low 24 bits of target's address. */
1781   h->ip.dst_address.as_u8[13] = dst->as_u8[13];
1782   h->ip.dst_address.as_u8[14] = dst->as_u8[14];
1783   h->ip.dst_address.as_u8[15] = dst->as_u8[15];
1784
1785   h->ip.src_address = src[0];
1786   h->neighbor.target_address = dst[0];
1787
1788   clib_memcpy (h->link_layer_option.ethernet_address, hi->hw_address, vec_len (hi->hw_address));
1789
1790   h->neighbor.icmp.checksum =
1791     ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length);
1792   ASSERT(bogus_length == 0);
1793
1794   b = vlib_get_buffer (vm, bi);
1795   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
1796
1797   /* Add encapsulation string for software interface (e.g. ethernet header). */
1798   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
1799   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
1800   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
1801
1802   {
1803     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
1804     u32 * to_next = vlib_frame_vector_args (f);
1805     to_next[0] = bi;
1806     f->n_vectors = 1;
1807     vlib_put_frame_to_node (vm, hi->output_node_index, f);
1808   }
1809
1810   return /* no error */ 0;
1811 }
1812
/*
 * Next-node slots used by the IPv6 rewrite path.  The values index the
 * next_nodes table of the "ip6-rewrite" node registration.
 */
typedef enum {
  IP6_REWRITE_NEXT_DROP = 0,
  IP6_REWRITE_NEXT_ICMP_ERROR = 1,
} ip6_rewrite_next_t;
1817
1818 always_inline uword
1819 ip6_rewrite_inline (vlib_main_t * vm,
1820                     vlib_node_runtime_t * node,
1821                     vlib_frame_t * frame,
1822                     int rewrite_for_locally_received_packets,
1823                     int is_midchain)
1824 {
1825   ip_lookup_main_t * lm = &ip6_main.lookup_main;
1826   u32 * from = vlib_frame_vector_args (frame);
1827   u32 n_left_from, n_left_to_next, * to_next, next_index;
1828   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_input_node.index);
1829   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
1830   ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
1831
1832   n_left_from = frame->n_vectors;
1833   next_index = node->cached_next_index;
1834   u32 cpu_index = os_get_cpu_number();
1835
1836   while (n_left_from > 0)
1837     {
1838       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1839
1840       while (n_left_from >= 4 && n_left_to_next >= 2)
1841         {
1842           ip_adjacency_t * adj0, * adj1;
1843           vlib_buffer_t * p0, * p1;
1844           ip6_header_t * ip0, * ip1;
1845           u32 pi0, rw_len0, next0, error0, adj_index0;
1846           u32 pi1, rw_len1, next1, error1, adj_index1;
1847           u32 tx_sw_if_index0, tx_sw_if_index1;
1848
1849           /* Prefetch next iteration. */
1850           {
1851             vlib_buffer_t * p2, * p3;
1852
1853             p2 = vlib_get_buffer (vm, from[2]);
1854             p3 = vlib_get_buffer (vm, from[3]);
1855
1856             vlib_prefetch_buffer_header (p2, LOAD);
1857             vlib_prefetch_buffer_header (p3, LOAD);
1858
1859             CLIB_PREFETCH (p2->pre_data, 32, STORE);
1860             CLIB_PREFETCH (p3->pre_data, 32, STORE);
1861
1862             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
1863             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
1864           }
1865
1866           pi0 = to_next[0] = from[0];
1867           pi1 = to_next[1] = from[1];
1868
1869           from += 2;
1870           n_left_from -= 2;
1871           to_next += 2;
1872           n_left_to_next -= 2;
1873
1874           p0 = vlib_get_buffer (vm, pi0);
1875           p1 = vlib_get_buffer (vm, pi1);
1876
1877           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
1878           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
1879
1880           /* We should never rewrite a pkt using the MISS adjacency */
1881           ASSERT(adj_index0 && adj_index1);
1882
1883           ip0 = vlib_buffer_get_current (p0);
1884           ip1 = vlib_buffer_get_current (p1);
1885
1886           error0 = error1 = IP6_ERROR_NONE;
1887           next0 = next1 = IP6_REWRITE_NEXT_DROP;
1888
1889           if (! rewrite_for_locally_received_packets)
1890             {
1891               i32 hop_limit0 = ip0->hop_limit, hop_limit1 = ip1->hop_limit;
1892
1893               /* Input node should have reject packets with hop limit 0. */
1894               ASSERT (ip0->hop_limit > 0);
1895               ASSERT (ip1->hop_limit > 0);
1896
1897               hop_limit0 -= 1;
1898               hop_limit1 -= 1;
1899
1900               ip0->hop_limit = hop_limit0;
1901               ip1->hop_limit = hop_limit1;
1902
1903               /*
1904                * If the hop count drops below 1 when forwarding, generate
1905                * an ICMP response.
1906                */
1907               if (PREDICT_FALSE(hop_limit0 <= 0))
1908                 {
1909                   error0 = IP6_ERROR_TIME_EXPIRED;
1910                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
1911                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
1912                   icmp6_error_set_vnet_buffer(p0, ICMP6_time_exceeded,
1913                         ICMP6_time_exceeded_ttl_exceeded_in_transit, 0);
1914                 }
1915               if (PREDICT_FALSE(hop_limit1 <= 0))
1916                 {
1917                   error1 = IP6_ERROR_TIME_EXPIRED;
1918                   next1 = IP6_REWRITE_NEXT_ICMP_ERROR;
1919                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
1920                   icmp6_error_set_vnet_buffer(p1, ICMP6_time_exceeded,
1921                         ICMP6_time_exceeded_ttl_exceeded_in_transit, 0);
1922                 }
1923             }
1924
1925           adj0 = ip_get_adjacency (lm, adj_index0);
1926           adj1 = ip_get_adjacency (lm, adj_index1);
1927
1928           rw_len0 = adj0[0].rewrite_header.data_bytes;
1929           rw_len1 = adj1[0].rewrite_header.data_bytes;
1930           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
1931           vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
1932
1933           vlib_increment_combined_counter (&adjacency_counters,
1934                                            cpu_index,
1935                                            adj_index0,
1936                                            /* packet increment */ 0,
1937                                            /* byte increment */ rw_len0);
1938           vlib_increment_combined_counter (&adjacency_counters,
1939                                            cpu_index,
1940                                            adj_index1,
1941                                            /* packet increment */ 0,
1942                                            /* byte increment */ rw_len1);
1943
1944           /* Check MTU of outgoing interface. */
1945           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
1946                     ? IP6_ERROR_MTU_EXCEEDED
1947                     : error0);
1948           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
1949                     ? IP6_ERROR_MTU_EXCEEDED
1950                     : error1);
1951
1952           /* Don't adjust the buffer for hop count issue; icmp-error node
1953            * wants to see the IP headerr */
1954           if (PREDICT_TRUE(error0 == IP6_ERROR_NONE))
1955             {
1956               p0->current_data -= rw_len0;
1957               p0->current_length += rw_len0;
1958
1959               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
1960               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
1961                   tx_sw_if_index0;
1962               next0 = adj0[0].rewrite_header.next_index;
1963
1964               if (PREDICT_FALSE
1965                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
1966                                     tx_sw_if_index0)))
1967                 {
1968                   p0->current_config_index =
1969                     vec_elt (cm->config_index_by_sw_if_index,
1970                              tx_sw_if_index0);
1971                   vnet_get_config_data (&cm->config_main,
1972                                         &p0->current_config_index,
1973                                         &next0,
1974                                         /* # bytes of config data */ 0);
1975                 }
1976             }
1977           if (PREDICT_TRUE(error1 == IP6_ERROR_NONE))
1978             {
1979               p1->current_data -= rw_len1;
1980               p1->current_length += rw_len1;
1981
1982               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
1983               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
1984                   tx_sw_if_index1;
1985               next1 = adj1[0].rewrite_header.next_index;
1986
1987               if (PREDICT_FALSE
1988                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
1989                                     tx_sw_if_index1)))
1990                 {
1991                   p1->current_config_index =
1992                     vec_elt (cm->config_index_by_sw_if_index,
1993                              tx_sw_if_index1);
1994                   vnet_get_config_data (&cm->config_main,
1995                                         &p1->current_config_index,
1996                                         &next1,
1997                                         /* # bytes of config data */ 0);
1998                 }
1999             }
2000
2001           /* Guess we are only writing on simple Ethernet header. */
2002           vnet_rewrite_two_headers (adj0[0], adj1[0],
2003                                     ip0, ip1,
2004                                     sizeof (ethernet_header_t));
2005
2006           if (is_midchain)
2007           {
2008               adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2009               adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
2010           }
2011
2012           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2013                                            to_next, n_left_to_next,
2014                                            pi0, pi1, next0, next1);
2015         }
2016
2017       while (n_left_from > 0 && n_left_to_next > 0)
2018         {
2019           ip_adjacency_t * adj0;
2020           vlib_buffer_t * p0;
2021           ip6_header_t * ip0;
2022           u32 pi0, rw_len0;
2023           u32 adj_index0, next0, error0;
2024           u32 tx_sw_if_index0;
2025
2026           pi0 = to_next[0] = from[0];
2027
2028           p0 = vlib_get_buffer (vm, pi0);
2029
2030           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2031
2032           /* We should never rewrite a pkt using the MISS adjacency */
2033           ASSERT(adj_index0);
2034
2035           adj0 = ip_get_adjacency (lm, adj_index0);
2036
2037           ip0 = vlib_buffer_get_current (p0);
2038
2039           error0 = IP6_ERROR_NONE;
2040           next0 = IP6_REWRITE_NEXT_DROP;
2041
2042           /* Check hop limit */
2043           if (! rewrite_for_locally_received_packets)
2044             {
2045               i32 hop_limit0 = ip0->hop_limit;
2046
2047               ASSERT (ip0->hop_limit > 0);
2048
2049               hop_limit0 -= 1;
2050
2051               ip0->hop_limit = hop_limit0;
2052
2053               if (PREDICT_FALSE(hop_limit0 <= 0))
2054                 {
2055                   /*
2056                    * If the hop count drops below 1 when forwarding, generate
2057                    * an ICMP response.
2058                    */
2059                   error0 = IP6_ERROR_TIME_EXPIRED;
2060                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
2061                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2062                   icmp6_error_set_vnet_buffer(p0, ICMP6_time_exceeded,
2063                         ICMP6_time_exceeded_ttl_exceeded_in_transit, 0);
2064                 }
2065             }
2066
2067           /* Guess we are only writing on simple Ethernet header. */
2068           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2069
2070           /* Update packet buffer attributes/set output interface. */
2071           rw_len0 = adj0[0].rewrite_header.data_bytes;
2072           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2073
2074           vlib_increment_combined_counter (&adjacency_counters,
2075                                            cpu_index,
2076                                            adj_index0,
2077                                            /* packet increment */ 0,
2078                                            /* byte increment */ rw_len0);
2079
2080           /* Check MTU of outgoing interface. */
2081           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2082                     ? IP6_ERROR_MTU_EXCEEDED
2083                     : error0);
2084
2085           /* Don't adjust the buffer for hop count issue; icmp-error node
2086            * wants to see the IP headerr */
2087           if (PREDICT_TRUE(error0 == IP6_ERROR_NONE))
2088             {
2089               p0->current_data -= rw_len0;
2090               p0->current_length += rw_len0;
2091
2092               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2093
2094               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2095               next0 = adj0[0].rewrite_header.next_index;
2096
2097               if (PREDICT_FALSE
2098                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2099                                     tx_sw_if_index0)))
2100                   {
2101                     p0->current_config_index =
2102                       vec_elt (cm->config_index_by_sw_if_index,
2103                                tx_sw_if_index0);
2104                     vnet_get_config_data (&cm->config_main,
2105                                           &p0->current_config_index,
2106                                           &next0,
2107                                           /* # bytes of config data */ 0);
2108                   }
2109             }
2110
2111           if (is_midchain)
2112           {
2113               adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2114           }
2115
2116           p0->error = error_node->errors[error0];
2117
2118           from += 1;
2119           n_left_from -= 1;
2120           to_next += 1;
2121           n_left_to_next -= 1;
2122
2123           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2124                                            to_next, n_left_to_next,
2125                                            pi0, next0);
2126         }
2127
2128       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2129     }
2130
2131   /* Need to do trace after rewrites to pick up new packet data. */
2132   if (node->flags & VLIB_NODE_FLAG_TRACE)
2133     ip6_forward_next_trace (vm, node, frame, adj_rx_tx);
2134
2135   return frame->n_vectors;
2136 }
2137
2138 static uword
2139 ip6_rewrite_transit (vlib_main_t * vm,
2140                      vlib_node_runtime_t * node,
2141                      vlib_frame_t * frame)
2142 {
2143   return ip6_rewrite_inline (vm, node, frame,
2144                              /* rewrite_for_locally_received_packets */ 0,
2145                              /* midchain */ 0);
2146 }
2147
2148 static uword
2149 ip6_rewrite_local (vlib_main_t * vm,
2150                    vlib_node_runtime_t * node,
2151                    vlib_frame_t * frame)
2152 {
2153   return ip6_rewrite_inline (vm, node, frame,
2154                              /* rewrite_for_locally_received_packets */ 1,
2155                              /* midchain */ 0);
2156 }
2157
2158 static uword
2159 ip6_midchain (vlib_main_t * vm,
2160               vlib_node_runtime_t * node,
2161               vlib_frame_t * frame)
2162 {
2163   return ip6_rewrite_inline (vm, node, frame,
2164                              /* rewrite_for_locally_received_packets */ 0,
2165                              /* midchain */ 1);
2166 }
2167
2168 VLIB_REGISTER_NODE (ip6_midchain_node) = {
2169   .function = ip6_midchain,
2170   .name = "ip6-midchain",
2171   .vector_size = sizeof (u32),
2172
2173   .format_trace = format_ip6_forward_next_trace,
2174
2175   .sibling_of = "ip6-rewrite",
2176 };
2177
2178 VLIB_NODE_FUNCTION_MULTIARCH (ip6_midchain_node, ip6_midchain)
2179
2180 VLIB_REGISTER_NODE (ip6_rewrite_node) = {
2181   .function = ip6_rewrite_transit,
2182   .name = "ip6-rewrite",
2183   .vector_size = sizeof (u32),
2184
2185   .format_trace = format_ip6_rewrite_trace,
2186
2187   .n_next_nodes = 2,
2188   .next_nodes = {
2189     [IP6_REWRITE_NEXT_DROP] = "error-drop",
2190     [IP6_REWRITE_NEXT_ICMP_ERROR] = "ip6-icmp-error",
2191   },
2192 };
2193
2194 VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite_transit);
2195
2196 VLIB_REGISTER_NODE (ip6_rewrite_local_node) = {
2197   .function = ip6_rewrite_local,
2198   .name = "ip6-rewrite-local",
2199   .vector_size = sizeof (u32),
2200
2201   .sibling_of = "ip6-rewrite",
2202
2203   .format_trace = format_ip6_rewrite_trace,
2204
2205   .n_next_nodes = 0,
2206 };
2207
2208 VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_local_node, ip6_rewrite_local);
2209
2210 /*
2211  * Hop-by-Hop handling
2212  */
2213
/* Global hop-by-hop state; the code in this file reads its per-option-type
 * options[] and trace[] handler tables and its next_override field. */
ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
2215
/*
 * X-macro table of hop-by-hop counters: one _(symbol, description) entry per
 * counter.  Expanded below into the error enum and elsewhere into the
 * matching string table, so the two always stay in the same order.
 */
#define foreach_ip6_hop_by_hop_error                            \
  _(PROCESSED, "pkts with ip6 hop-by-hop options")              \
  _(FORMAT, "incorrectly formatted hop-by-hop options")         \
  _(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options")

/* IP6_HOP_BY_HOP_ERROR_<symbol> codes; IP6_HOP_BY_HOP_N_ERROR is the count. */
typedef enum {
#define _(name, text) IP6_HOP_BY_HOP_ERROR_##name,
  foreach_ip6_hop_by_hop_error
#undef _
  IP6_HOP_BY_HOP_N_ERROR,
} ip6_hop_by_hop_error_t;
2227
/*
 * Primary h-b-h handler trace support
 * We work pretty hard on the problem for obvious reasons
 *
 * One record is captured per traced buffer by the hop-by-hop node and later
 * rendered by format_ip6_hop_by_hop_trace.
 */
typedef struct {
  /* Next index chosen for the buffer at the time of the trace. */
  u32 next_index;
  /* Number of bytes of the hop-by-hop header copied into option_data
   * (capped at the size of option_data). */
  u32 trace_len;
  /* Verbatim copy of the hop-by-hop header, starting at the header itself. */
  u8 option_data[256];
} ip6_hop_by_hop_trace_t;
2237
2238 vlib_node_registration_t ip6_hop_by_hop_node;
2239
2240 static char * ip6_hop_by_hop_error_strings[] = {
2241 #define _(sym,string) string,
2242   foreach_ip6_hop_by_hop_error
2243 #undef _
2244 };
2245
2246 static u8 *
2247 format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
2248 {
2249   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
2250   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
2251   ip6_hop_by_hop_trace_t * t = va_arg (*args, ip6_hop_by_hop_trace_t *);
2252   ip6_hop_by_hop_header_t *hbh0;
2253   ip6_hop_by_hop_option_t *opt0, *limit0;
2254   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2255
2256   u8 type0;
2257
2258   hbh0 = (ip6_hop_by_hop_header_t *)t->option_data;
2259
2260   s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d",
2261               t->next_index, (hbh0->length+1)<<3, t->trace_len);
2262
2263   opt0 = (ip6_hop_by_hop_option_t *) (hbh0+1);
2264   limit0 = (ip6_hop_by_hop_option_t *) ((u8 *)hbh0) + t->trace_len;
2265
2266   while (opt0 < limit0) {
2267     type0 = opt0->type;
2268     switch (type0) {
2269     case 0: /* Pad, just stop */
2270       opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1;
2271       break;
2272
2273     default:
2274       if (hm->trace[type0]) {
2275         s = (*hm->trace[type0])(s, opt0);
2276       } else {
2277         s = format (s, "\n    unrecognized option %d length %d", type0, opt0->length);
2278       }
2279       opt0 = (ip6_hop_by_hop_option_t *) (((u8 *)opt0) + opt0->length + sizeof (ip6_hop_by_hop_option_t));
2280       break;
2281     }
2282   }
2283   return s;
2284 }
2285
2286 always_inline u8 ip6_scan_hbh_options (
2287                                        vlib_buffer_t * b0,
2288                                        ip6_header_t *ip0,
2289                                        ip6_hop_by_hop_header_t *hbh0,
2290                                        ip6_hop_by_hop_option_t *opt0,
2291                                        ip6_hop_by_hop_option_t *limit0,
2292                                        u32 *next0)
2293 {
2294   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2295   u8 type0;
2296   u8 error0 = 0;
2297
2298   while (opt0 < limit0)
2299     {
2300       type0 = opt0->type;
2301       switch (type0)
2302         {
2303         case 0: /* Pad1 */
2304           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1;
2305           continue;
2306         case 1: /* PadN */
2307           break;
2308         default:
2309           if (hm->options[type0])
2310             {
2311               if ((*hm->options[type0])(b0, ip0, opt0) < 0)
2312                 {
2313                   error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2314                   return(error0);
2315                 }
2316             }
2317           else
2318             {
2319               /* Unrecognized mandatory option, check the two high order bits */
2320               switch (opt0->type & HBH_OPTION_TYPE_HIGH_ORDER_BITS)
2321                 {
2322                 case HBH_OPTION_TYPE_SKIP_UNKNOWN:
2323                   break;
2324                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN:
2325                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2326                   *next0 = IP_LOOKUP_NEXT_DROP;
2327                   break;
2328                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP:
2329                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2330                   *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
2331                   icmp6_error_set_vnet_buffer(b0, ICMP6_parameter_problem,
2332                                               ICMP6_parameter_problem_unrecognized_option, (u8 *)opt0 - (u8 *)ip0);
2333                   break;
2334                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST:
2335                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2336                   if (!ip6_address_is_multicast(&ip0->dst_address))
2337                     {
2338                       *next0 =  IP_LOOKUP_NEXT_ICMP_ERROR;
2339                       icmp6_error_set_vnet_buffer(b0, ICMP6_parameter_problem,
2340                                                   ICMP6_parameter_problem_unrecognized_option, (u8 *)opt0 - (u8 *)ip0);
2341                     }
2342                   else
2343                     {
2344                       *next0 =  IP_LOOKUP_NEXT_DROP;
2345                     }
2346                   break;
2347                 }
2348               return(error0);
2349             }
2350         }
2351       opt0 = (ip6_hop_by_hop_option_t *) (((u8 *)opt0) + opt0->length + sizeof (ip6_hop_by_hop_option_t));
2352     }
2353   return(error0);
2354 }
2355
2356 /*
2357  * Process the Hop-by-Hop Options header
2358  */
2359 static uword
2360 ip6_hop_by_hop (vlib_main_t * vm,
2361                 vlib_node_runtime_t * node,
2362                 vlib_frame_t * frame)
2363 {
2364   vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_hop_by_hop_node.index);
2365   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2366   u32 n_left_from, *from, *to_next;
2367   ip_lookup_next_t next_index;
2368   ip6_main_t * im = &ip6_main;
2369   ip_lookup_main_t *lm = &im->lookup_main;
2370
2371   from = vlib_frame_vector_args (frame);
2372   n_left_from = frame->n_vectors;
2373   next_index = node->cached_next_index;
2374
2375   while (n_left_from > 0) {
2376     u32 n_left_to_next;
2377
2378     vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2379
2380     while (n_left_from >= 4 && n_left_to_next >= 2) {
2381       u32 bi0, bi1;
2382       vlib_buffer_t * b0, *b1;
2383       u32 next0, next1;
2384       ip6_header_t * ip0, *ip1;
2385       ip6_hop_by_hop_header_t *hbh0, *hbh1;
2386       ip6_hop_by_hop_option_t *opt0, *limit0, *opt1, *limit1;
2387       u8 error0 = 0, error1 = 0;
2388
2389       /* Prefetch next iteration. */
2390       {
2391         vlib_buffer_t * p2, * p3;
2392
2393         p2 = vlib_get_buffer (vm, from[2]);
2394         p3 = vlib_get_buffer (vm, from[3]);
2395
2396         vlib_prefetch_buffer_header (p2, LOAD);
2397         vlib_prefetch_buffer_header (p3, LOAD);
2398
2399         CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
2400         CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
2401       }
2402
2403       /* Speculatively enqueue b0, b1 to the current next frame */
2404       to_next[0] = bi0 = from[0];
2405       to_next[1] = bi1 = from[1];
2406       from += 2;
2407       to_next += 2;
2408       n_left_from -= 2;
2409       n_left_to_next -= 2;
2410
2411       b0 = vlib_get_buffer (vm, bi0);
2412       b1 = vlib_get_buffer (vm, bi1);
2413       u32 adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
2414       ip_adjacency_t *adj0 = ip_get_adjacency(lm, adj_index0);
2415       u32 adj_index1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
2416       ip_adjacency_t *adj1 = ip_get_adjacency(lm, adj_index1);
2417
2418       /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2419       next0 = adj0->lookup_next_index;
2420       next1 = adj1->lookup_next_index;
2421
2422       ip0 = vlib_buffer_get_current (b0);
2423       ip1 = vlib_buffer_get_current (b1);
2424       hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1);
2425       hbh1 = (ip6_hop_by_hop_header_t *)(ip1+1);
2426       opt0 = (ip6_hop_by_hop_option_t *)(hbh0+1);
2427       opt1 = (ip6_hop_by_hop_option_t *)(hbh1+1);
2428       limit0 = (ip6_hop_by_hop_option_t *)((u8 *)hbh0 + ((hbh0->length + 1) << 3));
2429       limit1 = (ip6_hop_by_hop_option_t *)((u8 *)hbh1 + ((hbh1->length + 1) << 3));
2430
2431       /*
2432        * Basic validity checks
2433        */
2434       if ((hbh0->length + 1) << 3 > clib_net_to_host_u16(ip0->payload_length)) {
2435         error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2436         next0 = IP_LOOKUP_NEXT_DROP;
2437         goto outdual;
2438       }
2439       /* Scan the set of h-b-h options, process ones that we understand */
2440       error0 = ip6_scan_hbh_options(b0, ip0, hbh0, opt0, limit0, &next0);
2441
2442       if ((hbh1->length + 1) << 3 > clib_net_to_host_u16(ip1->payload_length)) {
2443         error1 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2444         next1 = IP_LOOKUP_NEXT_DROP;
2445         goto outdual;
2446       }
2447       /* Scan the set of h-b-h options, process ones that we understand */
2448       error1 = ip6_scan_hbh_options(b1,ip1,hbh1,opt1,limit1, &next1);
2449
2450     outdual:
2451       /* Has the classifier flagged this buffer for special treatment? */
2452       if ((error0 == 0) && (vnet_buffer(b0)->l2_classify.opaque_index == OI_DECAP))
2453         next0 = hm->next_override;
2454
2455       /* Has the classifier flagged this buffer for special treatment? */
2456       if ((error1 == 0) && (vnet_buffer(b1)->l2_classify.opaque_index == OI_DECAP))
2457         next1 = hm->next_override;
2458
2459       if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
2460         {
2461           if (b0->flags & VLIB_BUFFER_IS_TRACED) {
2462             ip6_hop_by_hop_trace_t *t = vlib_add_trace(vm, node, b0, sizeof (*t));
2463             u32 trace_len = (hbh0->length + 1) << 3;
2464             t->next_index = next0;
2465             /* Capture the h-b-h option verbatim */
2466             trace_len = trace_len < ARRAY_LEN(t->option_data) ? trace_len : ARRAY_LEN(t->option_data);
2467             t->trace_len = trace_len;
2468             clib_memcpy(t->option_data, hbh0, trace_len);
2469           }
2470           if (b1->flags & VLIB_BUFFER_IS_TRACED) {
2471             ip6_hop_by_hop_trace_t *t = vlib_add_trace(vm, node, b1, sizeof (*t));
2472             u32 trace_len = (hbh1->length + 1) << 3;
2473             t->next_index = next1;
2474             /* Capture the h-b-h option verbatim */
2475             trace_len = trace_len < ARRAY_LEN(t->option_data) ? trace_len : ARRAY_LEN(t->option_data);
2476             t->trace_len = trace_len;
2477             clib_memcpy(t->option_data, hbh1, trace_len);
2478           }
2479
2480         }
2481
2482       b0->error = error_node->errors[error0];
2483       b1->error = error_node->errors[error1];
2484
2485       /* verify speculative enqueue, maybe switch current next frame */
2486       vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, n_left_to_next, bi0,
2487                                        bi1,next0, next1);
2488     }
2489
2490     while (n_left_from > 0 && n_left_to_next > 0) {
2491       u32 bi0;
2492       vlib_buffer_t * b0;
2493       u32 next0;
2494       ip6_header_t * ip0;
2495       ip6_hop_by_hop_header_t *hbh0;
2496       ip6_hop_by_hop_option_t *opt0, *limit0;
2497       u8 error0 = 0;
2498
2499       /* Speculatively enqueue b0 to the current next frame */
2500       bi0 = from[0];
2501       to_next[0] = bi0;
2502       from += 1;
2503       to_next += 1;
2504       n_left_from -= 1;
2505       n_left_to_next -= 1;
2506
2507       b0 = vlib_get_buffer (vm, bi0);
2508       u32 adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
2509       ip_adjacency_t *adj0 = ip_get_adjacency(lm, adj_index0);
2510       /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2511       next0 = adj0->lookup_next_index;
2512
2513       ip0 = vlib_buffer_get_current (b0);
2514       hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1);
2515       opt0 = (ip6_hop_by_hop_option_t *)(hbh0+1);
2516       limit0 = (ip6_hop_by_hop_option_t *)((u8 *)hbh0 + ((hbh0->length + 1) << 3));
2517
2518       /*
2519        * Basic validity checks
2520        */
2521       if ((hbh0->length + 1) << 3 > clib_net_to_host_u16(ip0->payload_length)) {
2522         error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2523         next0 = IP_LOOKUP_NEXT_DROP;
2524         goto out0;
2525       }
2526
2527       /* Scan the set of h-b-h options, process ones that we understand */
2528       error0 = ip6_scan_hbh_options(b0, ip0, hbh0, opt0, limit0, &next0);
2529
2530     out0:
2531       /* Has the classifier flagged this buffer for special treatment? */
2532       if ((error0 == 0) && (vnet_buffer(b0)->l2_classify.opaque_index == OI_DECAP))
2533         next0 = hm->next_override;
2534
2535       if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) {
2536         ip6_hop_by_hop_trace_t *t = vlib_add_trace(vm, node, b0, sizeof (*t));
2537         u32 trace_len = (hbh0->length + 1) << 3;
2538         t->next_index = next0;
2539         /* Capture the h-b-h option verbatim */
2540         trace_len = trace_len < ARRAY_LEN(t->option_data) ? trace_len : ARRAY_LEN(t->option_data);
2541         t->trace_len = trace_len;
2542         clib_memcpy(t->option_data, hbh0, trace_len);
2543       }
2544
2545       b0->error = error_node->errors[error0];
2546
2547       /* verify speculative enqueue, maybe switch current next frame */
2548       vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0);
2549     }
2550     vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2551   }
2552   return frame->n_vectors;
2553 }
2554
2555 VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = {
2556   .function = ip6_hop_by_hop,
2557   .name = "ip6-hop-by-hop",
2558   .sibling_of = "ip6-lookup",
2559   .vector_size = sizeof (u32),
2560   .format_trace = format_ip6_hop_by_hop_trace,
2561   .type = VLIB_NODE_TYPE_INTERNAL,
2562   .n_errors = ARRAY_LEN(ip6_hop_by_hop_error_strings),
2563   .error_strings = ip6_hop_by_hop_error_strings,
2564   .n_next_nodes = 0,
2565 };
2566
2567 VLIB_NODE_FUNCTION_MULTIARCH (ip6_hop_by_hop_node, ip6_hop_by_hop);
2568
2569 static clib_error_t *
2570 ip6_hop_by_hop_init (vlib_main_t * vm)
2571 {
2572   ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
2573   memset(hm->options, 0, sizeof(hm->options));
2574   memset(hm->trace, 0, sizeof(hm->trace));
2575   hm->next_override = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP;
2576   return (0);
2577 }
2578
2579 VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
2580
2581 void ip6_hbh_set_next_override (uword next)
2582 {
2583   ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
2584
2585   hm->next_override = next;
2586 }
2587
2588 int
2589 ip6_hbh_register_option (u8 option,
2590                          int options(vlib_buffer_t *b, ip6_header_t *ip, ip6_hop_by_hop_option_t *opt),
2591                          u8 *trace(u8 *s, ip6_hop_by_hop_option_t *opt))
2592 {
2593   ip6_main_t * im = &ip6_main;
2594   ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
2595
2596   ASSERT (option < ARRAY_LEN (hm->options));
2597
2598   /* Already registered */
2599   if (hm->options[option])
2600     return (-1);
2601
2602   hm->options[option] = options;
2603   hm->trace[option] = trace;
2604
2605   /* Set global variable */
2606   im->hbh_enabled = 1;
2607
2608   return (0);
2609 }
2610
2611 int
2612 ip6_hbh_unregister_option (u8 option)
2613 {
2614   ip6_main_t * im = &ip6_main;
2615   ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
2616
2617   ASSERT (option < ARRAY_LEN (hm->options));
2618
2619   /* Not registered */
2620   if (!hm->options[option])
2621     return (-1);
2622
2623   hm->options[option] = NULL;
2624   hm->trace[option] = NULL;
2625
2626   /* Disable global knob if this was the last option configured */
2627   int i;
2628   bool found = false;
2629   for (i = 0; i < 256; i++) {
2630     if (hm->options[option]) {
2631       found = true;
2632       break;
2633     }
2634   }
2635   if (!found)
2636     im->hbh_enabled = 0;
2637
2638   return (0);
2639 }
2640
2641 /* Global IP6 main. */
2642 ip6_main_t ip6_main;
2643
2644 static clib_error_t *
2645 ip6_lookup_init (vlib_main_t * vm)
2646 {
2647   ip6_main_t * im = &ip6_main;
2648   clib_error_t * error;
2649   uword i;
2650
2651   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
2652     {
2653       u32 j, i0, i1;
2654
2655       i0 = i / 32;
2656       i1 = i % 32;
2657
2658       for (j = 0; j < i0; j++)
2659         im->fib_masks[i].as_u32[j] = ~0;
2660
2661       if (i1)
2662         im->fib_masks[i].as_u32[i0] = clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
2663     }
2664
2665   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 1);
2666
2667   if (im->lookup_table_nbuckets == 0)
2668     im->lookup_table_nbuckets = IP6_FIB_DEFAULT_HASH_NUM_BUCKETS;
2669
2670   im->lookup_table_nbuckets = 1<< max_log2 (im->lookup_table_nbuckets);
2671
2672   if (im->lookup_table_size == 0)
2673     im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE;
2674
2675   BV(clib_bihash_init) (&(im->ip6_table[IP6_FIB_TABLE_FWDING].ip6_hash),
2676                         "ip6 FIB fwding table",
2677                         im->lookup_table_nbuckets,
2678                         im->lookup_table_size);
2679   BV(clib_bihash_init) (&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash,
2680                         "ip6 FIB non-fwding table",
2681                         im->lookup_table_nbuckets,
2682                         im->lookup_table_size);
2683
2684   /* Create FIB with index 0 and table id of 0. */
2685   fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, 0);
2686
2687   {
2688     pg_node_t * pn;
2689     pn = pg_get_node (ip6_lookup_node.index);
2690     pn->unformat_edit = unformat_pg_ip6_header;
2691   }
2692
2693   /* Unless explicitly configured, don't process HBH options */
2694   im->hbh_enabled = 0;
2695
2696   {
2697     icmp6_neighbor_solicitation_header_t p;
2698
2699     memset (&p, 0, sizeof (p));
2700
2701     p.ip.ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6 << 28);
2702     p.ip.payload_length = clib_host_to_net_u16 (sizeof (p)
2703                                                 - STRUCT_OFFSET_OF (icmp6_neighbor_solicitation_header_t, neighbor));
2704     p.ip.protocol = IP_PROTOCOL_ICMP6;
2705     p.ip.hop_limit = 255;
2706     ip6_set_solicited_node_multicast_address (&p.ip.dst_address, 0);
2707
2708     p.neighbor.icmp.type = ICMP6_neighbor_solicitation;
2709
2710     p.link_layer_option.header.type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
2711     p.link_layer_option.header.n_data_u64s = sizeof (p.link_layer_option) / sizeof (u64);
2712
2713     vlib_packet_template_init (vm,
2714                                &im->discover_neighbor_packet_template,
2715                                &p, sizeof (p),
2716                                /* alloc chunk size */ 8,
2717                                "ip6 neighbor discovery");
2718   }
2719
2720   error = ip6_feature_init (vm, im);
2721
2722   return error;
2723 }
2724
2725 VLIB_INIT_FUNCTION (ip6_lookup_init);
2726
2727 static clib_error_t *
2728 add_del_ip6_interface_table (vlib_main_t * vm,
2729                              unformat_input_t * input,
2730                              vlib_cli_command_t * cmd)
2731 {
2732   vnet_main_t * vnm = vnet_get_main();
2733   clib_error_t * error = 0;
2734   u32 sw_if_index, table_id;
2735
2736   sw_if_index = ~0;
2737
2738   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2739     {
2740       error = clib_error_return (0, "unknown interface `%U'",
2741                                  format_unformat_error, input);
2742       goto done;
2743     }
2744
2745   if (unformat (input, "%d", &table_id))
2746     ;
2747   else
2748     {
2749       error = clib_error_return (0, "expected table id `%U'",
2750                                  format_unformat_error, input);
2751       goto done;
2752     }
2753
2754   {
2755     u32 fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6,
2756                                                       table_id);
2757
2758     vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
2759     ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
2760   }
2761
2762
2763  done:
2764   return error;
2765 }
2766
2767 /*?
2768  * Place the indicated interface into the supplied IPv6 FIB table (also known
2769  * as a VRF). If the FIB table does not exist, this command creates it. To
2770  * display the current IPv6 FIB table, use the command '<em>show ip6 fib</em>'.
2771  * FIB table will only be displayed if a route has been added to the table, or
2772  * an IP Address is assigned to an interface in the table (which adds a route
2773  * automatically).
2774  *
2775  * @note IP addresses added after setting the interface IP table end up in
2776  * the indicated FIB table. If the IP address is added prior to adding the
2777  * interface to the FIB table, it will NOT be part of the FIB table. Predictable
2778  * but potentially counter-intuitive results occur if you provision interface
2779  * addresses in multiple FIBs. Upon RX, packets will be processed in the last
2780  * IP table ID provisioned. It might be marginally useful to evade source RPF
2781  * drops to put an interface address into multiple FIBs.
2782  *
2783  * @cliexpar
2784  * Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id):
2785  * @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2}
2786  ?*/
2787 /* *INDENT-OFF* */
2788 VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) = {
2789   .path = "set interface ip6 table",
2790   .function = add_del_ip6_interface_table,
2791   .short_help = "set interface ip6 table <interface> <table-id>"
2792 };
2793 /* *INDENT-ON* */
2794
2795 void
2796 ip6_link_local_address_from_ethernet_mac_address (ip6_address_t *ip,
2797                                                   u8 *mac)
2798 {
2799   ip->as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL);
2800   /* Invert the "u" bit */
2801   ip->as_u8 [8] = mac[0] ^ (1<<1);
2802   ip->as_u8 [9] = mac[1];
2803   ip->as_u8 [10] = mac[2];
2804   ip->as_u8 [11] = 0xFF;
2805   ip->as_u8 [12] = 0xFE;
2806   ip->as_u8 [13] = mac[3];
2807   ip->as_u8 [14] = mac[4];
2808   ip->as_u8 [15] = mac[5];
2809 }
2810
2811 void
2812 ip6_ethernet_mac_address_from_link_local_address (u8 *mac,
2813                                                   ip6_address_t *ip)
2814 {
2815   /* Invert the previously inverted "u" bit */
2816   mac[0] = ip->as_u8 [8] ^ (1<<1);
2817   mac[1] = ip->as_u8 [9];
2818   mac[2] = ip->as_u8 [10];
2819   mac[3] = ip->as_u8 [13];
2820   mac[4] = ip->as_u8 [14];
2821   mac[5] = ip->as_u8 [15];
2822 }
2823
2824 static clib_error_t *
2825 test_ip6_link_command_fn (vlib_main_t * vm,
2826                           unformat_input_t * input,
2827                           vlib_cli_command_t * cmd)
2828 {
2829   u8 mac[6];
2830   ip6_address_t _a, *a = &_a;
2831
2832   if (unformat (input, "%U", unformat_ethernet_address, mac))
2833     {
2834       ip6_link_local_address_from_ethernet_mac_address (a, mac);
2835       vlib_cli_output (vm, "Link local address: %U",
2836                        format_ip6_address, a);
2837       ip6_ethernet_mac_address_from_link_local_address (mac, a);
2838       vlib_cli_output (vm, "Original MAC address: %U",
2839                        format_ethernet_address, mac);
2840     }
2841
2842   return 0;
2843 }
2844
2845 /*?
2846  * This command converts the given MAC Address into an IPv6 link-local
2847  * address.
2848  *
2849  * @cliexpar
2850  * Example of how to create an IPv6 link-local address:
2851  * @cliexstart{test ip6 link 16:d9:e0:91:79:86}
2852  * Link local address: fe80::14d9:e0ff:fe91:7986
2853  * Original MAC address: 16:d9:e0:91:79:86
2854  * @cliexend
2855 ?*/
2856 /* *INDENT-OFF* */
2857 VLIB_CLI_COMMAND (test_link_command, static) = {
2858   .path = "test ip6 link",
2859   .function = test_ip6_link_command_fn,
2860   .short_help = "test ip6 link <mac-address>",
2861 };
2862 /* *INDENT-ON* */
2863
2864 int vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config)
2865 {
2866   ip6_main_t * im6 = &ip6_main;
2867   ip6_fib_t * fib;
2868   uword * p = hash_get (im6->fib_index_by_table_id, table_id);
2869
2870   if (p == 0)
2871     return -1;
2872
2873   fib = ip6_fib_get (p[0]);
2874
2875   fib->flow_hash_config = flow_hash_config;
2876   return 1;
2877 }
2878
2879 static clib_error_t *
2880 set_ip6_flow_hash_command_fn (vlib_main_t * vm,
2881                               unformat_input_t * input,
2882                               vlib_cli_command_t * cmd)
2883 {
2884   int matched = 0;
2885   u32 table_id = 0;
2886   u32 flow_hash_config = 0;
2887   int rv;
2888
2889   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2890     if (unformat (input, "table %d", &table_id))
2891       matched = 1;
2892 #define _(a,v) \
2893     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2894     foreach_flow_hash_bit
2895 #undef _
2896     else break;
2897   }
2898
2899   if (matched == 0)
2900     return clib_error_return (0, "unknown input `%U'",
2901                               format_unformat_error, input);
2902
2903   rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config);
2904   switch (rv)
2905     {
2906     case 1:
2907       break;
2908
2909     case -1:
2910       return clib_error_return (0, "no such FIB table %d", table_id);
2911
2912     default:
2913       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2914       break;
2915     }
2916
2917   return 0;
2918 }
2919
2920 /*?
2921  * Configure the set of IPv6 fields used by the flow hash.
2922  *
2923  * @cliexpar
2924  * @parblock
2925  * Example of how to set the flow hash on a given table:
2926  * @cliexcmd{set ip6 flow-hash table 8 dst sport dport proto}
2927  *
2928  * Example of how to display the configured flow hash:
2929  * @cliexstart{show ip6 fib}
2930  * ipv6-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2931  * @::/0
2932  *   unicast-ip6-chain
2933  *   [@0]: dpo-load-balance: [index:5 buckets:1 uRPF:5 to:[0:0]]
2934  *     [0] [@0]: dpo-drop ip6
2935  * fe80::/10
2936  *   unicast-ip6-chain
2937  *   [@0]: dpo-load-balance: [index:10 buckets:1 uRPF:10 to:[0:0]]
2938  *     [0] [@2]: dpo-receive
2939  * ff02::1/128
2940  *   unicast-ip6-chain
2941  *   [@0]: dpo-load-balance: [index:8 buckets:1 uRPF:8 to:[0:0]]
2942  *     [0] [@2]: dpo-receive
2943  * ff02::2/128
2944  *   unicast-ip6-chain
2945  *   [@0]: dpo-load-balance: [index:7 buckets:1 uRPF:7 to:[0:0]]
2946  *     [0] [@2]: dpo-receive
2947  * ff02::16/128
2948  *   unicast-ip6-chain
2949  *   [@0]: dpo-load-balance: [index:9 buckets:1 uRPF:9 to:[0:0]]
2950  *     [0] [@2]: dpo-receive
2951  * ff02::1:ff00:0/104
2952  *   unicast-ip6-chain
2953  *   [@0]: dpo-load-balance: [index:6 buckets:1 uRPF:6 to:[0:0]]
2954  *     [0] [@2]: dpo-receive
2955  * ipv6-VRF:8, fib_index 1, flow hash: dst sport dport proto
2956  * @::/0
2957  *   unicast-ip6-chain
2958  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2959  *     [0] [@0]: dpo-drop ip6
2960  * @::a:1:1:0:4/126
2961  *   unicast-ip6-chain
2962  *   [@0]: dpo-load-balance: [index:27 buckets:1 uRPF:26 to:[0:0]]
2963  *     [0] [@4]: ipv6-glean: af_packet0
2964  * @::a:1:1:0:7/128
2965  *   unicast-ip6-chain
2966  *   [@0]: dpo-load-balance: [index:28 buckets:1 uRPF:27 to:[0:0]]
2967  *     [0] [@2]: dpo-receive: @::a:1:1:0:7 on af_packet0
2968  * fe80::/10
2969  *   unicast-ip6-chain
2970  *   [@0]: dpo-load-balance: [index:26 buckets:1 uRPF:25 to:[0:0]]
2971  *     [0] [@2]: dpo-receive
2972  * fe80::fe:3eff:fe3e:9222/128
2973  *   unicast-ip6-chain
2974  *   [@0]: dpo-load-balance: [index:29 buckets:1 uRPF:28 to:[0:0]]
2975  *     [0] [@2]: dpo-receive: fe80::fe:3eff:fe3e:9222 on af_packet0
2976  * ff02::1/128
2977  *   unicast-ip6-chain
2978  *   [@0]: dpo-load-balance: [index:24 buckets:1 uRPF:23 to:[0:0]]
2979  *     [0] [@2]: dpo-receive
2980  * ff02::2/128
2981  *   unicast-ip6-chain
2982  *   [@0]: dpo-load-balance: [index:23 buckets:1 uRPF:22 to:[0:0]]
2983  *     [0] [@2]: dpo-receive
2984  * ff02::16/128
2985  *   unicast-ip6-chain
2986  *   [@0]: dpo-load-balance: [index:25 buckets:1 uRPF:24 to:[0:0]]
2987  *     [0] [@2]: dpo-receive
2988  * ff02::1:ff00:0/104
2989  *   unicast-ip6-chain
2990  *   [@0]: dpo-load-balance: [index:22 buckets:1 uRPF:21 to:[0:0]]
2991  *     [0] [@2]: dpo-receive
2992  * @cliexend
2993  * @endparblock
2994 ?*/
2995 /* *INDENT-OFF* */
2996 VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) = {
2997     .path = "set ip6 flow-hash",
2998     .short_help =
2999     "set ip6 flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3000     .function = set_ip6_flow_hash_command_fn,
3001 };
3002 /* *INDENT-ON* */
3003
3004 static clib_error_t *
3005 show_ip6_local_command_fn (vlib_main_t * vm,
3006                            unformat_input_t * input,
3007                            vlib_cli_command_t * cmd)
3008 {
3009   ip6_main_t * im = &ip6_main;
3010   ip_lookup_main_t * lm = &im->lookup_main;
3011   int i;
3012
3013   vlib_cli_output (vm, "Protocols handled by ip6_local");
3014   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
3015     {
3016       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
3017         vlib_cli_output (vm, "%d", i);
3018     }
3019   return 0;
3020 }
3021
3022
3023
3024 /*?
3025  * Display the set of protocols handled by the local IPv6 stack.
3026  *
3027  * @cliexpar
3028  * Example of how to display local protocol table:
3029  * @cliexstart{show ip6 local}
3030  * Protocols handled by ip6_local
3031  * 17
3032  * 43
3033  * 58
3034  * 115
3035  * @cliexend
3036 ?*/
3037 /* *INDENT-OFF* */
3038 VLIB_CLI_COMMAND (show_ip6_local, static) = {
3039   .path = "show ip6 local",
3040   .function = show_ip6_local_command_fn,
3041   .short_help = "show ip6 local",
3042 };
3043 /* *INDENT-ON* */
3044
3045 int vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3046                                  u32 table_index)
3047 {
3048   vnet_main_t * vnm = vnet_get_main();
3049   vnet_interface_main_t * im = &vnm->interface_main;
3050   ip6_main_t * ipm = &ip6_main;
3051   ip_lookup_main_t * lm = &ipm->lookup_main;
3052   vnet_classify_main_t * cm = &vnet_classify_main;
3053   ip6_address_t *if_addr;
3054
3055   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3056     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3057
3058   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3059     return VNET_API_ERROR_NO_SUCH_ENTRY;
3060
3061   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3062   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3063
3064   if_addr = ip6_interface_first_address (ipm, sw_if_index, NULL);
3065
3066   if (NULL != if_addr)
3067   {
3068       fib_prefix_t pfx = {
3069           .fp_len = 128,
3070           .fp_proto = FIB_PROTOCOL_IP6,
3071           .fp_addr.ip6 = *if_addr,
3072       };
3073       u32 fib_index;
3074
3075       fib_index = fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
3076                                                       sw_if_index);
3077
3078
3079       if (table_index != (u32) ~0)
3080       {
3081           dpo_id_t dpo = DPO_INVALID;
3082
3083           dpo_set(&dpo,
3084                   DPO_CLASSIFY,
3085                   DPO_PROTO_IP6,
3086                   classify_dpo_create(DPO_PROTO_IP6,
3087                                       table_index));
3088
3089           fib_table_entry_special_dpo_add(fib_index,
3090                                           &pfx,
3091                                           FIB_SOURCE_CLASSIFY,
3092                                           FIB_ENTRY_FLAG_NONE,
3093                                           &dpo);
3094           dpo_reset(&dpo);
3095       }
3096       else
3097       {
3098           fib_table_entry_special_remove(fib_index,
3099                                          &pfx,
3100                                          FIB_SOURCE_CLASSIFY);
3101       }
3102   }
3103
3104   return 0;
3105 }
3106
3107 static clib_error_t *
3108 set_ip6_classify_command_fn (vlib_main_t * vm,
3109                              unformat_input_t * input,
3110                              vlib_cli_command_t * cmd)
3111 {
3112   u32 table_index = ~0;
3113   int table_index_set = 0;
3114   u32 sw_if_index = ~0;
3115   int rv;
3116
3117   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3118     if (unformat (input, "table-index %d", &table_index))
3119       table_index_set = 1;
3120     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3121                        vnet_get_main(), &sw_if_index))
3122         ;
3123     else
3124         break;
3125   }
3126
3127   if (table_index_set == 0)
3128       return clib_error_return (0, "classify table-index must be specified");
3129
3130   if (sw_if_index == ~0)
3131     return clib_error_return (0, "interface / subif must be specified");
3132
3133   rv = vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index);
3134
3135   switch (rv)
3136     {
3137     case 0:
3138       break;
3139
3140     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3141       return clib_error_return (0, "No such interface");
3142
3143     case VNET_API_ERROR_NO_SUCH_ENTRY:
3144       return clib_error_return (0, "No such classifier table");
3145     }
3146   return 0;
3147 }
3148
3149 /*?
3150  * Assign a classification table to an interface. The classification
3151  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3152  * commands. Once the table is created, use this command to filter packets
3153  * on an interface.
3154  *
3155  * @cliexpar
3156  * Example of how to assign a classification table to an interface:
3157  * @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1}
3158 ?*/
3159 /* *INDENT-OFF* */
3160 VLIB_CLI_COMMAND (set_ip6_classify_command, static) = {
3161     .path = "set ip6 classify",
3162     .short_help =
3163     "set ip6 classify intfc <interface> table-index <classify-idx>",
3164     .function = set_ip6_classify_command_fn,
3165 };
3166 /* *INDENT-ON* */
3167
3168 static clib_error_t *
3169 ip6_config (vlib_main_t * vm, unformat_input_t * input)
3170 {
3171   ip6_main_t * im = &ip6_main;
3172   uword heapsize = 0;
3173   u32 tmp;
3174   u32 nbuckets = 0;
3175
3176   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3177     if (unformat (input, "hash-buckets %d", &tmp))
3178       nbuckets = tmp;
3179     else if (unformat (input, "heap-size %dm", &tmp))
3180       heapsize = ((u64)tmp) << 20;
3181     else if (unformat (input, "heap-size %dM", &tmp))
3182       heapsize = ((u64)tmp) << 20;
3183     else if (unformat (input, "heap-size %dg", &tmp))
3184       heapsize = ((u64)tmp) << 30;
3185     else if (unformat (input, "heap-size %dG", &tmp))
3186       heapsize = ((u64)tmp) << 30;
3187     else
3188       return clib_error_return (0, "unknown input '%U'",
3189                                 format_unformat_error, input);
3190   }
3191
3192   im->lookup_table_nbuckets = nbuckets;
3193   im->lookup_table_size = heapsize;
3194
3195   return 0;
3196 }
3197
3198 VLIB_EARLY_CONFIG_FUNCTION (ip6_config, "ip6");
3199
3200 #define TEST_CODE 1
3201 #if TEST_CODE > 0
3202
3203 static clib_error_t *
3204 set_interface_ip6_output_feature_command_fn (vlib_main_t * vm,
3205                                              unformat_input_t * input,
3206                                              vlib_cli_command_t * cmd)
3207 {
3208   vnet_main_t * vnm = vnet_get_main();
3209   u32 sw_if_index = ~0;
3210   int is_add = 1;
3211   ip6_main_t * im = &ip6_main;
3212   ip_lookup_main_t * lm = &im->lookup_main;
3213
3214   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3215     {
3216       if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
3217         ;
3218       else if (unformat (input, "del"))
3219         is_add = 0;
3220       else
3221         break;
3222     }
3223
3224   if (sw_if_index == ~0)
3225     return clib_error_return (0, "unknown interface `%U'",
3226                               format_unformat_error, input);
3227
3228   lm->tx_sw_if_has_ip_output_features =
3229     clib_bitmap_set (lm->tx_sw_if_has_ip_output_features, sw_if_index, is_add);
3230
3231   return 0;
3232 }
3233
3234 /*?
3235  * Enable or disable the output feature on an interface.
3236  *
3237  * @todo Need a more detailed description.
3238  *
3239  * @cliexpar
3240  * Example of how to enable the output feature on an interface:
3241  * @cliexcmd{set interface ip6 output feature GigabitEthernet2/0/0}
3242  * Example of how to disable the output feature on an interface:
3243  * @cliexcmd{set interface ip6 output feature GigabitEthernet2/0/0 del}
3244 ?*/
3245 /* *INDENT-OFF* */
3246 VLIB_CLI_COMMAND (set_interface_ip6_output_feature, static) = {
3247   .path = "set interface ip6 output feature",
3248   .function = set_interface_ip6_output_feature_command_fn,
3249   .short_help = "set interface ip6 output feature <interface> [del]",
3250 };
3251 /* *INDENT-ON* */
3252
3253 #endif /* TEST_CODE */