d48ccad698454e778d0b5d4d56715c51af7380f6
[vpp.git] / vnet / vnet / ip / ip6_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip6_forward.c: IP v6 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
44 #include <vppinfra/cache.h>
45 #include <vnet/fib/fib_table.h>
46 #include <vnet/fib/ip6_fib.h>
47 #include <vnet/dpo/load_balance.h>
48 #include <vnet/dpo/classify_dpo.h>
49
50 #include <vppinfra/bihash_template.c>
51
52 /**
53  * @file
54  * @brief IPv6 Forwarding.
55  *
56  * This file contains the source code for IPv6 forwarding.
57  */
58
59 void
60 ip6_forward_next_trace (vlib_main_t * vm,
61                         vlib_node_runtime_t * node,
62                         vlib_frame_t * frame,
63                         vlib_rx_or_tx_t which_adj_index);
64
65 always_inline uword
66 ip6_lookup_inline (vlib_main_t * vm,
67                    vlib_node_runtime_t * node,
68                    vlib_frame_t * frame)
69 {
70   ip6_main_t * im = &ip6_main;
71   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
72   u32 n_left_from, n_left_to_next, * from, * to_next;
73   ip_lookup_next_t next;
74   u32 cpu_index = os_get_cpu_number();
75
76   from = vlib_frame_vector_args (frame);
77   n_left_from = frame->n_vectors;
78   next = node->cached_next_index;
79
80   while (n_left_from > 0)
81     {
82       vlib_get_next_frame (vm, node, next,
83                            to_next, n_left_to_next);
84
85       while (n_left_from >= 4 && n_left_to_next >= 2)
86         {
87           vlib_buffer_t * p0, * p1;
88           u32 pi0, pi1, lbi0, lbi1, wrong_next;
89           ip_lookup_next_t next0, next1;
90           ip6_header_t * ip0, * ip1;
91           ip6_address_t * dst_addr0, * dst_addr1;
92           u32 fib_index0, fib_index1;
93           u32 flow_hash_config0, flow_hash_config1;
94           const dpo_id_t *dpo0, *dpo1;
95           const load_balance_t *lb0, *lb1;
96
97           /* Prefetch next iteration. */
98           {
99             vlib_buffer_t * p2, * p3;
100
101             p2 = vlib_get_buffer (vm, from[2]);
102             p3 = vlib_get_buffer (vm, from[3]);
103
104             vlib_prefetch_buffer_header (p2, LOAD);
105             vlib_prefetch_buffer_header (p3, LOAD);
106             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
107             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
108           }
109
110           pi0 = to_next[0] = from[0];
111           pi1 = to_next[1] = from[1];
112
113           p0 = vlib_get_buffer (vm, pi0);
114           p1 = vlib_get_buffer (vm, pi1);
115
116           ip0 = vlib_buffer_get_current (p0);
117           ip1 = vlib_buffer_get_current (p1);
118
119           dst_addr0 = &ip0->dst_address;
120           dst_addr1 = &ip1->dst_address;
121
122           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
123           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
124
125           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
126             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
127           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
128             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
129
130           lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0);
131           lbi1 = ip6_fib_table_fwding_lookup (im, fib_index1, dst_addr1);
132
133           lb0 = load_balance_get (lbi0);
134           lb1 = load_balance_get (lbi1);
135
136           vnet_buffer (p0)->ip.flow_hash =
137             vnet_buffer(p1)->ip.flow_hash = 0;
138
139           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
140             {
141               flow_hash_config0 = lb0->lb_hash_config;
142               vnet_buffer (p0)->ip.flow_hash =
143                 ip6_compute_flow_hash (ip0, flow_hash_config0);
144             }
145           if (PREDICT_FALSE(lb1->lb_n_buckets > 1))
146             {
147               flow_hash_config1 = lb1->lb_hash_config;
148               vnet_buffer (p1)->ip.flow_hash =
149                 ip6_compute_flow_hash (ip1, flow_hash_config1);
150             }
151
152           ASSERT (lb0->lb_n_buckets > 0);
153           ASSERT (lb1->lb_n_buckets > 0);
154           ASSERT (is_pow2 (lb0->lb_n_buckets));
155           ASSERT (is_pow2 (lb1->lb_n_buckets));
156           dpo0 = load_balance_get_bucket_i(lb0,
157                                            (vnet_buffer (p0)->ip.flow_hash &
158                                             lb0->lb_n_buckets_minus_1));
159           dpo1 = load_balance_get_bucket_i(lb1,
160                                            (vnet_buffer (p1)->ip.flow_hash &
161                                             lb1->lb_n_buckets_minus_1));
162
163           next0 = dpo0->dpoi_next_node;
164           next1 = dpo1->dpoi_next_node;
165
166           /* Only process the HBH Option Header if explicitly configured to do so */
167           next0 = ((ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) &&
168                    im->hbh_enabled) ?
169             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP :
170             next0;
171           next1 = ((ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) &&
172                    im->hbh_enabled) ?
173             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP :
174             next1;
175
176           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
177           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
178
179           vlib_increment_combined_counter
180               (cm, cpu_index, lbi0, 1,
181                vlib_buffer_length_in_chain (vm, p0));
182           vlib_increment_combined_counter
183               (cm, cpu_index, lbi1, 1,
184                vlib_buffer_length_in_chain (vm, p1));
185
186           from += 2;
187           to_next += 2;
188           n_left_to_next -= 2;
189           n_left_from -= 2;
190
191           wrong_next = (next0 != next) + 2*(next1 != next);
192           if (PREDICT_FALSE (wrong_next != 0))
193             {
194               switch (wrong_next)
195                 {
196                 case 1:
197                   /* A B A */
198                   to_next[-2] = pi1;
199                   to_next -= 1;
200                   n_left_to_next += 1;
201                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
202                   break;
203
204                 case 2:
205                   /* A A B */
206                   to_next -= 1;
207                   n_left_to_next += 1;
208                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
209                   break;
210
211                 case 3:
212                   /* A B C */
213                   to_next -= 2;
214                   n_left_to_next += 2;
215                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
216                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
217                   if (next0 == next1)
218                     {
219                       /* A B B */
220                       vlib_put_next_frame (vm, node, next, n_left_to_next);
221                       next = next1;
222                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
223                     }
224                 }
225             }
226         }
227
228       while (n_left_from > 0 && n_left_to_next > 0)
229         {
230           vlib_buffer_t * p0;
231           ip6_header_t * ip0;
232           u32 pi0, lbi0;
233           ip_lookup_next_t next0;
234           load_balance_t * lb0;
235           ip6_address_t * dst_addr0;
236           u32 fib_index0, flow_hash_config0;
237           const dpo_id_t *dpo0;
238
239           pi0 = from[0];
240           to_next[0] = pi0;
241
242           p0 = vlib_get_buffer (vm, pi0);
243
244           ip0 = vlib_buffer_get_current (p0);
245
246           dst_addr0 = &ip0->dst_address;
247
248           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
249           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
250             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
251
252           flow_hash_config0 =
253               ip6_fib_get (fib_index0)->flow_hash_config;
254
255           lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0);
256
257           lb0 = load_balance_get (lbi0);
258
259           vnet_buffer (p0)->ip.flow_hash = 0;
260
261           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
262             {
263               flow_hash_config0 = lb0->lb_hash_config;
264               vnet_buffer (p0)->ip.flow_hash =
265                 ip6_compute_flow_hash (ip0, flow_hash_config0);
266             }
267
268           ASSERT (lb0->lb_n_buckets > 0);
269           ASSERT (is_pow2 (lb0->lb_n_buckets));
270           dpo0 = load_balance_get_bucket_i(lb0,
271                                            (vnet_buffer (p0)->ip.flow_hash &
272                                             lb0->lb_n_buckets_minus_1));
273           next0 = dpo0->dpoi_next_node;
274           /* Only process the HBH Option Header if explicitly configured to do so */
275           next0 = ((ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) &&
276                    im->hbh_enabled) ?
277             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP :
278             next0;
279
280           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
281
282           vlib_increment_combined_counter
283               (cm, cpu_index, lbi0, 1,
284                vlib_buffer_length_in_chain (vm, p0));
285
286           from += 1;
287           to_next += 1;
288           n_left_to_next -= 1;
289           n_left_from -= 1;
290
291           if (PREDICT_FALSE (next0 != next))
292             {
293               n_left_to_next += 1;
294               vlib_put_next_frame (vm, node, next, n_left_to_next);
295               next = next0;
296               vlib_get_next_frame (vm, node, next,
297                                    to_next, n_left_to_next);
298               to_next[0] = pi0;
299               to_next += 1;
300               n_left_to_next -= 1;
301             }
302         }
303
304       vlib_put_next_frame (vm, node, next, n_left_to_next);
305     }
306
307   if (node->flags & VLIB_NODE_FLAG_TRACE)
308     ip6_forward_next_trace(vm, node, frame, VLIB_TX);
309
310   return frame->n_vectors;
311 }
312
313 static void
314 ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
315                           ip6_main_t * im, u32 fib_index,
316                           ip_interface_address_t * a)
317 {
318   ip_lookup_main_t * lm = &im->lookup_main;
319   ip6_address_t * address = ip_interface_address_get_address (lm, a);
320   fib_prefix_t pfx = {
321       .fp_len = a->address_length,
322       .fp_proto = FIB_PROTOCOL_IP6,
323       .fp_addr.ip6 = *address,
324   };
325
326   a->neighbor_probe_adj_index = ~0;
327   if (a->address_length < 128)
328   {
329       fib_node_index_t fei;
330
331       fei = fib_table_entry_update_one_path(fib_index,
332                                             &pfx,
333                                             FIB_SOURCE_INTERFACE,
334                                             (FIB_ENTRY_FLAG_CONNECTED |
335                                              FIB_ENTRY_FLAG_ATTACHED),
336                                             FIB_PROTOCOL_IP6,
337                                             NULL, /* No next-hop address */
338                                             sw_if_index,
339                                             ~0, // invalid FIB index
340                                             1,
341                                             MPLS_LABEL_INVALID,
342                                             FIB_ROUTE_PATH_FLAG_NONE);
343       a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
344   }
345
346   pfx.fp_len = 128;
347   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
348   {
349       u32 classify_table_index =
350           lm->classify_table_index_by_sw_if_index [sw_if_index];
351       if (classify_table_index != (u32) ~0)
352       {
353           dpo_id_t dpo = DPO_NULL;
354
355           dpo_set(&dpo,
356                   DPO_CLASSIFY,
357                   DPO_PROTO_IP4,
358                   classify_dpo_create(FIB_PROTOCOL_IP6,
359                                       classify_table_index));
360
361           fib_table_entry_special_dpo_add(fib_index,
362                                           &pfx,
363                                           FIB_SOURCE_CLASSIFY,
364                                           FIB_ENTRY_FLAG_NONE,
365                                           &dpo);
366           dpo_reset(&dpo);
367       }
368   }
369
370   fib_table_entry_update_one_path(fib_index,
371                                   &pfx,
372                                   FIB_SOURCE_INTERFACE,
373                                   (FIB_ENTRY_FLAG_CONNECTED |
374                                    FIB_ENTRY_FLAG_LOCAL),
375                                   FIB_PROTOCOL_IP6,
376                                   &pfx.fp_addr,
377                                   sw_if_index,
378                                   ~0, // invalid FIB index
379                                   1,
380                                   MPLS_LABEL_INVALID,
381                                   FIB_ROUTE_PATH_FLAG_NONE);
382 }
383
384 static void
385 ip6_del_interface_routes (ip6_main_t * im,
386                           u32 fib_index,
387                           ip6_address_t * address,
388                           u32 address_length)
389 {
390     fib_prefix_t pfx = {
391         .fp_len = address_length,
392         .fp_proto = FIB_PROTOCOL_IP6,
393         .fp_addr.ip6 = *address,
394     };
395
396     if (pfx.fp_len < 128)
397     {
398         fib_table_entry_delete(fib_index,
399                                &pfx,
400                                FIB_SOURCE_INTERFACE);
401
402     }
403
404     pfx.fp_len = 128;
405     fib_table_entry_delete(fib_index,
406                            &pfx,
407                            FIB_SOURCE_INTERFACE);
408 }
409
410 void
411 ip6_sw_interface_enable_disable (u32 sw_if_index,
412                                  u32 is_enable)
413 {
414   vlib_main_t * vm = vlib_get_main();
415   ip6_main_t * im = &ip6_main;
416   ip_lookup_main_t * lm = &im->lookup_main;
417   u32 ci, cast;
418   u32 lookup_feature_index;
419
420   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
421
422   /*
423    * enable/disable only on the 1<->0 transition
424    */
425   if (is_enable)
426     {
427       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
428         return;
429     }
430   else
431     {
432       ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
433       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
434         return;
435     }
436
437   for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++)
438     {
439       ip_config_main_t * cm = &lm->feature_config_mains[cast];
440       vnet_config_main_t * vcm = &cm->config_main;
441
442       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
443       ci = cm->config_index_by_sw_if_index[sw_if_index];
444
445       if (cast == VNET_IP_RX_UNICAST_FEAT)
446         lookup_feature_index = im->ip6_unicast_rx_feature_lookup;
447       else
448         lookup_feature_index = im->ip6_multicast_rx_feature_lookup;
449
450       if (is_enable)
451         ci = vnet_config_add_feature (vm, vcm,
452                                       ci,
453                                       lookup_feature_index,
454                                       /* config data */ 0,
455                                       /* # bytes of config data */ 0);
456       else
457         ci = vnet_config_del_feature (vm, vcm,
458                                       ci,
459                                       lookup_feature_index,
460                                       /* config data */ 0,
461                                       /* # bytes of config data */ 0);
462
463       cm->config_index_by_sw_if_index[sw_if_index] = ci;
464     }
465 }
466
467 /* get first interface address */
468 ip6_address_t *
469 ip6_interface_first_address (ip6_main_t * im,
470                              u32 sw_if_index,
471                              ip_interface_address_t ** result_ia)
472 {
473   ip_lookup_main_t * lm = &im->lookup_main;
474   ip_interface_address_t * ia = 0;
475   ip6_address_t * result = 0;
476
477   foreach_ip_interface_address (lm, ia, sw_if_index,
478                                 1 /* honor unnumbered */,
479   ({
480     ip6_address_t * a = ip_interface_address_get_address (lm, ia);
481     result = a;
482     break;
483   }));
484   if (result_ia)
485     *result_ia = result ? ia : 0;
486   return result;
487 }
488
489 clib_error_t *
490 ip6_add_del_interface_address (vlib_main_t * vm,
491                                u32 sw_if_index,
492                                ip6_address_t * address,
493                                u32 address_length,
494                                u32 is_del)
495 {
496   vnet_main_t * vnm = vnet_get_main();
497   ip6_main_t * im = &ip6_main;
498   ip_lookup_main_t * lm = &im->lookup_main;
499   clib_error_t * error;
500   u32 if_address_index;
501   ip6_address_fib_t ip6_af, * addr_fib = 0;
502
503   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
504   ip6_addr_fib_init (&ip6_af, address,
505                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
506   vec_add1 (addr_fib, ip6_af);
507
508   {
509     uword elts_before = pool_elts (lm->if_address_pool);
510
511     error = ip_interface_address_add_del
512       (lm,
513        sw_if_index,
514        addr_fib,
515        address_length,
516        is_del,
517        &if_address_index);
518     if (error)
519       goto done;
520
521     /* Pool did not grow: add duplicate address. */
522     if (elts_before == pool_elts (lm->if_address_pool))
523       goto done;
524   }
525
526   if (is_del)
527       ip6_del_interface_routes (im, ip6_af.fib_index, address,
528                                 address_length);
529   else
530       ip6_add_interface_routes (vnm, sw_if_index,
531                                 im, ip6_af.fib_index,
532                                 pool_elt_at_index (lm->if_address_pool, if_address_index));
533
534   {
535     ip6_add_del_interface_address_callback_t * cb;
536     vec_foreach (cb, im->add_del_interface_address_callbacks)
537       cb->function (im, cb->function_opaque, sw_if_index,
538                     address, address_length,
539                     if_address_index,
540                     is_del);
541   }
542
543  done:
544   vec_free (addr_fib);
545   return error;
546 }
547
548 clib_error_t *
549 ip6_sw_interface_admin_up_down (vnet_main_t * vnm,
550                                 u32 sw_if_index,
551                                 u32 flags)
552 {
553   ip6_main_t * im = &ip6_main;
554   ip_interface_address_t * ia;
555   ip6_address_t * a;
556   u32 is_admin_up, fib_index;
557
558   /* Fill in lookup tables with default table (0). */
559   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
560
561   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
562
563   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
564
565   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
566
567   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
568                                 0 /* honor unnumbered */,
569   ({
570     a = ip_interface_address_get_address (&im->lookup_main, ia);
571     if (is_admin_up)
572       ip6_add_interface_routes (vnm, sw_if_index,
573                                 im, fib_index,
574                                 ia);
575     else
576       ip6_del_interface_routes (im, fib_index,
577                                 a, ia->address_length);
578   }));
579
580   return 0;
581 }
582
583 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
584
585 /* Built-in ip6 unicast rx feature path definition */
586 VNET_IP6_UNICAST_FEATURE_INIT (ip6_flow_classify, static) = {
587   .node_name = "ip6-flow-classify",
588   .runs_before = ORDER_CONSTRAINTS {"ip6-inacl", 0},
589   .feature_index = &ip6_main.ip6_unicast_rx_feature_flow_classify,
590 };
591
592 VNET_IP6_UNICAST_FEATURE_INIT (ip6_inacl, static) = {
593   .node_name = "ip6-inacl",
594   .runs_before = ORDER_CONSTRAINTS {"ip6-policer-classify", 0},
595   .feature_index = &ip6_main.ip6_unicast_rx_feature_check_access,
596 };
597
598 VNET_IP6_UNICAST_FEATURE_INIT (ip6_policer_classify, static) = {
599   .node_name = "ip6-policer-classify",
600   .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip6", 0},
601   .feature_index = &ip6_main.ip6_unicast_rx_feature_policer_classify,
602 };
603
604 VNET_IP6_UNICAST_FEATURE_INIT (ip6_ipsec, static) = {
605   .node_name = "ipsec-input-ip6",
606   .runs_before = ORDER_CONSTRAINTS {"l2tp-decap", 0},
607   .feature_index = &ip6_main.ip6_unicast_rx_feature_ipsec,
608 };
609
610 VNET_IP6_UNICAST_FEATURE_INIT (ip6_l2tp, static) = {
611   .node_name = "l2tp-decap",
612   .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip6", 0},
613   .feature_index = &ip6_main.ip6_unicast_rx_feature_l2tp_decap,
614 };
615
616 VNET_IP6_UNICAST_FEATURE_INIT (ip6_vpath, static) = {
617   .node_name = "vpath-input-ip6",
618   .runs_before = ORDER_CONSTRAINTS {"ip6-lookup", 0},
619   .feature_index = &ip6_main.ip6_unicast_rx_feature_vpath,
620 };
621
622 VNET_IP6_UNICAST_FEATURE_INIT (ip6_lookup, static) = {
623   .node_name = "ip6-lookup",
624   .runs_before = ORDER_CONSTRAINTS {"ip6-drop", 0},
625   .feature_index = &ip6_main.ip6_unicast_rx_feature_lookup,
626 };
627
628 VNET_IP6_UNICAST_FEATURE_INIT (ip6_drop, static) = {
629   .node_name = "ip6-drop",
630   .runs_before = 0,  /*last feature*/
631   .feature_index = &ip6_main.ip6_unicast_rx_feature_drop,
632 };
633
634 /* Built-in ip6 multicast rx feature path definition (none now) */
635 VNET_IP6_MULTICAST_FEATURE_INIT (ip6_vpath_mc, static) = {
636   .node_name = "vpath-input-ip6",
637   .runs_before = ORDER_CONSTRAINTS {"ip6-lookup", 0},
638   .feature_index = &ip6_main.ip6_multicast_rx_feature_vpath,
639 };
640
641 VNET_IP6_MULTICAST_FEATURE_INIT (ip6_lookup, static) = {
642   .node_name = "ip6-lookup",
643   .runs_before = ORDER_CONSTRAINTS {"ip6-drop", 0},
644   .feature_index = &ip6_main.ip6_multicast_rx_feature_lookup,
645 };
646
647 VNET_IP6_MULTICAST_FEATURE_INIT (ip6_drop_mc, static) = {
648   .node_name = "ip6-drop",
649   .runs_before = 0, /* last feature */
650   .feature_index = &ip6_main.ip6_multicast_rx_feature_drop,
651 };
652
653 static char * rx_feature_start_nodes[] =
654   {"ip6-input"};
655
656 static char * tx_feature_start_nodes[] =
657 {
658   "ip6-rewrite",
659   "ip6-midchain",
660 };
661
662 /* Built-in ip4 tx feature path definition */
663 VNET_IP6_TX_FEATURE_INIT (interface_output, static) = {
664   .node_name = "interface-output",
665   .runs_before = 0, /* not before any other features */
666   .feature_index = &ip6_main.ip6_tx_feature_interface_output,
667 };
668
669 static clib_error_t *
670 ip6_feature_init (vlib_main_t * vm, ip6_main_t * im)
671 {
672   ip_lookup_main_t * lm = &im->lookup_main;
673   clib_error_t * error;
674   vnet_cast_t cast;
675   ip_config_main_t * cm;
676   vnet_config_main_t * vcm;
677   char **feature_start_nodes;
678   int feature_start_len;
679
680   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
681     {
682       cm = &lm->feature_config_mains[cast];
683       vcm = &cm->config_main;
684
685       if (cast < VNET_IP_TX_FEAT)
686         {
687           feature_start_nodes = rx_feature_start_nodes;
688           feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
689         }
690       else
691         {
692           feature_start_nodes = tx_feature_start_nodes;
693           feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
694         }
695
696       if ((error = vnet_feature_arc_init (vm, vcm,
697                                          feature_start_nodes,
698                                          feature_start_len,
699                                          im->next_feature[cast],
700                                          &im->feature_nodes[cast])))
701         return error;
702     }
703   return 0;
704 }
705
706 clib_error_t *
707 ip6_sw_interface_add_del (vnet_main_t * vnm,
708                           u32 sw_if_index,
709                           u32 is_add)
710 {
711   vlib_main_t * vm = vnm->vlib_main;
712   ip6_main_t * im = &ip6_main;
713   ip_lookup_main_t * lm = &im->lookup_main;
714   u32 ci, cast;
715   u32 feature_index;
716
717   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
718     {
719       ip_config_main_t * cm = &lm->feature_config_mains[cast];
720       vnet_config_main_t * vcm = &cm->config_main;
721
722       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
723       ci = cm->config_index_by_sw_if_index[sw_if_index];
724
725       if (cast == VNET_IP_RX_UNICAST_FEAT)
726         feature_index = im->ip6_unicast_rx_feature_drop;
727       else if (cast == VNET_IP_RX_MULTICAST_FEAT)
728         feature_index = im->ip6_multicast_rx_feature_drop;
729       else
730         feature_index = im->ip6_tx_feature_interface_output;
731
732       if (is_add)
733         ci = vnet_config_add_feature (vm, vcm,
734                                       ci,
735                                       feature_index,
736                                       /* config data */ 0,
737                                       /* # bytes of config data */ 0);
738       else
739         {
740           ci = vnet_config_del_feature (vm, vcm, ci,
741                                         feature_index,
742                                         /* config data */ 0,
743                                         /* # bytes of config data */ 0);
744           if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index)
745               im->ip_enabled_by_sw_if_index[sw_if_index] = 0;
746         }
747       cm->config_index_by_sw_if_index[sw_if_index] = ci;
748       /*
749        * note: do not update the tx feature count here.
750        */
751     }
752   return /* no error */ 0;
753 }
754
755 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del);
756
757 static uword
758 ip6_lookup (vlib_main_t * vm,
759             vlib_node_runtime_t * node,
760             vlib_frame_t * frame)
761 {
762   return ip6_lookup_inline (vm, node, frame);
763 }
764
765 static u8 * format_ip6_lookup_trace (u8 * s, va_list * args);
766
767 VLIB_REGISTER_NODE (ip6_lookup_node) = {
768   .function = ip6_lookup,
769   .name = "ip6-lookup",
770   .vector_size = sizeof (u32),
771
772   .format_trace = format_ip6_lookup_trace,
773
774   .n_next_nodes = IP6_LOOKUP_N_NEXT,
775   .next_nodes = IP6_LOOKUP_NEXT_NODES,
776 };
777
778 VLIB_NODE_FUNCTION_MULTIARCH (ip6_lookup_node, ip6_lookup)
779
780 always_inline uword
781 ip6_load_balance (vlib_main_t * vm,
782                   vlib_node_runtime_t * node,
783                   vlib_frame_t * frame)
784 {
785   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
786   u32 n_left_from, n_left_to_next, * from, * to_next;
787   ip_lookup_next_t next;
788   u32 cpu_index = os_get_cpu_number();
789
790   from = vlib_frame_vector_args (frame);
791   n_left_from = frame->n_vectors;
792   next = node->cached_next_index;
793
794   if (node->flags & VLIB_NODE_FLAG_TRACE)
795       ip6_forward_next_trace(vm, node, frame, VLIB_TX);
796
797   while (n_left_from > 0)
798     {
799       vlib_get_next_frame (vm, node, next,
800                            to_next, n_left_to_next);
801
802
803       while (n_left_from > 0 && n_left_to_next > 0)
804         {
805           ip_lookup_next_t next0;
806           const load_balance_t *lb0;
807           vlib_buffer_t * p0;
808           u32 pi0, lbi0, hc0;
809           const ip6_header_t *ip0;
810           const dpo_id_t *dpo0;
811
812           pi0 = from[0];
813           to_next[0] = pi0;
814
815           p0 = vlib_get_buffer (vm, pi0);
816
817           ip0 = vlib_buffer_get_current (p0);
818           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
819
820           lb0 = load_balance_get(lbi0);
821           hc0 = lb0->lb_hash_config;
822           vnet_buffer(p0)->ip.flow_hash = ip6_compute_flow_hash(ip0, hc0);
823
824           dpo0 = load_balance_get_bucket_i(lb0,
825                                            vnet_buffer(p0)->ip.flow_hash &
826                                            (lb0->lb_n_buckets - 1));
827
828           next0 = dpo0->dpoi_next_node;
829           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
830
831           vlib_increment_combined_counter
832               (cm, cpu_index, lbi0, 1,
833                vlib_buffer_length_in_chain (vm, p0));
834
835           from += 1;
836           to_next += 1;
837           n_left_to_next -= 1;
838           n_left_from -= 1;
839
840           if (PREDICT_FALSE (next0 != next))
841             {
842               n_left_to_next += 1;
843               vlib_put_next_frame (vm, node, next, n_left_to_next);
844               next = next0;
845               vlib_get_next_frame (vm, node, next,
846                                    to_next, n_left_to_next);
847               to_next[0] = pi0;
848               to_next += 1;
849               n_left_to_next -= 1;
850             }
851         }
852
853       vlib_put_next_frame (vm, node, next, n_left_to_next);
854     }
855
856   return frame->n_vectors;
857 }
858
859 VLIB_REGISTER_NODE (ip6_load_balance_node) = {
860   .function = ip6_load_balance,
861   .name = "ip6-load-balance",
862   .vector_size = sizeof (u32),
863   .sibling_of = "ip6-lookup",
864   .format_trace = format_ip6_lookup_trace,
865   .n_next_nodes = 0,
866 };
867
868 VLIB_NODE_FUNCTION_MULTIARCH (ip6_load_balance_node, ip6_load_balance)
869
870 typedef struct {
871   /* Adjacency taken. */
872   u32 adj_index;
873   u32 flow_hash;
874   u32 fib_index;
875
876   /* Packet data, possibly *after* rewrite. */
877   u8 packet_data[128 - 1*sizeof(u32)];
878 } ip6_forward_next_trace_t;
879
880 static u8 * format_ip6_forward_next_trace (u8 * s, va_list * args)
881 {
882   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
883   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
884   ip6_forward_next_trace_t * t = va_arg (*args, ip6_forward_next_trace_t *);
885   uword indent = format_get_indent (s);
886
887   s = format(s, "%U%U",
888              format_white_space, indent,
889              format_ip6_header, t->packet_data, sizeof (t->packet_data));
890   return s;
891 }
892
893 static u8 * format_ip6_lookup_trace (u8 * s, va_list * args)
894 {
895   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
896   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
897   ip6_forward_next_trace_t * t = va_arg (*args, ip6_forward_next_trace_t *);
898   uword indent = format_get_indent (s);
899
900   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
901               t->fib_index, t->adj_index, t->flow_hash);
902   s = format(s, "\n%U%U",
903              format_white_space, indent,
904              format_ip6_header, t->packet_data, sizeof (t->packet_data));
905   return s;
906 }
907
908
909 static u8 * format_ip6_rewrite_trace (u8 * s, va_list * args)
910 {
911   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
912   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
913   ip6_forward_next_trace_t * t = va_arg (*args, ip6_forward_next_trace_t *);
914   vnet_main_t * vnm = vnet_get_main();
915   uword indent = format_get_indent (s);
916
917   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
918               t->fib_index, t->adj_index, format_ip_adjacency,
919               vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
920               t->flow_hash);
921   s = format (s, "\n%U%U",
922               format_white_space, indent,
923               format_ip_adjacency_packet_data,
924               vnm, t->adj_index,
925               t->packet_data, sizeof (t->packet_data));
926   return s;
927 }
928
929 /* Common trace function for all ip6-forward next nodes. */
930 void
931 ip6_forward_next_trace (vlib_main_t * vm,
932                         vlib_node_runtime_t * node,
933                         vlib_frame_t * frame,
934                         vlib_rx_or_tx_t which_adj_index)
935 {
936   u32 * from, n_left;
937   ip6_main_t * im = &ip6_main;
938
939   n_left = frame->n_vectors;
940   from = vlib_frame_vector_args (frame);
941
942   while (n_left >= 4)
943     {
944       u32 bi0, bi1;
945       vlib_buffer_t * b0, * b1;
946       ip6_forward_next_trace_t * t0, * t1;
947
948       /* Prefetch next iteration. */
949       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
950       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
951
952       bi0 = from[0];
953       bi1 = from[1];
954
955       b0 = vlib_get_buffer (vm, bi0);
956       b1 = vlib_get_buffer (vm, bi1);
957
958       if (b0->flags & VLIB_BUFFER_IS_TRACED)
959         {
960           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
961           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
962           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
963           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
964               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
965               vec_elt (im->fib_index_by_sw_if_index,
966                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
967
968           clib_memcpy (t0->packet_data,
969                   vlib_buffer_get_current (b0),
970                   sizeof (t0->packet_data));
971         }
972       if (b1->flags & VLIB_BUFFER_IS_TRACED)
973         {
974           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
975           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
976           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
977           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
978               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
979               vec_elt (im->fib_index_by_sw_if_index,
980                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
981
982           clib_memcpy (t1->packet_data,
983                   vlib_buffer_get_current (b1),
984                   sizeof (t1->packet_data));
985         }
986       from += 2;
987       n_left -= 2;
988     }
989
990   while (n_left >= 1)
991     {
992       u32 bi0;
993       vlib_buffer_t * b0;
994       ip6_forward_next_trace_t * t0;
995
996       bi0 = from[0];
997
998       b0 = vlib_get_buffer (vm, bi0);
999
1000       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1001         {
1002           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1003           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1004           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1005           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1006               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1007               vec_elt (im->fib_index_by_sw_if_index,
1008                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1009
1010           clib_memcpy (t0->packet_data,
1011                   vlib_buffer_get_current (b0),
1012                   sizeof (t0->packet_data));
1013         }
1014       from += 1;
1015       n_left -= 1;
1016     }
1017 }
1018
1019 static uword
1020 ip6_drop_or_punt (vlib_main_t * vm,
1021                   vlib_node_runtime_t * node,
1022                   vlib_frame_t * frame,
1023                   ip6_error_t error_code)
1024 {
1025   u32 * buffers = vlib_frame_vector_args (frame);
1026   uword n_packets = frame->n_vectors;
1027
1028   vlib_error_drop_buffers (vm, node,
1029                            buffers,
1030                            /* stride */ 1,
1031                            n_packets,
1032                            /* next */ 0,
1033                            ip6_input_node.index,
1034                            error_code);
1035
1036   if (node->flags & VLIB_NODE_FLAG_TRACE)
1037     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1038
1039   return n_packets;
1040 }
1041
1042 static uword
1043 ip6_drop (vlib_main_t * vm,
1044           vlib_node_runtime_t * node,
1045           vlib_frame_t * frame)
1046 { return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_DROP); }
1047
1048 static uword
1049 ip6_punt (vlib_main_t * vm,
1050           vlib_node_runtime_t * node,
1051           vlib_frame_t * frame)
1052 { return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_PUNT); }
1053
1054 VLIB_REGISTER_NODE (ip6_drop_node,static) = {
1055   .function = ip6_drop,
1056   .name = "ip6-drop",
1057   .vector_size = sizeof (u32),
1058
1059   .format_trace = format_ip6_forward_next_trace,
1060
1061   .n_next_nodes = 1,
1062   .next_nodes = {
1063     [0] = "error-drop",
1064   },
1065 };
1066
1067 VLIB_NODE_FUNCTION_MULTIARCH (ip6_drop_node, ip6_drop)
1068
1069 VLIB_REGISTER_NODE (ip6_punt_node,static) = {
1070   .function = ip6_punt,
1071   .name = "ip6-punt",
1072   .vector_size = sizeof (u32),
1073
1074   .format_trace = format_ip6_forward_next_trace,
1075
1076   .n_next_nodes = 1,
1077   .next_nodes = {
1078     [0] = "error-punt",
1079   },
1080 };
1081
1082 VLIB_NODE_FUNCTION_MULTIARCH (ip6_punt_node, ip6_punt)
1083
1084 VLIB_REGISTER_NODE (ip6_multicast_node,static) = {
1085   .function = ip6_drop,
1086   .name = "ip6-multicast",
1087   .vector_size = sizeof (u32),
1088
1089   .format_trace = format_ip6_forward_next_trace,
1090
1091   .n_next_nodes = 1,
1092   .next_nodes = {
1093     [0] = "error-drop",
1094   },
1095 };
1096
1097 /* Compute TCP/UDP/ICMP6 checksum in software. */
1098 u16 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, ip6_header_t * ip0, int *bogus_lengthp)
1099 {
1100   ip_csum_t sum0;
1101   u16 sum16, payload_length_host_byte_order;
1102   u32 i, n_this_buffer, n_bytes_left;
1103   u32 headers_size = sizeof(ip0[0]);
1104   void * data_this_buffer;
1105
1106   ASSERT(bogus_lengthp);
1107   *bogus_lengthp = 0;
1108
1109   /* Initialize checksum with ip header. */
1110   sum0 = ip0->payload_length + clib_host_to_net_u16 (ip0->protocol);
1111   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
1112   data_this_buffer = (void *) (ip0 + 1);
1113
1114   for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++)
1115     {
1116       sum0 = ip_csum_with_carry (sum0,
1117                                  clib_mem_unaligned (&ip0->src_address.as_uword[i], uword));
1118       sum0 = ip_csum_with_carry (sum0,
1119                                  clib_mem_unaligned (&ip0->dst_address.as_uword[i], uword));
1120     }
1121
1122   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
1123   if (PREDICT_FALSE (ip0->protocol ==  IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
1124     {
1125       u32  skip_bytes;
1126       ip6_hop_by_hop_ext_t *ext_hdr = (ip6_hop_by_hop_ext_t  *)data_this_buffer;
1127
1128       /* validate really icmp6 next */
1129       ASSERT(ext_hdr->next_hdr == IP_PROTOCOL_ICMP6);
1130
1131       skip_bytes = 8* (1 + ext_hdr->n_data_u64s);
1132       data_this_buffer  = (void *)((u8 *)data_this_buffer + skip_bytes);
1133
1134       payload_length_host_byte_order  -= skip_bytes;
1135       headers_size += skip_bytes;
1136    }
1137
1138   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1139 #if DPDK > 0
1140   if (p0 && n_this_buffer + headers_size  > p0->current_length)
1141   {
1142     struct rte_mbuf *mb = rte_mbuf_from_vlib_buffer(p0);
1143     u8 nb_segs = mb->nb_segs;
1144
1145     n_this_buffer = (p0->current_length > headers_size ?
1146                      p0->current_length - headers_size : 0);
1147     while (n_bytes_left)
1148       {
1149         sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1150         n_bytes_left -= n_this_buffer;
1151
1152         mb = mb->next;
1153         nb_segs--;
1154         if ((nb_segs == 0) || (mb == 0))
1155           break;
1156
1157         data_this_buffer = rte_ctrlmbuf_data(mb);
1158         n_this_buffer = mb->data_len;
1159       }
1160     if (n_bytes_left || nb_segs)
1161       {
1162         *bogus_lengthp = 1;
1163         return 0xfefe;
1164       }
1165   }
1166   else sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1167 #else
1168   if (p0 && n_this_buffer + headers_size  > p0->current_length)
1169     n_this_buffer = p0->current_length > headers_size  ? p0->current_length - headers_size  : 0;
1170   while (1)
1171     {
1172       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1173       n_bytes_left -= n_this_buffer;
1174       if (n_bytes_left == 0)
1175         break;
1176
1177       if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
1178         {
1179           *bogus_lengthp = 1;
1180           return 0xfefe;
1181         }
1182       p0 = vlib_get_buffer (vm, p0->next_buffer);
1183       data_this_buffer = vlib_buffer_get_current (p0);
1184       n_this_buffer = p0->current_length;
1185     }
1186 #endif /* DPDK */
1187
1188   sum16 = ~ ip_csum_fold (sum0);
1189
1190   return sum16;
1191 }
1192
1193 u32 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1194 {
1195   ip6_header_t * ip0 = vlib_buffer_get_current (p0);
1196   udp_header_t * udp0;
1197   u16 sum16;
1198   int bogus_length;
1199
1200   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
1201   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1202           || ip0->protocol == IP_PROTOCOL_ICMP6
1203           || ip0->protocol == IP_PROTOCOL_UDP
1204           || ip0->protocol ==  IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS);
1205
1206   udp0 = (void *) (ip0 + 1);
1207   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1208     {
1209       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1210                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1211       return p0->flags;
1212     }
1213
1214   sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, &bogus_length);
1215
1216   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1217                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1218
1219   return p0->flags;
1220 }
1221
1222 static uword
1223 ip6_local (vlib_main_t * vm,
1224            vlib_node_runtime_t * node,
1225            vlib_frame_t * frame)
1226 {
1227   ip6_main_t * im = &ip6_main;
1228   ip_lookup_main_t * lm = &im->lookup_main;
1229   ip_local_next_t next_index;
1230   u32 * from, * to_next, n_left_from, n_left_to_next;
1231   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_input_node.index);
1232
1233   from = vlib_frame_vector_args (frame);
1234   n_left_from = frame->n_vectors;
1235   next_index = node->cached_next_index;
1236
1237   if (node->flags & VLIB_NODE_FLAG_TRACE)
1238     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1239
1240   while (n_left_from > 0)
1241     {
1242       vlib_get_next_frame (vm, node, next_index,
1243                            to_next, n_left_to_next);
1244
1245       while (n_left_from >= 4 && n_left_to_next >= 2)
1246         {
1247           vlib_buffer_t * p0, * p1;
1248           ip6_header_t * ip0, * ip1;
1249           udp_header_t * udp0, * udp1;
1250           u32 pi0, ip_len0, udp_len0, flags0, next0;
1251           u32 pi1, ip_len1, udp_len1, flags1, next1;
1252           i32 len_diff0, len_diff1;
1253           u8 error0, type0, good_l4_checksum0;
1254           u8 error1, type1, good_l4_checksum1;
1255
1256           pi0 = to_next[0] = from[0];
1257           pi1 = to_next[1] = from[1];
1258           from += 2;
1259           n_left_from -= 2;
1260           to_next += 2;
1261           n_left_to_next -= 2;
1262
1263           p0 = vlib_get_buffer (vm, pi0);
1264           p1 = vlib_get_buffer (vm, pi1);
1265
1266           ip0 = vlib_buffer_get_current (p0);
1267           ip1 = vlib_buffer_get_current (p1);
1268
1269           type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
1270           type1 = lm->builtin_protocol_by_ip_protocol[ip1->protocol];
1271
1272           next0 = lm->local_next_by_ip_protocol[ip0->protocol];
1273           next1 = lm->local_next_by_ip_protocol[ip1->protocol];
1274
1275           flags0 = p0->flags;
1276           flags1 = p1->flags;
1277
1278           good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1279           good_l4_checksum1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1280
1281           udp0 = ip6_next_header (ip0);
1282           udp1 = ip6_next_header (ip1);
1283
1284           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1285           good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0;
1286           good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UDP && udp1->checksum == 0;
1287
1288           good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
1289           good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UNKNOWN;
1290
1291           /* Verify UDP length. */
1292           ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
1293           ip_len1 = clib_net_to_host_u16 (ip1->payload_length);
1294           udp_len0 = clib_net_to_host_u16 (udp0->length);
1295           udp_len1 = clib_net_to_host_u16 (udp1->length);
1296
1297           len_diff0 = ip_len0 - udp_len0;
1298           len_diff1 = ip_len1 - udp_len1;
1299
1300           len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
1301           len_diff1 = type1 == IP_BUILTIN_PROTOCOL_UDP ? len_diff1 : 0;
1302
1303           if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
1304                              && ! good_l4_checksum0
1305                              && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED)))
1306             {
1307               flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0);
1308               good_l4_checksum0 =
1309                 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1310             }
1311           if (PREDICT_FALSE (type1 != IP_BUILTIN_PROTOCOL_UNKNOWN
1312                              && ! good_l4_checksum1
1313                              && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED)))
1314             {
1315               flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, p1);
1316               good_l4_checksum1 =
1317                 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1318             }
1319
1320           error0 = error1 = IP6_ERROR_UNKNOWN_PROTOCOL;
1321
1322           error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0;
1323           error1 = len_diff1 < 0 ? IP6_ERROR_UDP_LENGTH : error1;
1324
1325           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP == IP6_ERROR_UDP_CHECKSUM);
1326           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP == IP6_ERROR_ICMP_CHECKSUM);
1327           error0 = (! good_l4_checksum0
1328                     ? IP6_ERROR_UDP_CHECKSUM + type0
1329                     : error0);
1330           error1 = (! good_l4_checksum1
1331                     ? IP6_ERROR_UDP_CHECKSUM + type1
1332                     : error1);
1333
1334           /* Drop packets from unroutable hosts. */
1335           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1336           if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL &&
1337               type0 != IP_BUILTIN_PROTOCOL_ICMP &&
1338               !ip6_address_is_link_local_unicast(&ip0->src_address))
1339             {
1340               u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
1341               error0 = (ADJ_INDEX_INVALID == src_adj_index0
1342                         ? IP6_ERROR_SRC_LOOKUP_MISS
1343                         : error0);
1344             }
1345           if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL &&
1346               type1 != IP_BUILTIN_PROTOCOL_ICMP &&
1347               !ip6_address_is_link_local_unicast(&ip1->src_address))
1348             {
1349               u32 src_adj_index1 = ip6_src_lookup_for_packet (im, p1, ip1);
1350               error1 = (ADJ_INDEX_INVALID == src_adj_index1
1351                         ? IP6_ERROR_SRC_LOOKUP_MISS
1352                         : error1);
1353             }
1354
1355           next0 = error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1356           next1 = error1 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1357
1358           p0->error = error_node->errors[error0];
1359           p1->error = error_node->errors[error1];
1360
1361           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1362                                            to_next, n_left_to_next,
1363                                            pi0, pi1, next0, next1);
1364         }
1365
1366       while (n_left_from > 0 && n_left_to_next > 0)
1367         {
1368           vlib_buffer_t * p0;
1369           ip6_header_t * ip0;
1370           udp_header_t * udp0;
1371           u32 pi0, ip_len0, udp_len0, flags0, next0;
1372           i32 len_diff0;
1373           u8 error0, type0, good_l4_checksum0;
1374
1375           pi0 = to_next[0] = from[0];
1376           from += 1;
1377           n_left_from -= 1;
1378           to_next += 1;
1379           n_left_to_next -= 1;
1380
1381           p0 = vlib_get_buffer (vm, pi0);
1382
1383           ip0 = vlib_buffer_get_current (p0);
1384
1385           type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
1386           next0 = lm->local_next_by_ip_protocol[ip0->protocol];
1387
1388           flags0 = p0->flags;
1389
1390           good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1391
1392           udp0 = ip6_next_header (ip0);
1393
1394           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1395           good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0;
1396
1397           good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
1398
1399           /* Verify UDP length. */
1400           ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
1401           udp_len0 = clib_net_to_host_u16 (udp0->length);
1402
1403           len_diff0 = ip_len0 - udp_len0;
1404
1405           len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
1406
1407           if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
1408                              && ! good_l4_checksum0
1409                              && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED)))
1410             {
1411               flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0);
1412               good_l4_checksum0 =
1413                 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1414             }
1415
1416           error0 = IP6_ERROR_UNKNOWN_PROTOCOL;
1417
1418           error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0;
1419
1420           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP == IP6_ERROR_UDP_CHECKSUM);
1421           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP == IP6_ERROR_ICMP_CHECKSUM);
1422           error0 = (! good_l4_checksum0
1423                     ? IP6_ERROR_UDP_CHECKSUM + type0
1424                     : error0);
1425
1426           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1427           if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL &&
1428               type0 != IP_BUILTIN_PROTOCOL_ICMP &&
1429               !ip6_address_is_link_local_unicast(&ip0->src_address))
1430             {
1431               u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
1432               error0 = (ADJ_INDEX_INVALID == src_adj_index0
1433                         ? IP6_ERROR_SRC_LOOKUP_MISS
1434                         : error0);
1435             }
1436
1437           next0 = error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1438
1439           p0->error = error_node->errors[error0];
1440
1441           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1442                                            to_next, n_left_to_next,
1443                                            pi0, next0);
1444         }
1445
1446       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1447     }
1448
1449   return frame->n_vectors;
1450 }
1451
1452 VLIB_REGISTER_NODE (ip6_local_node,static) = {
1453   .function = ip6_local,
1454   .name = "ip6-local",
1455   .vector_size = sizeof (u32),
1456
1457   .format_trace = format_ip6_forward_next_trace,
1458
1459   .n_next_nodes = IP_LOCAL_N_NEXT,
1460   .next_nodes = {
1461     [IP_LOCAL_NEXT_DROP] = "error-drop",
1462     [IP_LOCAL_NEXT_PUNT] = "error-punt",
1463     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
1464     [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input",
1465   },
1466 };
1467
1468 VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_node, ip6_local)
1469
1470 void ip6_register_protocol (u32 protocol, u32 node_index)
1471 {
1472   vlib_main_t * vm = vlib_get_main();
1473   ip6_main_t * im = &ip6_main;
1474   ip_lookup_main_t * lm = &im->lookup_main;
1475
1476   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1477   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip6_local_node.index, node_index);
1478 }
1479
/* Next-node indices shared by the ip6-discover-neighbor and ip6-glean nodes. */
typedef enum
{
  IP6_DISCOVER_NEIGHBOR_NEXT_DROP = 0,
  IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX = 1,
  IP6_DISCOVER_NEIGHBOR_N_NEXT = 2,
} ip6_discover_neighbor_next_t;
1485
/* Error/counter codes of the neighbor-discovery nodes; they index
   ip6_discover_neighbor_error_strings. */
typedef enum
{
  IP6_DISCOVER_NEIGHBOR_ERROR_DROP = 0,
  IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT = 1,
  IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS = 2,
} ip6_discover_neighbor_error_t;
1491
1492 static uword
1493 ip6_discover_neighbor_inline (vlib_main_t * vm,
1494                               vlib_node_runtime_t * node,
1495                               vlib_frame_t * frame,
1496                               int is_glean)
1497 {
1498   vnet_main_t * vnm = vnet_get_main();
1499   ip6_main_t * im = &ip6_main;
1500   ip_lookup_main_t * lm = &im->lookup_main;
1501   u32 * from, * to_next_drop;
1502   uword n_left_from, n_left_to_next_drop;
1503   static f64 time_last_seed_change = -1e100;
1504   static u32 hash_seeds[3];
1505   static uword hash_bitmap[256 / BITS (uword)];
1506   f64 time_now;
1507   int bogus_length;
1508
1509   if (node->flags & VLIB_NODE_FLAG_TRACE)
1510     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1511
1512   time_now = vlib_time_now (vm);
1513   if (time_now - time_last_seed_change > 1e-3)
1514     {
1515       uword i;
1516       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1517                                              sizeof (hash_seeds));
1518       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1519         hash_seeds[i] = r[i];
1520
1521       /* Mark all hash keys as been not-seen before. */
1522       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1523         hash_bitmap[i] = 0;
1524
1525       time_last_seed_change = time_now;
1526     }
1527
1528   from = vlib_frame_vector_args (frame);
1529   n_left_from = frame->n_vectors;
1530
1531   while (n_left_from > 0)
1532     {
1533       vlib_get_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP,
1534                            to_next_drop, n_left_to_next_drop);
1535
1536       while (n_left_from > 0 && n_left_to_next_drop > 0)
1537         {
1538           vlib_buffer_t * p0;
1539           ip6_header_t * ip0;
1540           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1541           uword bm0;
1542           ip_adjacency_t * adj0;
1543           vnet_hw_interface_t * hw_if0;
1544           u32 next0;
1545
1546           pi0 = from[0];
1547
1548           p0 = vlib_get_buffer (vm, pi0);
1549
1550           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1551
1552           ip0 = vlib_buffer_get_current (p0);
1553
1554           adj0 = ip_get_adjacency (lm, adj_index0);
1555
1556           if (!is_glean)
1557             {
1558               ip0->dst_address.as_u64[0] = adj0->sub_type.nbr.next_hop.ip6.as_u64[0];
1559               ip0->dst_address.as_u64[1] = adj0->sub_type.nbr.next_hop.ip6.as_u64[1];
1560             }
1561
1562           a0 = hash_seeds[0];
1563           b0 = hash_seeds[1];
1564           c0 = hash_seeds[2];
1565
1566           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1567           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1568
1569           a0 ^= sw_if_index0;
1570           b0 ^= ip0->dst_address.as_u32[0];
1571           c0 ^= ip0->dst_address.as_u32[1];
1572
1573           hash_v3_mix32 (a0, b0, c0);
1574
1575           b0 ^= ip0->dst_address.as_u32[2];
1576           c0 ^= ip0->dst_address.as_u32[3];
1577
1578           hash_v3_finalize32 (a0, b0, c0);
1579
1580           c0 &= BITS (hash_bitmap) - 1;
1581           c0 = c0 / BITS (uword);
1582           m0 = (uword) 1 << (c0 % BITS (uword));
1583
1584           bm0 = hash_bitmap[c0];
1585           drop0 = (bm0 & m0) != 0;
1586
1587           /* Mark it as seen. */
1588           hash_bitmap[c0] = bm0 | m0;
1589
1590           from += 1;
1591           n_left_from -= 1;
1592           to_next_drop[0] = pi0;
1593           to_next_drop += 1;
1594           n_left_to_next_drop -= 1;
1595
1596           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1597
1598           /* If the interface is link-down, drop the pkt */
1599           if (!(hw_if0->flags & VNET_HW_INTERFACE_FLAG_LINK_UP))
1600             drop0 = 1;
1601
1602           p0->error =
1603             node->errors[drop0 ? IP6_DISCOVER_NEIGHBOR_ERROR_DROP
1604                          : IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT];
1605           if (drop0)
1606             continue;
1607
1608           {
1609             u32 bi0 = 0;
1610             icmp6_neighbor_solicitation_header_t * h0;
1611             vlib_buffer_t * b0;
1612
1613             h0 = vlib_packet_template_get_packet
1614               (vm, &im->discover_neighbor_packet_template, &bi0);
1615
1616             /*
1617              * Build ethernet header.
1618              * Choose source address based on destination lookup
1619              * adjacency.
1620              */
1621             if (ip6_src_address_for_packet (lm,
1622                                             sw_if_index0,
1623                                             &h0->ip.src_address))
1624               {
1625                 /* There is no address on the interface */
1626                 p0->error = node->errors[IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS];
1627                 vlib_buffer_free(vm, &bi0, 1);
1628                 continue;
1629               }
1630
1631             /*
1632              * Destination address is a solicited node multicast address.
1633              * We need to fill in
1634              * the low 24 bits with low 24 bits of target's address.
1635              */
1636             h0->ip.dst_address.as_u8[13] = ip0->dst_address.as_u8[13];
1637             h0->ip.dst_address.as_u8[14] = ip0->dst_address.as_u8[14];
1638             h0->ip.dst_address.as_u8[15] = ip0->dst_address.as_u8[15];
1639
1640             h0->neighbor.target_address = ip0->dst_address;
1641
1642             clib_memcpy (h0->link_layer_option.ethernet_address,
1643                     hw_if0->hw_address, vec_len (hw_if0->hw_address));
1644
1645             /* $$$$ appears we need this; why is the checksum non-zero? */
1646             h0->neighbor.icmp.checksum = 0;
1647             h0->neighbor.icmp.checksum =
1648               ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h0->ip,
1649                                                  &bogus_length);
1650
1651             ASSERT (bogus_length == 0);
1652
1653             vlib_buffer_copy_trace_flag (vm, p0, bi0);
1654             b0 = vlib_get_buffer (vm, bi0);
1655             vnet_buffer (b0)->sw_if_index[VLIB_TX]
1656               = vnet_buffer (p0)->sw_if_index[VLIB_TX];
1657
1658             /* Add rewrite/encap string. */
1659             vnet_rewrite_one_header (adj0[0], h0,
1660                                      sizeof (ethernet_header_t));
1661             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1662
1663             next0 = IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX;
1664
1665             vlib_set_next_frame_buffer (vm, node, next0, bi0);
1666           }
1667         }
1668
1669       vlib_put_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP,
1670                            n_left_to_next_drop);
1671     }
1672
1673   return frame->n_vectors;
1674 }
1675
1676 static uword
1677 ip6_discover_neighbor (vlib_main_t * vm,
1678                        vlib_node_runtime_t * node,
1679                        vlib_frame_t * frame)
1680 {
1681     return (ip6_discover_neighbor_inline(vm, node, frame, 0));
1682 }
1683
1684 static uword
1685 ip6_glean (vlib_main_t * vm,
1686            vlib_node_runtime_t * node,
1687            vlib_frame_t * frame)
1688 {
1689     return (ip6_discover_neighbor_inline(vm, node, frame, 1));
1690 }
1691
1692 static char * ip6_discover_neighbor_error_strings[] = {
1693   [IP6_DISCOVER_NEIGHBOR_ERROR_DROP] = "address overflow drops",
1694   [IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT]
1695   = "neighbor solicitations sent",
1696   [IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS]
1697     = "no source address for ND solicitation",
1698 };
1699
1700 VLIB_REGISTER_NODE (ip6_discover_neighbor_node) = {
1701   .function = ip6_discover_neighbor,
1702   .name = "ip6-discover-neighbor",
1703   .vector_size = sizeof (u32),
1704
1705   .format_trace = format_ip6_forward_next_trace,
1706
1707   .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings),
1708   .error_strings = ip6_discover_neighbor_error_strings,
1709
1710   .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT,
1711   .next_nodes = {
1712     [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop",
1713     [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output",
1714   },
1715 };
1716
1717 VLIB_REGISTER_NODE (ip6_glean_node) = {
1718   .function = ip6_glean,
1719   .name = "ip6-glean",
1720   .vector_size = sizeof (u32),
1721
1722   .format_trace = format_ip6_forward_next_trace,
1723
1724   .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings),
1725   .error_strings = ip6_discover_neighbor_error_strings,
1726
1727   .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT,
1728   .next_nodes = {
1729     [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop",
1730     [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output",
1731   },
1732 };
1733
1734 clib_error_t *
1735 ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index)
1736 {
1737   vnet_main_t * vnm = vnet_get_main();
1738   ip6_main_t * im = &ip6_main;
1739   icmp6_neighbor_solicitation_header_t * h;
1740   ip6_address_t * src;
1741   ip_interface_address_t * ia;
1742   ip_adjacency_t * adj;
1743   vnet_hw_interface_t * hi;
1744   vnet_sw_interface_t * si;
1745   vlib_buffer_t * b;
1746   u32 bi = 0;
1747   int bogus_length;
1748
1749   si = vnet_get_sw_interface (vnm, sw_if_index);
1750
1751   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
1752     {
1753       return clib_error_return (0, "%U: interface %U down",
1754                                 format_ip6_address, dst,
1755                                 format_vnet_sw_if_index_name, vnm,
1756                                 sw_if_index);
1757     }
1758
1759   src = ip6_interface_address_matching_destination (im, dst, sw_if_index, &ia);
1760   if (! src)
1761     {
1762       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
1763       return clib_error_return
1764         (0, "no matching interface address for destination %U (interface %U)",
1765          format_ip6_address, dst,
1766          format_vnet_sw_if_index_name, vnm, sw_if_index);
1767     }
1768
1769   h = vlib_packet_template_get_packet (vm, &im->discover_neighbor_packet_template, &bi);
1770
1771   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
1772
1773   /* Destination address is a solicited node multicast address.  We need to fill in
1774      the low 24 bits with low 24 bits of target's address. */
1775   h->ip.dst_address.as_u8[13] = dst->as_u8[13];
1776   h->ip.dst_address.as_u8[14] = dst->as_u8[14];
1777   h->ip.dst_address.as_u8[15] = dst->as_u8[15];
1778
1779   h->ip.src_address = src[0];
1780   h->neighbor.target_address = dst[0];
1781
1782   clib_memcpy (h->link_layer_option.ethernet_address, hi->hw_address, vec_len (hi->hw_address));
1783
1784   h->neighbor.icmp.checksum =
1785     ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length);
1786   ASSERT(bogus_length == 0);
1787
1788   b = vlib_get_buffer (vm, bi);
1789   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
1790
1791   /* Add encapsulation string for software interface (e.g. ethernet header). */
1792   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
1793   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
1794   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
1795
1796   {
1797     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
1798     u32 * to_next = vlib_frame_vector_args (f);
1799     to_next[0] = bi;
1800     f->n_vectors = 1;
1801     vlib_put_frame_to_node (vm, hi->output_node_index, f);
1802   }
1803
1804   return /* no error */ 0;
1805 }
1806
/* Next-node indices used by the ip6 rewrite nodes. */
typedef enum {
  /* Packet is dropped (default disposition before a rewrite succeeds) */
  IP6_REWRITE_NEXT_DROP = 0,
  /* Packet is handed on so an ICMP error can be generated */
  IP6_REWRITE_NEXT_ICMP_ERROR = 1,
} ip6_rewrite_next_t;
1811
1812 always_inline uword
1813 ip6_rewrite_inline (vlib_main_t * vm,
1814                     vlib_node_runtime_t * node,
1815                     vlib_frame_t * frame,
1816                     int rewrite_for_locally_received_packets,
1817                     int is_midchain)
1818 {
1819   ip_lookup_main_t * lm = &ip6_main.lookup_main;
1820   u32 * from = vlib_frame_vector_args (frame);
1821   u32 n_left_from, n_left_to_next, * to_next, next_index;
1822   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_input_node.index);
1823   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
1824   ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
1825
1826   n_left_from = frame->n_vectors;
1827   next_index = node->cached_next_index;
1828   u32 cpu_index = os_get_cpu_number();
1829
1830   while (n_left_from > 0)
1831     {
1832       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1833
1834       while (n_left_from >= 4 && n_left_to_next >= 2)
1835         {
1836           ip_adjacency_t * adj0, * adj1;
1837           vlib_buffer_t * p0, * p1;
1838           ip6_header_t * ip0, * ip1;
1839           u32 pi0, rw_len0, next0, error0, adj_index0;
1840           u32 pi1, rw_len1, next1, error1, adj_index1;
1841           u32 tx_sw_if_index0, tx_sw_if_index1;
1842
1843           /* Prefetch next iteration. */
1844           {
1845             vlib_buffer_t * p2, * p3;
1846
1847             p2 = vlib_get_buffer (vm, from[2]);
1848             p3 = vlib_get_buffer (vm, from[3]);
1849
1850             vlib_prefetch_buffer_header (p2, LOAD);
1851             vlib_prefetch_buffer_header (p3, LOAD);
1852
1853             CLIB_PREFETCH (p2->pre_data, 32, STORE);
1854             CLIB_PREFETCH (p3->pre_data, 32, STORE);
1855
1856             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
1857             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
1858           }
1859
1860           pi0 = to_next[0] = from[0];
1861           pi1 = to_next[1] = from[1];
1862
1863           from += 2;
1864           n_left_from -= 2;
1865           to_next += 2;
1866           n_left_to_next -= 2;
1867
1868           p0 = vlib_get_buffer (vm, pi0);
1869           p1 = vlib_get_buffer (vm, pi1);
1870
1871           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
1872           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
1873
1874           /* We should never rewrite a pkt using the MISS adjacency */
1875           ASSERT(adj_index0 && adj_index1);
1876
1877           ip0 = vlib_buffer_get_current (p0);
1878           ip1 = vlib_buffer_get_current (p1);
1879
1880           error0 = error1 = IP6_ERROR_NONE;
1881           next0 = next1 = IP6_REWRITE_NEXT_DROP;
1882
1883           if (! rewrite_for_locally_received_packets)
1884             {
1885               i32 hop_limit0 = ip0->hop_limit, hop_limit1 = ip1->hop_limit;
1886
1887               /* Input node should have reject packets with hop limit 0. */
1888               ASSERT (ip0->hop_limit > 0);
1889               ASSERT (ip1->hop_limit > 0);
1890
1891               hop_limit0 -= 1;
1892               hop_limit1 -= 1;
1893
1894               ip0->hop_limit = hop_limit0;
1895               ip1->hop_limit = hop_limit1;
1896
1897               /*
1898                * If the hop count drops below 1 when forwarding, generate
1899                * an ICMP response.
1900                */
1901               if (PREDICT_FALSE(hop_limit0 <= 0))
1902                 {
1903                   error0 = IP6_ERROR_TIME_EXPIRED;
1904                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
1905                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
1906                   icmp6_error_set_vnet_buffer(p0, ICMP6_time_exceeded,
1907                         ICMP6_time_exceeded_ttl_exceeded_in_transit, 0);
1908                 }
1909               if (PREDICT_FALSE(hop_limit1 <= 0))
1910                 {
1911                   error1 = IP6_ERROR_TIME_EXPIRED;
1912                   next1 = IP6_REWRITE_NEXT_ICMP_ERROR;
1913                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
1914                   icmp6_error_set_vnet_buffer(p1, ICMP6_time_exceeded,
1915                         ICMP6_time_exceeded_ttl_exceeded_in_transit, 0);
1916                 }
1917             }
1918
1919           adj0 = ip_get_adjacency (lm, adj_index0);
1920           adj1 = ip_get_adjacency (lm, adj_index1);
1921
1922           rw_len0 = adj0[0].rewrite_header.data_bytes;
1923           rw_len1 = adj1[0].rewrite_header.data_bytes;
1924           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
1925           vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
1926
1927           vlib_increment_combined_counter (&adjacency_counters,
1928                                            cpu_index,
1929                                            adj_index0,
1930                                            /* packet increment */ 0,
1931                                            /* byte increment */ rw_len0);
1932           vlib_increment_combined_counter (&adjacency_counters,
1933                                            cpu_index,
1934                                            adj_index1,
1935                                            /* packet increment */ 0,
1936                                            /* byte increment */ rw_len1);
1937
1938           /* Check MTU of outgoing interface. */
1939           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
1940                     ? IP6_ERROR_MTU_EXCEEDED
1941                     : error0);
1942           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
1943                     ? IP6_ERROR_MTU_EXCEEDED
1944                     : error1);
1945
1946           /* Don't adjust the buffer for hop count issue; icmp-error node
1947            * wants to see the IP headerr */
1948           if (PREDICT_TRUE(error0 == IP6_ERROR_NONE))
1949             {
1950               p0->current_data -= rw_len0;
1951               p0->current_length += rw_len0;
1952
1953               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
1954               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
1955                   tx_sw_if_index0;
1956               next0 = adj0[0].rewrite_header.next_index;
1957
1958               if (PREDICT_FALSE
1959                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
1960                                     tx_sw_if_index0)))
1961                 {
1962                   p0->current_config_index =
1963                     vec_elt (cm->config_index_by_sw_if_index,
1964                              tx_sw_if_index0);
1965                   vnet_get_config_data (&cm->config_main,
1966                                         &p0->current_config_index,
1967                                         &next0,
1968                                         /* # bytes of config data */ 0);
1969                 }
1970             }
1971           if (PREDICT_TRUE(error1 == IP6_ERROR_NONE))
1972             {
1973               p1->current_data -= rw_len1;
1974               p1->current_length += rw_len1;
1975
1976               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
1977               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
1978                   tx_sw_if_index1;
1979               next1 = adj1[0].rewrite_header.next_index;
1980
1981               if (PREDICT_FALSE
1982                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
1983                                     tx_sw_if_index1)))
1984                 {
1985                   p1->current_config_index =
1986                     vec_elt (cm->config_index_by_sw_if_index,
1987                              tx_sw_if_index1);
1988                   vnet_get_config_data (&cm->config_main,
1989                                         &p1->current_config_index,
1990                                         &next1,
1991                                         /* # bytes of config data */ 0);
1992                 }
1993             }
1994
1995           /* Guess we are only writing on simple Ethernet header. */
1996           vnet_rewrite_two_headers (adj0[0], adj1[0],
1997                                     ip0, ip1,
1998                                     sizeof (ethernet_header_t));
1999
2000           if (is_midchain)
2001           {
2002               adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2003               adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
2004           }
2005
2006           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2007                                            to_next, n_left_to_next,
2008                                            pi0, pi1, next0, next1);
2009         }
2010
2011       while (n_left_from > 0 && n_left_to_next > 0)
2012         {
2013           ip_adjacency_t * adj0;
2014           vlib_buffer_t * p0;
2015           ip6_header_t * ip0;
2016           u32 pi0, rw_len0;
2017           u32 adj_index0, next0, error0;
2018           u32 tx_sw_if_index0;
2019
2020           pi0 = to_next[0] = from[0];
2021
2022           p0 = vlib_get_buffer (vm, pi0);
2023
2024           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2025
2026           /* We should never rewrite a pkt using the MISS adjacency */
2027           ASSERT(adj_index0);
2028
2029           adj0 = ip_get_adjacency (lm, adj_index0);
2030
2031           ip0 = vlib_buffer_get_current (p0);
2032
2033           error0 = IP6_ERROR_NONE;
2034           next0 = IP6_REWRITE_NEXT_DROP;
2035
2036           /* Check hop limit */
2037           if (! rewrite_for_locally_received_packets)
2038             {
2039               i32 hop_limit0 = ip0->hop_limit;
2040
2041               ASSERT (ip0->hop_limit > 0);
2042
2043               hop_limit0 -= 1;
2044
2045               ip0->hop_limit = hop_limit0;
2046
2047               if (PREDICT_FALSE(hop_limit0 <= 0))
2048                 {
2049                   /*
2050                    * If the hop count drops below 1 when forwarding, generate
2051                    * an ICMP response.
2052                    */
2053                   error0 = IP6_ERROR_TIME_EXPIRED;
2054                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
2055                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2056                   icmp6_error_set_vnet_buffer(p0, ICMP6_time_exceeded,
2057                         ICMP6_time_exceeded_ttl_exceeded_in_transit, 0);
2058                 }
2059             }
2060
2061           /* Guess we are only writing on simple Ethernet header. */
2062           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2063
2064           /* Update packet buffer attributes/set output interface. */
2065           rw_len0 = adj0[0].rewrite_header.data_bytes;
2066           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2067
2068           vlib_increment_combined_counter (&adjacency_counters,
2069                                            cpu_index,
2070                                            adj_index0,
2071                                            /* packet increment */ 0,
2072                                            /* byte increment */ rw_len0);
2073
2074           /* Check MTU of outgoing interface. */
2075           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2076                     ? IP6_ERROR_MTU_EXCEEDED
2077                     : error0);
2078
2079           /* Don't adjust the buffer for hop count issue; icmp-error node
2080            * wants to see the IP headerr */
2081           if (PREDICT_TRUE(error0 == IP6_ERROR_NONE))
2082             {
2083               p0->current_data -= rw_len0;
2084               p0->current_length += rw_len0;
2085
2086               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2087
2088               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2089               next0 = adj0[0].rewrite_header.next_index;
2090
2091               if (PREDICT_FALSE
2092                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2093                                     tx_sw_if_index0)))
2094                   {
2095                     p0->current_config_index =
2096                       vec_elt (cm->config_index_by_sw_if_index,
2097                                tx_sw_if_index0);
2098                     vnet_get_config_data (&cm->config_main,
2099                                           &p0->current_config_index,
2100                                           &next0,
2101                                           /* # bytes of config data */ 0);
2102                   }
2103             }
2104
2105           if (is_midchain)
2106           {
2107               adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2108           }
2109
2110           p0->error = error_node->errors[error0];
2111
2112           from += 1;
2113           n_left_from -= 1;
2114           to_next += 1;
2115           n_left_to_next -= 1;
2116
2117           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2118                                            to_next, n_left_to_next,
2119                                            pi0, next0);
2120         }
2121
2122       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2123     }
2124
2125   /* Need to do trace after rewrites to pick up new packet data. */
2126   if (node->flags & VLIB_NODE_FLAG_TRACE)
2127     ip6_forward_next_trace (vm, node, frame, adj_rx_tx);
2128
2129   return frame->n_vectors;
2130 }
2131
2132 static uword
2133 ip6_rewrite_transit (vlib_main_t * vm,
2134                      vlib_node_runtime_t * node,
2135                      vlib_frame_t * frame)
2136 {
2137   return ip6_rewrite_inline (vm, node, frame,
2138                              /* rewrite_for_locally_received_packets */ 0,
2139                              /* midchain */ 0);
2140 }
2141
2142 static uword
2143 ip6_rewrite_local (vlib_main_t * vm,
2144                    vlib_node_runtime_t * node,
2145                    vlib_frame_t * frame)
2146 {
2147   return ip6_rewrite_inline (vm, node, frame,
2148                              /* rewrite_for_locally_received_packets */ 1,
2149                              /* midchain */ 0);
2150 }
2151
2152 static uword
2153 ip6_midchain (vlib_main_t * vm,
2154               vlib_node_runtime_t * node,
2155               vlib_frame_t * frame)
2156 {
2157   return ip6_rewrite_inline (vm, node, frame,
2158                              /* rewrite_for_locally_received_packets */ 0,
2159                              /* midchain */ 1);
2160 }
2161
2162 VLIB_REGISTER_NODE (ip6_midchain_node) = {
2163   .function = ip6_midchain,
2164   .name = "ip6-midchain",
2165   .vector_size = sizeof (u32),
2166
2167   .format_trace = format_ip6_forward_next_trace,
2168
2169   .sibling_of = "ip6-rewrite",
2170
2171   .next_nodes = {
2172     [IP6_REWRITE_NEXT_DROP] = "error-drop",
2173   },
2174 };
2175
2176 VLIB_NODE_FUNCTION_MULTIARCH (ip6_midchain_node, ip6_midchain)
2177
2178 VLIB_REGISTER_NODE (ip6_rewrite_node) = {
2179   .function = ip6_rewrite_transit,
2180   .name = "ip6-rewrite",
2181   .vector_size = sizeof (u32),
2182
2183   .format_trace = format_ip6_rewrite_trace,
2184
2185   .n_next_nodes = 2,
2186   .next_nodes = {
2187     [IP6_REWRITE_NEXT_DROP] = "error-drop",
2188     [IP6_REWRITE_NEXT_ICMP_ERROR] = "ip6-icmp-error",
2189   },
2190 };
2191
2192 VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite_transit);
2193
2194 VLIB_REGISTER_NODE (ip6_rewrite_local_node) = {
2195   .function = ip6_rewrite_local,
2196   .name = "ip6-rewrite-local",
2197   .vector_size = sizeof (u32),
2198
2199   .sibling_of = "ip6-rewrite",
2200
2201   .format_trace = format_ip6_rewrite_trace,
2202
2203   .n_next_nodes = 0,
2204 };
2205
2206 VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_local_node, ip6_rewrite_local);
2207
/*
 * Hop-by-Hop handling
 */

/*
 * Global hop-by-hop state. The code in this file reads from it:
 *  - options[type]: per-option-type handler called while scanning options
 *  - trace[type]:   per-option-type formatter used when printing traces
 *  - next_override: next index applied to buffers whose classifier
 *                   opaque_index is OI_DECAP
 */
ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
2213
/*
 * X-macro listing the hop-by-hop error counters as (symbol, description)
 * pairs. Expanded below into the error enum, and elsewhere into the
 * matching string table.
 */
#define foreach_ip6_hop_by_hop_error                            \
_(PROCESSED, "pkts with ip6 hop-by-hop options")                \
_(FORMAT, "incorrectly formatted hop-by-hop options")           \
_(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options")

/* Error codes: one IP6_HOP_BY_HOP_ERROR_<symbol> per list entry. */
typedef enum {
#define _(name,text) IP6_HOP_BY_HOP_ERROR_##name,
  foreach_ip6_hop_by_hop_error
#undef _
  /* Number of error codes; keep last */
  IP6_HOP_BY_HOP_N_ERROR,
} ip6_hop_by_hop_error_t;
2225
/*
 * Primary h-b-h handler trace support
 * We work pretty hard on the problem for obvious reasons
 */
typedef struct {
  /* Next index chosen for the traced packet */
  u32 next_index;
  /* Number of valid bytes in option_data (capped at its array size) */
  u32 trace_len;
  /* Verbatim copy of the hop-by-hop header, starting at the header itself */
  u8 option_data[256];
} ip6_hop_by_hop_trace_t;

/* Node registration object; its index is used to look up the error node runtime */
vlib_node_registration_t ip6_hop_by_hop_node;
2237
2238 static char * ip6_hop_by_hop_error_strings[] = {
2239 #define _(sym,string) string,
2240   foreach_ip6_hop_by_hop_error
2241 #undef _
2242 };
2243
2244 static u8 *
2245 format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
2246 {
2247   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
2248   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
2249   ip6_hop_by_hop_trace_t * t = va_arg (*args, ip6_hop_by_hop_trace_t *);
2250   ip6_hop_by_hop_header_t *hbh0;
2251   ip6_hop_by_hop_option_t *opt0, *limit0;
2252   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2253
2254   u8 type0;
2255
2256   hbh0 = (ip6_hop_by_hop_header_t *)t->option_data;
2257
2258   s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d",
2259               t->next_index, (hbh0->length+1)<<3, t->trace_len);
2260
2261   opt0 = (ip6_hop_by_hop_option_t *) (hbh0+1);
2262   limit0 = (ip6_hop_by_hop_option_t *) ((u8 *)hbh0) + t->trace_len;
2263
2264   while (opt0 < limit0) {
2265     type0 = opt0->type;
2266     switch (type0) {
2267     case 0: /* Pad, just stop */
2268       opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1;
2269       break;
2270
2271     default:
2272       if (hm->trace[type0]) {
2273         s = (*hm->trace[type0])(s, opt0);
2274       } else {
2275         s = format (s, "\n    unrecognized option %d length %d", type0, opt0->length);
2276       }
2277       opt0 = (ip6_hop_by_hop_option_t *) (((u8 *)opt0) + opt0->length + sizeof (ip6_hop_by_hop_option_t));
2278       break;
2279     }
2280   }
2281   return s;
2282 }
2283
2284 always_inline u8 ip6_scan_hbh_options (
2285                                        vlib_buffer_t * b0,
2286                                        ip6_header_t *ip0,
2287                                        ip6_hop_by_hop_header_t *hbh0,
2288                                        ip6_hop_by_hop_option_t *opt0,
2289                                        ip6_hop_by_hop_option_t *limit0,
2290                                        u32 *next0)
2291 {
2292   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2293   u8 type0;
2294   u8 error0 = 0;
2295
2296   while (opt0 < limit0)
2297     {
2298       type0 = opt0->type;
2299       switch (type0)
2300         {
2301         case 0: /* Pad1 */
2302           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1;
2303           continue;
2304         case 1: /* PadN */
2305           break;
2306         default:
2307           if (hm->options[type0])
2308             {
2309               if ((*hm->options[type0])(b0, ip0, opt0) < 0)
2310                 {
2311                   error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2312                   return(error0);
2313                 }
2314             }
2315           else
2316             {
2317               /* Unrecognized mandatory option, check the two high order bits */
2318               switch (opt0->type & HBH_OPTION_TYPE_HIGH_ORDER_BITS)
2319                 {
2320                 case HBH_OPTION_TYPE_SKIP_UNKNOWN:
2321                   break;
2322                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN:
2323                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2324                   *next0 = IP_LOOKUP_NEXT_DROP;
2325                   break;
2326                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP:
2327                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2328                   *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
2329                   icmp6_error_set_vnet_buffer(b0, ICMP6_parameter_problem,
2330                                               ICMP6_parameter_problem_unrecognized_option, (u8 *)opt0 - (u8 *)ip0);
2331                   break;
2332                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST:
2333                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2334                   if (!ip6_address_is_multicast(&ip0->dst_address))
2335                     {
2336                       *next0 =  IP_LOOKUP_NEXT_ICMP_ERROR;
2337                       icmp6_error_set_vnet_buffer(b0, ICMP6_parameter_problem,
2338                                                   ICMP6_parameter_problem_unrecognized_option, (u8 *)opt0 - (u8 *)ip0);
2339                     }
2340                   else
2341                     {
2342                       *next0 =  IP_LOOKUP_NEXT_DROP;
2343                     }
2344                   break;
2345                 }
2346               return(error0);
2347             }
2348         }
2349       opt0 = (ip6_hop_by_hop_option_t *) (((u8 *)opt0) + opt0->length + sizeof (ip6_hop_by_hop_option_t));
2350     }
2351   return(error0);
2352 }
2353
2354 /*
2355  * Process the Hop-by-Hop Options header
2356  */
2357 static uword
2358 ip6_hop_by_hop (vlib_main_t * vm,
2359                 vlib_node_runtime_t * node,
2360                 vlib_frame_t * frame)
2361 {
2362   vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_hop_by_hop_node.index);
2363   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2364   u32 n_left_from, *from, *to_next;
2365   ip_lookup_next_t next_index;
2366   ip6_main_t * im = &ip6_main;
2367   ip_lookup_main_t *lm = &im->lookup_main;
2368
2369   from = vlib_frame_vector_args (frame);
2370   n_left_from = frame->n_vectors;
2371   next_index = node->cached_next_index;
2372
2373   while (n_left_from > 0) {
2374     u32 n_left_to_next;
2375
2376     vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2377
2378     while (n_left_from >= 4 && n_left_to_next >= 2) {
2379       u32 bi0, bi1;
2380       vlib_buffer_t * b0, *b1;
2381       u32 next0, next1;
2382       ip6_header_t * ip0, *ip1;
2383       ip6_hop_by_hop_header_t *hbh0, *hbh1;
2384       ip6_hop_by_hop_option_t *opt0, *limit0, *opt1, *limit1;
2385       u8 error0 = 0, error1 = 0;
2386
2387       /* Prefetch next iteration. */
2388       {
2389         vlib_buffer_t * p2, * p3;
2390
2391         p2 = vlib_get_buffer (vm, from[2]);
2392         p3 = vlib_get_buffer (vm, from[3]);
2393
2394         vlib_prefetch_buffer_header (p2, LOAD);
2395         vlib_prefetch_buffer_header (p3, LOAD);
2396
2397         CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
2398         CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
2399       }
2400
2401       /* Speculatively enqueue b0, b1 to the current next frame */
2402       to_next[0] = bi0 = from[0];
2403       to_next[1] = bi1 = from[1];
2404       from += 2;
2405       to_next += 2;
2406       n_left_from -= 2;
2407       n_left_to_next -= 2;
2408
2409       b0 = vlib_get_buffer (vm, bi0);
2410       b1 = vlib_get_buffer (vm, bi1);
2411       u32 adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
2412       ip_adjacency_t *adj0 = ip_get_adjacency(lm, adj_index0);
2413       u32 adj_index1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
2414       ip_adjacency_t *adj1 = ip_get_adjacency(lm, adj_index1);
2415
2416       /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2417       next0 = adj0->lookup_next_index;
2418       next1 = adj1->lookup_next_index;
2419
2420       ip0 = vlib_buffer_get_current (b0);
2421       ip1 = vlib_buffer_get_current (b1);
2422       hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1);
2423       hbh1 = (ip6_hop_by_hop_header_t *)(ip1+1);
2424       opt0 = (ip6_hop_by_hop_option_t *)(hbh0+1);
2425       opt1 = (ip6_hop_by_hop_option_t *)(hbh1+1);
2426       limit0 = (ip6_hop_by_hop_option_t *)((u8 *)hbh0 + ((hbh0->length + 1) << 3));
2427       limit1 = (ip6_hop_by_hop_option_t *)((u8 *)hbh1 + ((hbh1->length + 1) << 3));
2428
2429       /*
2430        * Basic validity checks
2431        */
2432       if ((hbh0->length + 1) << 3 > clib_net_to_host_u16(ip0->payload_length)) {
2433         error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2434         next0 = IP_LOOKUP_NEXT_DROP;
2435         goto outdual;
2436       }
2437       /* Scan the set of h-b-h options, process ones that we understand */
2438       error0 = ip6_scan_hbh_options(b0, ip0, hbh0, opt0, limit0, &next0);
2439
2440       if ((hbh1->length + 1) << 3 > clib_net_to_host_u16(ip1->payload_length)) {
2441         error1 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2442         next1 = IP_LOOKUP_NEXT_DROP;
2443         goto outdual;
2444       }
2445       /* Scan the set of h-b-h options, process ones that we understand */
2446       error1 = ip6_scan_hbh_options(b1,ip1,hbh1,opt1,limit1, &next1);
2447
2448     outdual:
2449       /* Has the classifier flagged this buffer for special treatment? */
2450       if ((error0 == 0) && (vnet_buffer(b0)->l2_classify.opaque_index == OI_DECAP))
2451         next0 = hm->next_override;
2452
2453       /* Has the classifier flagged this buffer for special treatment? */
2454       if ((error1 == 0) && (vnet_buffer(b1)->l2_classify.opaque_index == OI_DECAP))
2455         next1 = hm->next_override;
2456
2457       if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
2458         {
2459           if (b0->flags & VLIB_BUFFER_IS_TRACED) {
2460             ip6_hop_by_hop_trace_t *t = vlib_add_trace(vm, node, b0, sizeof (*t));
2461             u32 trace_len = (hbh0->length + 1) << 3;
2462             t->next_index = next0;
2463             /* Capture the h-b-h option verbatim */
2464             trace_len = trace_len < ARRAY_LEN(t->option_data) ? trace_len : ARRAY_LEN(t->option_data);
2465             t->trace_len = trace_len;
2466             clib_memcpy(t->option_data, hbh0, trace_len);
2467           }
2468           if (b1->flags & VLIB_BUFFER_IS_TRACED) {
2469             ip6_hop_by_hop_trace_t *t = vlib_add_trace(vm, node, b1, sizeof (*t));
2470             u32 trace_len = (hbh1->length + 1) << 3;
2471             t->next_index = next1;
2472             /* Capture the h-b-h option verbatim */
2473             trace_len = trace_len < ARRAY_LEN(t->option_data) ? trace_len : ARRAY_LEN(t->option_data);
2474             t->trace_len = trace_len;
2475             clib_memcpy(t->option_data, hbh1, trace_len);
2476           }
2477
2478         }
2479
2480       b0->error = error_node->errors[error0];
2481       b1->error = error_node->errors[error1];
2482
2483       /* verify speculative enqueue, maybe switch current next frame */
2484       vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, n_left_to_next, bi0,
2485                                        bi1,next0, next1);
2486     }
2487
2488     while (n_left_from > 0 && n_left_to_next > 0) {
2489       u32 bi0;
2490       vlib_buffer_t * b0;
2491       u32 next0;
2492       ip6_header_t * ip0;
2493       ip6_hop_by_hop_header_t *hbh0;
2494       ip6_hop_by_hop_option_t *opt0, *limit0;
2495       u8 error0 = 0;
2496
2497       /* Speculatively enqueue b0 to the current next frame */
2498       bi0 = from[0];
2499       to_next[0] = bi0;
2500       from += 1;
2501       to_next += 1;
2502       n_left_from -= 1;
2503       n_left_to_next -= 1;
2504
2505       b0 = vlib_get_buffer (vm, bi0);
2506       u32 adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
2507       ip_adjacency_t *adj0 = ip_get_adjacency(lm, adj_index0);
2508       /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2509       next0 = adj0->lookup_next_index;
2510
2511       ip0 = vlib_buffer_get_current (b0);
2512       hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1);
2513       opt0 = (ip6_hop_by_hop_option_t *)(hbh0+1);
2514       limit0 = (ip6_hop_by_hop_option_t *)((u8 *)hbh0 + ((hbh0->length + 1) << 3));
2515
2516       /*
2517        * Basic validity checks
2518        */
2519       if ((hbh0->length + 1) << 3 > clib_net_to_host_u16(ip0->payload_length)) {
2520         error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2521         next0 = IP_LOOKUP_NEXT_DROP;
2522         goto out0;
2523       }
2524
2525       /* Scan the set of h-b-h options, process ones that we understand */
2526       error0 = ip6_scan_hbh_options(b0, ip0, hbh0, opt0, limit0, &next0);
2527
2528     out0:
2529       /* Has the classifier flagged this buffer for special treatment? */
2530       if ((error0 == 0) && (vnet_buffer(b0)->l2_classify.opaque_index == OI_DECAP))
2531         next0 = hm->next_override;
2532
2533       if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) {
2534         ip6_hop_by_hop_trace_t *t = vlib_add_trace(vm, node, b0, sizeof (*t));
2535         u32 trace_len = (hbh0->length + 1) << 3;
2536         t->next_index = next0;
2537         /* Capture the h-b-h option verbatim */
2538         trace_len = trace_len < ARRAY_LEN(t->option_data) ? trace_len : ARRAY_LEN(t->option_data);
2539         t->trace_len = trace_len;
2540         clib_memcpy(t->option_data, hbh0, trace_len);
2541       }
2542
2543       b0->error = error_node->errors[error0];
2544
2545       /* verify speculative enqueue, maybe switch current next frame */
2546       vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0);
2547     }
2548     vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2549   }
2550   return frame->n_vectors;
2551 }
2552
2553 VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = {
2554   .function = ip6_hop_by_hop,
2555   .name = "ip6-hop-by-hop",
2556   .sibling_of = "ip6-lookup",
2557   .vector_size = sizeof (u32),
2558   .format_trace = format_ip6_hop_by_hop_trace,
2559   .type = VLIB_NODE_TYPE_INTERNAL,
2560   .n_errors = ARRAY_LEN(ip6_hop_by_hop_error_strings),
2561   .error_strings = ip6_hop_by_hop_error_strings,
2562   .n_next_nodes = 0,
2563 };
2564
2565 VLIB_NODE_FUNCTION_MULTIARCH (ip6_hop_by_hop_node, ip6_hop_by_hop);
2566
2567 static clib_error_t *
2568 ip6_hop_by_hop_init (vlib_main_t * vm)
2569 {
2570   ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
2571   memset(hm->options, 0, sizeof(hm->options));
2572   memset(hm->trace, 0, sizeof(hm->trace));
2573   hm->next_override = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP;
2574   return (0);
2575 }
2576
2577 VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
2578
2579 void ip6_hbh_set_next_override (uword next)
2580 {
2581   ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
2582
2583   hm->next_override = next;
2584 }
2585
2586 int
2587 ip6_hbh_register_option (u8 option,
2588                          int options(vlib_buffer_t *b, ip6_header_t *ip, ip6_hop_by_hop_option_t *opt),
2589                          u8 *trace(u8 *s, ip6_hop_by_hop_option_t *opt))
2590 {
2591   ip6_main_t * im = &ip6_main;
2592   ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
2593
2594   ASSERT (option < ARRAY_LEN (hm->options));
2595
2596   /* Already registered */
2597   if (hm->options[option])
2598     return (-1);
2599
2600   hm->options[option] = options;
2601   hm->trace[option] = trace;
2602
2603   /* Set global variable */
2604   im->hbh_enabled = 1;
2605
2606   return (0);
2607 }
2608
2609 int
2610 ip6_hbh_unregister_option (u8 option)
2611 {
2612   ip6_main_t * im = &ip6_main;
2613   ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
2614
2615   ASSERT (option < ARRAY_LEN (hm->options));
2616
2617   /* Not registered */
2618   if (!hm->options[option])
2619     return (-1);
2620
2621   hm->options[option] = NULL;
2622   hm->trace[option] = NULL;
2623
2624   /* Disable global knob if this was the last option configured */
2625   int i;
2626   bool found = false;
2627   for (i = 0; i < 256; i++) {
2628     if (hm->options[option]) {
2629       found = true;
2630       break;
2631     }
2632   }
2633   if (!found)
2634     im->hbh_enabled = 0;
2635
2636   return (0);
2637 }
2638
2639 /* Global IP6 main. */
2640 ip6_main_t ip6_main;
2641
2642 static clib_error_t *
2643 ip6_lookup_init (vlib_main_t * vm)
2644 {
2645   ip6_main_t * im = &ip6_main;
2646   clib_error_t * error;
2647   uword i;
2648
2649   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
2650     {
2651       u32 j, i0, i1;
2652
2653       i0 = i / 32;
2654       i1 = i % 32;
2655
2656       for (j = 0; j < i0; j++)
2657         im->fib_masks[i].as_u32[j] = ~0;
2658
2659       if (i1)
2660         im->fib_masks[i].as_u32[i0] = clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
2661     }
2662
2663   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 1);
2664
2665   if (im->lookup_table_nbuckets == 0)
2666     im->lookup_table_nbuckets = IP6_FIB_DEFAULT_HASH_NUM_BUCKETS;
2667
2668   im->lookup_table_nbuckets = 1<< max_log2 (im->lookup_table_nbuckets);
2669
2670   if (im->lookup_table_size == 0)
2671     im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE;
2672
2673   BV(clib_bihash_init) (&(im->ip6_table[IP6_FIB_TABLE_FWDING].ip6_hash),
2674                         "ip6 FIB fwding table",
2675                         im->lookup_table_nbuckets,
2676                         im->lookup_table_size);
2677   BV(clib_bihash_init) (&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash,
2678                         "ip6 FIB non-fwding table",
2679                         im->lookup_table_nbuckets,
2680                         im->lookup_table_size);
2681
2682   /* Create FIB with index 0 and table id of 0. */
2683   fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, 0);
2684
2685   {
2686     pg_node_t * pn;
2687     pn = pg_get_node (ip6_lookup_node.index);
2688     pn->unformat_edit = unformat_pg_ip6_header;
2689   }
2690
2691   /* Unless explicitly configured, don't process HBH options */
2692   im->hbh_enabled = 0;
2693
2694   {
2695     icmp6_neighbor_solicitation_header_t p;
2696
2697     memset (&p, 0, sizeof (p));
2698
2699     p.ip.ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6 << 28);
2700     p.ip.payload_length = clib_host_to_net_u16 (sizeof (p)
2701                                                 - STRUCT_OFFSET_OF (icmp6_neighbor_solicitation_header_t, neighbor));
2702     p.ip.protocol = IP_PROTOCOL_ICMP6;
2703     p.ip.hop_limit = 255;
2704     ip6_set_solicited_node_multicast_address (&p.ip.dst_address, 0);
2705
2706     p.neighbor.icmp.type = ICMP6_neighbor_solicitation;
2707
2708     p.link_layer_option.header.type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
2709     p.link_layer_option.header.n_data_u64s = sizeof (p.link_layer_option) / sizeof (u64);
2710
2711     vlib_packet_template_init (vm,
2712                                &im->discover_neighbor_packet_template,
2713                                &p, sizeof (p),
2714                                /* alloc chunk size */ 8,
2715                                "ip6 neighbor discovery");
2716   }
2717
2718   error = ip6_feature_init (vm, im);
2719
2720   return error;
2721 }
2722
2723 VLIB_INIT_FUNCTION (ip6_lookup_init);
2724
2725 static clib_error_t *
2726 add_del_ip6_interface_table (vlib_main_t * vm,
2727                              unformat_input_t * input,
2728                              vlib_cli_command_t * cmd)
2729 {
2730   vnet_main_t * vnm = vnet_get_main();
2731   clib_error_t * error = 0;
2732   u32 sw_if_index, table_id;
2733
2734   sw_if_index = ~0;
2735
2736   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2737     {
2738       error = clib_error_return (0, "unknown interface `%U'",
2739                                  format_unformat_error, input);
2740       goto done;
2741     }
2742
2743   if (unformat (input, "%d", &table_id))
2744     ;
2745   else
2746     {
2747       error = clib_error_return (0, "expected table id `%U'",
2748                                  format_unformat_error, input);
2749       goto done;
2750     }
2751
2752   {
2753     u32 fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6,
2754                                                       table_id);
2755
2756     vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
2757     ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
2758   }
2759
2760
2761  done:
2762   return error;
2763 }
2764
2765 /*?
2766  * Place the indicated interface into the supplied IPv6 FIB table (also known
2767  * as a VRF). If the FIB table does not exist, this command creates it. To
2768  * display the current IPv6 FIB table, use the command '<em>show ip6 fib</em>'.
2769  * FIB table will only be displayed if a route has been added to the table, or
2770  * an IP Address is assigned to an interface in the table (which adds a route
2771  * automatically).
2772  *
2773  * @note IP addresses added after setting the interface IP table end up in
2774  * the indicated FIB table. If the IP address is added prior to adding the
2775  * interface to the FIB table, it will NOT be part of the FIB table. Predictable
2776  * but potentially counter-intuitive results occur if you provision interface
2777  * addresses in multiple FIBs. Upon RX, packets will be processed in the last
2778  * IP table ID provisioned. It might be marginally useful to evade source RPF
2779  * drops to put an interface address into multiple FIBs.
2780  *
2781  * @cliexpar
2782  * Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id):
2783  * @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2}
2784  ?*/
2785 /* *INDENT-OFF* */
2786 VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) = {
2787   .path = "set interface ip6 table",
2788   .function = add_del_ip6_interface_table,
2789   .short_help = "set interface ip6 table <interface> <table-id>"
2790 };
2791 /* *INDENT-ON* */
2792
2793 void
2794 ip6_link_local_address_from_ethernet_mac_address (ip6_address_t *ip,
2795                                                   u8 *mac)
2796 {
2797   ip->as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL);
2798   /* Invert the "u" bit */
2799   ip->as_u8 [8] = mac[0] ^ (1<<1);
2800   ip->as_u8 [9] = mac[1];
2801   ip->as_u8 [10] = mac[2];
2802   ip->as_u8 [11] = 0xFF;
2803   ip->as_u8 [12] = 0xFE;
2804   ip->as_u8 [13] = mac[3];
2805   ip->as_u8 [14] = mac[4];
2806   ip->as_u8 [15] = mac[5];
2807 }
2808
2809 void
2810 ip6_ethernet_mac_address_from_link_local_address (u8 *mac,
2811                                                   ip6_address_t *ip)
2812 {
2813   /* Invert the previously inverted "u" bit */
2814   mac[0] = ip->as_u8 [8] ^ (1<<1);
2815   mac[1] = ip->as_u8 [9];
2816   mac[2] = ip->as_u8 [10];
2817   mac[3] = ip->as_u8 [13];
2818   mac[4] = ip->as_u8 [14];
2819   mac[5] = ip->as_u8 [15];
2820 }
2821
2822 static clib_error_t *
2823 test_ip6_link_command_fn (vlib_main_t * vm,
2824                           unformat_input_t * input,
2825                           vlib_cli_command_t * cmd)
2826 {
2827   u8 mac[6];
2828   ip6_address_t _a, *a = &_a;
2829
2830   if (unformat (input, "%U", unformat_ethernet_address, mac))
2831     {
2832       ip6_link_local_address_from_ethernet_mac_address (a, mac);
2833       vlib_cli_output (vm, "Link local address: %U",
2834                        format_ip6_address, a);
2835       ip6_ethernet_mac_address_from_link_local_address (mac, a);
2836       vlib_cli_output (vm, "Original MAC address: %U",
2837                        format_ethernet_address, mac);
2838     }
2839
2840   return 0;
2841 }
2842
2843 /*?
2844  * This command converts the given MAC Address into an IPv6 link-local
2845  * address.
2846  *
2847  * @cliexpar
2848  * Example of how to create an IPv6 link-local address:
2849  * @cliexstart{test ip6 link 16:d9:e0:91:79:86}
2850  * Link local address: fe80::14d9:e0ff:fe91:7986
2851  * Original MAC address: 16:d9:e0:91:79:86
2852  * @cliexend
2853 ?*/
2854 /* *INDENT-OFF* */
2855 VLIB_CLI_COMMAND (test_link_command, static) = {
2856   .path = "test ip6 link",
2857   .function = test_ip6_link_command_fn,
2858   .short_help = "test ip6 link <mac-address>",
2859 };
2860 /* *INDENT-ON* */
2861
2862 int vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config)
2863 {
2864   ip6_main_t * im6 = &ip6_main;
2865   ip6_fib_t * fib;
2866   uword * p = hash_get (im6->fib_index_by_table_id, table_id);
2867
2868   if (p == 0)
2869     return -1;
2870
2871   fib = ip6_fib_get (p[0]);
2872
2873   fib->flow_hash_config = flow_hash_config;
2874   return 1;
2875 }
2876
2877 static clib_error_t *
2878 set_ip6_flow_hash_command_fn (vlib_main_t * vm,
2879                               unformat_input_t * input,
2880                               vlib_cli_command_t * cmd)
2881 {
2882   int matched = 0;
2883   u32 table_id = 0;
2884   u32 flow_hash_config = 0;
2885   int rv;
2886
2887   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2888     if (unformat (input, "table %d", &table_id))
2889       matched = 1;
2890 #define _(a,v) \
2891     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2892     foreach_flow_hash_bit
2893 #undef _
2894     else break;
2895   }
2896
2897   if (matched == 0)
2898     return clib_error_return (0, "unknown input `%U'",
2899                               format_unformat_error, input);
2900
2901   rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config);
2902   switch (rv)
2903     {
2904     case 1:
2905       break;
2906
2907     case -1:
2908       return clib_error_return (0, "no such FIB table %d", table_id);
2909
2910     default:
2911       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2912       break;
2913     }
2914
2915   return 0;
2916 }
2917
2918 /*?
2919  * Configure the set of IPv6 fields used by the flow hash.
2920  *
2921  * @cliexpar
2922  * @parblock
2923  * Example of how to set the flow hash on a given table:
2924  * @cliexcmd{set ip6 flow-hash table 12 dst sport dport proto}
2925  * Example of display the configured flow hash:
2926  * @cliexstart{show ip6 fib}
2927  * FIB lookup table: 65536 buckets, 32 MB heap
2928  * 11 objects, 513k of 515k used, 424 free, 0 reclaimed, 2k overhead, 32764k capacity
2929  *
2930  * VRF 0, fib_index 0, flow hash: src dst sport dport proto
2931  *                  Destination                      Packets          Bytes         Adjacency
2932  * ff02::1/128                                                 0               0 weight 1, index 5
2933  *
2934  * ff02::2/128                                                 0               0 weight 1, index 4
2935  *
2936  * ff02::16/128                                                0               0 weight 1, index 6
2937  *
2938  * ff02::1:ff00:0/104                                          0               0 weight 1, index 3
2939  *
2940  *
2941  * VRF 12, fib_index 1, flow hash: dst sport dport proto
2942  *                  Destination                      Packets          Bytes         Adjacency
2943  * ff02::1/128                                                 0               0 weight 1, index 9
2944  *
2945  * ff02::2/128                                                 0               0 weight 1, index 8
2946  *
2947  * ff02::16/128                                                0               0 weight 1, index 10
2948  *
2949  * ff02::1:ff00:0/104                                          0               0 weight 1, index 7
2950  * @cliexend
2951  * @endparblock
2952 ?*/
2953 /* *INDENT-OFF* */
2954 VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) = {
2955     .path = "set ip6 flow-hash",
2956     .short_help =
2957     "set ip6 flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2958     .function = set_ip6_flow_hash_command_fn,
2959 };
2960 /* *INDENT-ON* */
2961
2962 static clib_error_t *
2963 show_ip6_local_command_fn (vlib_main_t * vm,
2964                            unformat_input_t * input,
2965                            vlib_cli_command_t * cmd)
2966 {
2967   ip6_main_t * im = &ip6_main;
2968   ip_lookup_main_t * lm = &im->lookup_main;
2969   int i;
2970
2971   vlib_cli_output (vm, "Protocols handled by ip6_local");
2972   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2973     {
2974       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2975         vlib_cli_output (vm, "%d", i);
2976     }
2977   return 0;
2978 }
2979
2980
2981
2982 /*?
2983  * Display the set of protocols handled by the local IPv6 stack.
2984  *
2985  * @cliexpar
2986  * Example of how to display local protocol table:
2987  * @cliexstart{show ip6 local}
2988  * Protocols handled by ip6_local
2989  * 17
2990  * 43
2991  * 58
2992  * 115
2993  * @cliexend
2994 ?*/
2995 /* *INDENT-OFF* */
2996 VLIB_CLI_COMMAND (show_ip6_local, static) = {
2997   .path = "show ip6 local",
2998   .function = show_ip6_local_command_fn,
2999   .short_help = "show ip6 local",
3000 };
3001 /* *INDENT-ON* */
3002
3003 int vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3004                                  u32 table_index)
3005 {
3006   vnet_main_t * vnm = vnet_get_main();
3007   vnet_interface_main_t * im = &vnm->interface_main;
3008   ip6_main_t * ipm = &ip6_main;
3009   ip_lookup_main_t * lm = &ipm->lookup_main;
3010   vnet_classify_main_t * cm = &vnet_classify_main;
3011   ip6_address_t *if_addr;
3012
3013   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3014     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3015
3016   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3017     return VNET_API_ERROR_NO_SUCH_ENTRY;
3018
3019   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3020   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3021
3022   if_addr = ip6_interface_first_address (ipm, sw_if_index, NULL);
3023
3024   if (NULL != if_addr)
3025   {
3026       fib_prefix_t pfx = {
3027           .fp_len = 128,
3028           .fp_proto = FIB_PROTOCOL_IP6,
3029           .fp_addr.ip6 = *if_addr,
3030       };
3031       u32 fib_index;
3032
3033       fib_index = fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
3034                                                       sw_if_index);
3035
3036
3037       if (table_index != (u32) ~0)
3038       {
3039           dpo_id_t dpo = DPO_NULL;
3040
3041           dpo_set(&dpo,
3042                   DPO_CLASSIFY,
3043                   DPO_PROTO_IP4,
3044                   classify_dpo_create(FIB_PROTOCOL_IP4,
3045                                       table_index));
3046
3047           fib_table_entry_special_dpo_add(fib_index,
3048                                           &pfx,
3049                                           FIB_SOURCE_CLASSIFY,
3050                                           FIB_ENTRY_FLAG_NONE,
3051                                           &dpo);
3052           dpo_reset(&dpo);
3053       }
3054       else
3055       {
3056           fib_table_entry_special_remove(fib_index,
3057                                          &pfx,
3058                                          FIB_SOURCE_CLASSIFY);
3059       }
3060   }
3061
3062   return 0;
3063 }
3064
3065 static clib_error_t *
3066 set_ip6_classify_command_fn (vlib_main_t * vm,
3067                              unformat_input_t * input,
3068                              vlib_cli_command_t * cmd)
3069 {
3070   u32 table_index = ~0;
3071   int table_index_set = 0;
3072   u32 sw_if_index = ~0;
3073   int rv;
3074
3075   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3076     if (unformat (input, "table-index %d", &table_index))
3077       table_index_set = 1;
3078     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3079                        vnet_get_main(), &sw_if_index))
3080         ;
3081     else
3082         break;
3083   }
3084
3085   if (table_index_set == 0)
3086       return clib_error_return (0, "classify table-index must be specified");
3087
3088   if (sw_if_index == ~0)
3089     return clib_error_return (0, "interface / subif must be specified");
3090
3091   rv = vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index);
3092
3093   switch (rv)
3094     {
3095     case 0:
3096       break;
3097
3098     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3099       return clib_error_return (0, "No such interface");
3100
3101     case VNET_API_ERROR_NO_SUCH_ENTRY:
3102       return clib_error_return (0, "No such classifier table");
3103     }
3104   return 0;
3105 }
3106
3107 /*?
3108  * Assign a classification table to an interface. The classification
3109  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3110  * commands. Once the table is create, use this command to filter packets
3111  * on an interface.
3112  *
3113  * @cliexpar
3114  * Example of how to assign a classification table to an interface:
3115  * @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1}
3116 ?*/
3117 /* *INDENT-OFF* */
3118 VLIB_CLI_COMMAND (set_ip6_classify_command, static) = {
3119     .path = "set ip6 classify",
3120     .short_help =
3121     "set ip6 classify intfc <interface> table-index <classify-idx>",
3122     .function = set_ip6_classify_command_fn,
3123 };
3124 /* *INDENT-ON* */
3125
3126 static clib_error_t *
3127 ip6_config (vlib_main_t * vm, unformat_input_t * input)
3128 {
3129   ip6_main_t * im = &ip6_main;
3130   uword heapsize = 0;
3131   u32 tmp;
3132   u32 nbuckets = 0;
3133
3134   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3135     if (unformat (input, "hash-buckets %d", &tmp))
3136       nbuckets = tmp;
3137     else if (unformat (input, "heap-size %dm", &tmp))
3138       heapsize = ((u64)tmp) << 20;
3139     else if (unformat (input, "heap-size %dM", &tmp))
3140       heapsize = ((u64)tmp) << 20;
3141     else if (unformat (input, "heap-size %dg", &tmp))
3142       heapsize = ((u64)tmp) << 30;
3143     else if (unformat (input, "heap-size %dG", &tmp))
3144       heapsize = ((u64)tmp) << 30;
3145     else
3146       return clib_error_return (0, "unknown input '%U'",
3147                                 format_unformat_error, input);
3148   }
3149
3150   im->lookup_table_nbuckets = nbuckets;
3151   im->lookup_table_size = heapsize;
3152
3153   return 0;
3154 }
3155
3156 VLIB_EARLY_CONFIG_FUNCTION (ip6_config, "ip6");
3157
3158 #define TEST_CODE 1
3159 #if TEST_CODE > 0
3160
3161 static clib_error_t *
3162 set_interface_ip6_output_feature_command_fn (vlib_main_t * vm,
3163                                              unformat_input_t * input,
3164                                              vlib_cli_command_t * cmd)
3165 {
3166   vnet_main_t * vnm = vnet_get_main();
3167   u32 sw_if_index = ~0;
3168   int is_add = 1;
3169   ip6_main_t * im = &ip6_main;
3170   ip_lookup_main_t * lm = &im->lookup_main;
3171
3172   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3173     {
3174       if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
3175         ;
3176       else if (unformat (input, "del"))
3177         is_add = 0;
3178       else
3179         break;
3180     }
3181
3182   if (sw_if_index == ~0)
3183     return clib_error_return (0, "unknown interface `%U'",
3184                               format_unformat_error, input);
3185
3186   lm->tx_sw_if_has_ip_output_features =
3187     clib_bitmap_set (lm->tx_sw_if_has_ip_output_features, sw_if_index, is_add);
3188
3189   return 0;
3190 }
3191
3192 /*?
3193  * Enable or disable the output feature on an interface.
3194  *
3195  * @todo Need a more detailed description.
3196  *
3197  * @cliexpar
3198  * Example of how to enable the output feature on an interface:
3199  * @cliexcmd{set interface ip6 output feature GigabitEthernet2/0/0}
3200  * Example of how to disable the output feature on an interface:
3201  * @cliexcmd{set interface ip6 output feature GigabitEthernet2/0/0 del}
3202 ?*/
3203 /* *INDENT-OFF* */
3204 VLIB_CLI_COMMAND (set_interface_ip6_output_feature, static) = {
3205   .path = "set interface ip6 output feature",
3206   .function = set_interface_ip6_output_feature_command_fn,
3207   .short_help = "set interface ip6 output feature <interface> [del]",
3208 };
3209 /* *INDENT-ON* */
3210
3211 #endif /* TEST_CODE */