Punt: specify packets by IP protocol Type
[vpp.git] / src / vnet / ip / ip6_forward.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip6_forward.c: IP v6 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ip/ip6_neighbor.h>
44 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vppinfra/cache.h>
47 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
48 #include <vnet/fib/ip6_fib.h>
49 #include <vnet/mfib/ip6_mfib.h>
50 #include <vnet/dpo/load_balance_map.h>
51 #include <vnet/dpo/classify_dpo.h>
52
53 #ifndef CLIB_MARCH_VARIANT
54 #include <vppinfra/bihash_template.c>
55 #endif
56 #include <vnet/ip/ip6_forward.h>
57 #include <vnet/interface_output.h>
58
/*
 * Flag used by the IOAM code: the classifier sets it and the
 * pop-hop-by-hop processing checks it.
 */
#define OI_DECAP   0x80000000
61
62 static void
63 ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
64                           ip6_main_t * im, u32 fib_index,
65                           ip_interface_address_t * a)
66 {
67   ip_lookup_main_t *lm = &im->lookup_main;
68   ip6_address_t *address = ip_interface_address_get_address (lm, a);
69   fib_prefix_t pfx = {
70     .fp_len = a->address_length,
71     .fp_proto = FIB_PROTOCOL_IP6,
72     .fp_addr.ip6 = *address,
73   };
74
75   if (a->address_length < 128)
76     {
77       fib_table_entry_update_one_path (fib_index,
78                                        &pfx,
79                                        FIB_SOURCE_INTERFACE,
80                                        (FIB_ENTRY_FLAG_CONNECTED |
81                                         FIB_ENTRY_FLAG_ATTACHED),
82                                        DPO_PROTO_IP6,
83                                        /* No next-hop address */
84                                        NULL, sw_if_index,
85                                        /* invalid FIB index */
86                                        ~0, 1,
87                                        /* no label stack */
88                                        NULL, FIB_ROUTE_PATH_FLAG_NONE);
89     }
90
91   pfx.fp_len = 128;
92   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
93     {
94       u32 classify_table_index =
95         lm->classify_table_index_by_sw_if_index[sw_if_index];
96       if (classify_table_index != (u32) ~ 0)
97         {
98           dpo_id_t dpo = DPO_INVALID;
99
100           dpo_set (&dpo,
101                    DPO_CLASSIFY,
102                    DPO_PROTO_IP6,
103                    classify_dpo_create (DPO_PROTO_IP6, classify_table_index));
104
105           fib_table_entry_special_dpo_add (fib_index,
106                                            &pfx,
107                                            FIB_SOURCE_CLASSIFY,
108                                            FIB_ENTRY_FLAG_NONE, &dpo);
109           dpo_reset (&dpo);
110         }
111     }
112
113   fib_table_entry_update_one_path (fib_index, &pfx,
114                                    FIB_SOURCE_INTERFACE,
115                                    (FIB_ENTRY_FLAG_CONNECTED |
116                                     FIB_ENTRY_FLAG_LOCAL),
117                                    DPO_PROTO_IP6,
118                                    &pfx.fp_addr,
119                                    sw_if_index, ~0,
120                                    1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
121 }
122
123 static void
124 ip6_del_interface_routes (ip6_main_t * im,
125                           u32 fib_index,
126                           ip6_address_t * address, u32 address_length)
127 {
128   fib_prefix_t pfx = {
129     .fp_len = address_length,
130     .fp_proto = FIB_PROTOCOL_IP6,
131     .fp_addr.ip6 = *address,
132   };
133
134   if (pfx.fp_len < 128)
135     {
136       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
137
138     }
139
140   pfx.fp_len = 128;
141   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
142 }
143
144 #ifndef CLIB_MARCH_VARIANT
145 void
146 ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
147 {
148   ip6_main_t *im = &ip6_main;
149
150   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
151
152   /*
153    * enable/disable only on the 1<->0 transition
154    */
155   if (is_enable)
156     {
157       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
158         return;
159     }
160   else
161     {
162       /* The ref count is 0 when an address is removed from an interface that has
163        * no address - this is not a ciritical error */
164       if (0 == im->ip_enabled_by_sw_if_index[sw_if_index] ||
165           0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
166         return;
167     }
168
169   vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index,
170                                !is_enable, 0, 0);
171
172   vnet_feature_enable_disable ("ip6-multicast", "ip6-not-enabled",
173                                sw_if_index, !is_enable, 0, 0);
174 }
175
176 /* get first interface address */
177 ip6_address_t *
178 ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index)
179 {
180   ip_lookup_main_t *lm = &im->lookup_main;
181   ip_interface_address_t *ia = 0;
182   ip6_address_t *result = 0;
183
184   /* *INDENT-OFF* */
185   foreach_ip_interface_address (lm, ia, sw_if_index,
186                                 1 /* honor unnumbered */,
187   ({
188     ip6_address_t * a = ip_interface_address_get_address (lm, ia);
189     result = a;
190     break;
191   }));
192   /* *INDENT-ON* */
193   return result;
194 }
195
196 clib_error_t *
197 ip6_add_del_interface_address (vlib_main_t * vm,
198                                u32 sw_if_index,
199                                ip6_address_t * address,
200                                u32 address_length, u32 is_del)
201 {
202   vnet_main_t *vnm = vnet_get_main ();
203   ip6_main_t *im = &ip6_main;
204   ip_lookup_main_t *lm = &im->lookup_main;
205   clib_error_t *error;
206   u32 if_address_index;
207   ip6_address_fib_t ip6_af, *addr_fib = 0;
208   ip6_address_t ll_addr;
209
210   /* local0 interface doesn't support IP addressing */
211   if (sw_if_index == 0)
212     {
213       return
214         clib_error_create ("local0 interface doesn't support IP addressing");
215     }
216
217   if (ip6_address_is_link_local_unicast (address))
218     {
219       if (address_length != 128)
220         {
221           vnm->api_errno = VNET_API_ERROR_ADDRESS_LENGTH_MISMATCH;
222           return
223             clib_error_create
224             ("prefix length of link-local address must be 128");
225         }
226       if (!is_del)
227         {
228           return ip6_neighbor_set_link_local_address (vm, sw_if_index,
229                                                       address);
230         }
231       else
232         {
233           ll_addr = ip6_neighbor_get_link_local_address (sw_if_index);
234           if (ip6_address_is_equal (&ll_addr, address))
235             {
236               vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_DELETABLE;
237               return clib_error_create ("address not deletable");
238             }
239           else
240             {
241               vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
242               return clib_error_create ("address not found");
243             }
244         }
245     }
246
247   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
248   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
249
250   ip6_addr_fib_init (&ip6_af, address,
251                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
252   vec_add1 (addr_fib, ip6_af);
253
254   /* *INDENT-OFF* */
255   if (!is_del)
256     {
257       /* When adding an address check that it does not conflict
258          with an existing address on any interface in this table. */
259       ip_interface_address_t *ia;
260       vnet_sw_interface_t *sif;
261
262       pool_foreach(sif, vnm->interface_main.sw_interfaces,
263       ({
264           if (im->fib_index_by_sw_if_index[sw_if_index] ==
265               im->fib_index_by_sw_if_index[sif->sw_if_index])
266             {
267               foreach_ip_interface_address
268                 (&im->lookup_main, ia, sif->sw_if_index,
269                  0 /* honor unnumbered */ ,
270                  ({
271                    ip6_address_t * x =
272                      ip_interface_address_get_address
273                      (&im->lookup_main, ia);
274                    if (ip6_destination_matches_route
275                        (im, address, x, ia->address_length) ||
276                        ip6_destination_matches_route (im,
277                                                       x,
278                                                       address,
279                                                       address_length))
280                      {
281                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
282                        return
283                          clib_error_create
284                          ("failed to add %U which conflicts with %U for interface %U",
285                           format_ip6_address_and_length, address,
286                           address_length,
287                           format_ip6_address_and_length, x,
288                           ia->address_length,
289                           format_vnet_sw_if_index_name, vnm,
290                           sif->sw_if_index);
291                      }
292                  }));
293             }
294       }));
295     }
296   /* *INDENT-ON* */
297
298   {
299     uword elts_before = pool_elts (lm->if_address_pool);
300
301     error = ip_interface_address_add_del
302       (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
303     if (error)
304       goto done;
305
306     /* Pool did not grow: add duplicate address. */
307     if (elts_before == pool_elts (lm->if_address_pool))
308       goto done;
309   }
310
311   ip6_sw_interface_enable_disable (sw_if_index, !is_del);
312
313   if (is_del)
314     ip6_del_interface_routes (im, ip6_af.fib_index, address, address_length);
315   else
316     ip6_add_interface_routes (vnm, sw_if_index,
317                               im, ip6_af.fib_index,
318                               pool_elt_at_index (lm->if_address_pool,
319                                                  if_address_index));
320
321   {
322     ip6_add_del_interface_address_callback_t *cb;
323     vec_foreach (cb, im->add_del_interface_address_callbacks)
324       cb->function (im, cb->function_opaque, sw_if_index,
325                     address, address_length, if_address_index, is_del);
326   }
327
328 done:
329   vec_free (addr_fib);
330   return error;
331 }
332
333 #endif
334
335 static clib_error_t *
336 ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
337 {
338   ip6_main_t *im = &ip6_main;
339   ip_interface_address_t *ia;
340   ip6_address_t *a;
341   u32 is_admin_up, fib_index;
342
343   /* Fill in lookup tables with default table (0). */
344   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
345
346   vec_validate_init_empty (im->
347                            lookup_main.if_address_pool_index_by_sw_if_index,
348                            sw_if_index, ~0);
349
350   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
351
352   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
353
354   /* *INDENT-OFF* */
355   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
356                                 0 /* honor unnumbered */,
357   ({
358     a = ip_interface_address_get_address (&im->lookup_main, ia);
359     if (is_admin_up)
360       ip6_add_interface_routes (vnm, sw_if_index,
361                                 im, fib_index,
362                                 ia);
363     else
364       ip6_del_interface_routes (im, fib_index,
365                                 a, ia->address_length);
366   }));
367   /* *INDENT-ON* */
368
369   return 0;
370 }
371
372 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
373
/*
 * Built-in ip6 unicast rx feature path definition.
 *
 * The arc starts at "ip6-input" and ends at "ip6-lookup".  The runs_before
 * constraints below chain the built-in features as:
 *   ip6-flow-classify -> ip6-inacl -> ip6-policer-classify ->
 *   ipsec6-input-feature -> l2tp-decap -> vpath-input-ip6 ->
 *   ip6-vxlan-bypass -> ip6-lookup
 * "ip6-not-enabled" is only constrained to run before "ip6-lookup".
 */
/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip6_unicast, static) =
{
  .arc_name  = "ip6-unicast",
  .start_nodes = VNET_FEATURES ("ip6-input"),
  .last_in_arc = "ip6-lookup",
  .arc_index_ptr = &ip6_main.lookup_main.ucast_feature_arc_index,
};

VNET_FEATURE_INIT (ip6_flow_classify, static) =
{
  .arc_name = "ip6-unicast",
  .node_name = "ip6-flow-classify",
  .runs_before = VNET_FEATURES ("ip6-inacl"),
};

VNET_FEATURE_INIT (ip6_inacl, static) =
{
  .arc_name = "ip6-unicast",
  .node_name = "ip6-inacl",
  .runs_before = VNET_FEATURES ("ip6-policer-classify"),
};

VNET_FEATURE_INIT (ip6_policer_classify, static) =
{
  .arc_name = "ip6-unicast",
  .node_name = "ip6-policer-classify",
  .runs_before = VNET_FEATURES ("ipsec6-input-feature"),
};

VNET_FEATURE_INIT (ip6_ipsec, static) =
{
  .arc_name = "ip6-unicast",
  .node_name = "ipsec6-input-feature",
  .runs_before = VNET_FEATURES ("l2tp-decap"),
};

VNET_FEATURE_INIT (ip6_l2tp, static) =
{
  .arc_name = "ip6-unicast",
  .node_name = "l2tp-decap",
  .runs_before = VNET_FEATURES ("vpath-input-ip6"),
};

VNET_FEATURE_INIT (ip6_vpath, static) =
{
  .arc_name = "ip6-unicast",
  .node_name = "vpath-input-ip6",
  .runs_before = VNET_FEATURES ("ip6-vxlan-bypass"),
};

VNET_FEATURE_INIT (ip6_vxlan_bypass, static) =
{
  .arc_name = "ip6-unicast",
  .node_name = "ip6-vxlan-bypass",
  .runs_before = VNET_FEATURES ("ip6-lookup"),
};

/* Toggled per interface by ip6_sw_interface_enable_disable() and
   ip6_sw_interface_add_del() in this file. */
VNET_FEATURE_INIT (ip6_not_enabled, static) =
{
  .arc_name = "ip6-unicast",
  .node_name = "ip6-not-enabled",
  .runs_before = VNET_FEATURES ("ip6-lookup"),
};

VNET_FEATURE_INIT (ip6_lookup, static) =
{
  .arc_name = "ip6-unicast",
  .node_name = "ip6-lookup",
  .runs_before = 0,  /*last feature*/
};
446
/*
 * Built-in ip6 multicast rx feature path definition.
 *
 * The arc starts at "ip6-input" and ends at "ip6-mfib-forward-lookup".
 * The two built-in features, "vpath-input-ip6" and "ip6-not-enabled", are
 * each constrained only to run before "ip6-mfib-forward-lookup".
 */
VNET_FEATURE_ARC_INIT (ip6_multicast, static) =
{
  .arc_name  = "ip6-multicast",
  .start_nodes = VNET_FEATURES ("ip6-input"),
  .last_in_arc = "ip6-mfib-forward-lookup",
  .arc_index_ptr = &ip6_main.lookup_main.mcast_feature_arc_index,
};

VNET_FEATURE_INIT (ip6_vpath_mc, static) = {
  .arc_name = "ip6-multicast",
  .node_name = "vpath-input-ip6",
  .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
};

/* Toggled per interface by ip6_sw_interface_enable_disable() and
   ip6_sw_interface_add_del() in this file. */
VNET_FEATURE_INIT (ip6_not_enabled_mc, static) = {
  .arc_name = "ip6-multicast",
  .node_name = "ip6-not-enabled",
  .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
};

VNET_FEATURE_INIT (ip6_mc_lookup, static) = {
  .arc_name = "ip6-multicast",
  .node_name = "ip6-mfib-forward-lookup",
  .runs_before = 0, /* last feature */
};
473
/*
 * Built-in ip6 tx feature path definition.
 *
 * The arc can be entered from "ip6-rewrite", "ip6-midchain" or "ip6-dvr-dpo"
 * and ends at "interface-output".  The runs_before constraints order the
 * built-in features as:
 *   ip6-outacl -> ipsec6-output-feature -> interface-output
 */
VNET_FEATURE_ARC_INIT (ip6_output, static) =
{
  .arc_name  = "ip6-output",
  .start_nodes = VNET_FEATURES ("ip6-rewrite", "ip6-midchain", "ip6-dvr-dpo"),
  .last_in_arc = "interface-output",
  .arc_index_ptr = &ip6_main.lookup_main.output_feature_arc_index,
};

VNET_FEATURE_INIT (ip6_outacl, static) = {
  .arc_name = "ip6-output",
  .node_name = "ip6-outacl",
  .runs_before = VNET_FEATURES ("ipsec6-output-feature"),
};

VNET_FEATURE_INIT (ip6_ipsec_output, static) = {
  .arc_name = "ip6-output",
  .node_name = "ipsec6-output-feature",
  .runs_before = VNET_FEATURES ("interface-output"),
};

VNET_FEATURE_INIT (ip6_interface_output, static) = {
  .arc_name = "ip6-output",
  .node_name = "interface-output",
  .runs_before = 0, /* not before any other features */
};
/* *INDENT-ON* */
501
502 static clib_error_t *
503 ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
504 {
505   ip6_main_t *im = &ip6_main;
506
507   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
508   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
509
510   if (!is_add)
511     {
512       /* Ensure that IPv6 is disabled */
513       ip6_main_t *im6 = &ip6_main;
514       ip_lookup_main_t *lm6 = &im6->lookup_main;
515       ip_interface_address_t *ia = 0;
516       ip6_address_t *address;
517       vlib_main_t *vm = vlib_get_main ();
518
519       ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, 0 /* is_add */ );
520       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
521       /* *INDENT-OFF* */
522       foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
523       ({
524         address = ip_interface_address_get_address (lm6, ia);
525         ip6_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
526       }));
527       /* *INDENT-ON* */
528       ip6_mfib_interface_enable_disable (sw_if_index, 0);
529     }
530
531   vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index,
532                                is_add, 0, 0);
533
534   vnet_feature_enable_disable ("ip6-multicast", "ip6-not-enabled",
535                                sw_if_index, is_add, 0, 0);
536
537   return /* no error */ 0;
538 }
539
540 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del);
541
/*
 * "ip6-lookup" graph node function: hands the whole frame to
 * ip6_lookup_inline() and returns its result unchanged.
 */
VLIB_NODE_FN (ip6_lookup_node) (vlib_main_t * vm,
                                vlib_node_runtime_t * node,
                                vlib_frame_t * frame)
{
  return ip6_lookup_inline (vm, node, frame);
}

/* Defined further down in this file; needed by the node registrations. */
static u8 *format_ip6_lookup_trace (u8 * s, va_list * args);

/* Registration of the "ip6-lookup" node; the next-node table comes from
   IP6_LOOKUP_N_NEXT / IP6_LOOKUP_NEXT_NODES. */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_lookup_node) =
{
  .name = "ip6-lookup",
  .vector_size = sizeof (u32),
  .format_trace = format_ip6_lookup_trace,
  .n_next_nodes = IP6_LOOKUP_N_NEXT,
  .next_nodes = IP6_LOOKUP_NEXT_NODES,
};
/* *INDENT-ON* */
561
/*
 * "ip6-load-balance" graph node function.
 *
 * For every buffer in the frame, the load-balance object indexed by
 * ip.adj_index[VLIB_TX] is resolved to one of its buckets.  The chosen
 * bucket's DPO supplies the next node and the new ip.adj_index[VLIB_TX].
 * Packets whose first next-header is hop-by-hop options are diverted to
 * IP6_LOOKUP_NEXT_HOP_BY_HOP when the DPO is an adjacency and HBH processing
 * is enabled.  The per-load-balance "via" counters are incremented with the
 * packet's chain length.  Returns the number of buffers in the frame.
 */
VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm,
                                      vlib_node_runtime_t * node,
                                      vlib_frame_t * frame)
{
  vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
  u32 n_left, *from;
  u32 thread_index = vm->thread_index;
  ip6_main_t *im = &ip6_main;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
  u16 nexts[VLIB_FRAME_SIZE], *next;

  from = vlib_frame_vector_args (frame);
  n_left = frame->n_vectors;
  next = nexts;

  vlib_get_buffers (vm, from, bufs, n_left);

  /* Dual loop: two buffers are processed per iteration; requiring four
     remaining buffers keeps the b[2]/b[3] prefetches in range. */
  while (n_left >= 4)
    {
      const load_balance_t *lb0, *lb1;
      const ip6_header_t *ip0, *ip1;
      u32 lbi0, hc0, lbi1, hc1;
      const dpo_id_t *dpo0, *dpo1;

      /* Prefetch next iteration. */
      {
        vlib_prefetch_buffer_header (b[2], STORE);
        vlib_prefetch_buffer_header (b[3], STORE);

        CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), STORE);
        CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), STORE);
      }

      ip0 = vlib_buffer_get_current (b[0]);
      ip1 = vlib_buffer_get_current (b[1]);
      lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
      lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];

      lb0 = load_balance_get (lbi0);
      lb1 = load_balance_get (lbi1);

      /*
       * This node serves via-FIB lookups, so a flow hash already stored in
       * the buffer is re-used when present.  It is shifted right by one
       * first: using the same hash value at each level of the recursion
       * graph would lead to polarisation.
       */
      hc0 = hc1 = 0;

      if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
        {
          if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
            {
              hc0 = vnet_buffer (b[0])->ip.flow_hash =
                vnet_buffer (b[0])->ip.flow_hash >> 1;
            }
          else
            {
              /* No hash stored yet: compute one and store it. */
              hc0 = vnet_buffer (b[0])->ip.flow_hash =
                ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
            }
          dpo0 = load_balance_get_fwd_bucket
            (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
        }
      else
        {
          /* Single bucket: no hash needed. */
          dpo0 = load_balance_get_bucket_i (lb0, 0);
        }
      if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
        {
          if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
            {
              hc1 = vnet_buffer (b[1])->ip.flow_hash =
                vnet_buffer (b[1])->ip.flow_hash >> 1;
            }
          else
            {
              hc1 = vnet_buffer (b[1])->ip.flow_hash =
                ip6_compute_flow_hash (ip1, lb1->lb_hash_config);
            }
          dpo1 = load_balance_get_fwd_bucket
            (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
        }
      else
        {
          dpo1 = load_balance_get_bucket_i (lb1, 0);
        }

      next[0] = dpo0->dpoi_next_node;
      next[1] = dpo1->dpoi_next_node;

      /* Only process the HBH Option Header if explicitly configured to do so */
      if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
        {
          next[0] = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
            (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[0];
        }
      /* Only process the HBH Option Header if explicitly configured to do so */
      if (PREDICT_FALSE (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
        {
          next[1] = (dpo_is_adj (dpo1) && im->hbh_enabled) ?
            (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[1];
        }

      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;

      /* Counters are keyed by the load-balance index read on entry,
         not by the bucket that was selected. */
      vlib_increment_combined_counter
        (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
      vlib_increment_combined_counter
        (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));

      b += 2;
      next += 2;
      n_left -= 2;
    }

  /* Single loop: same per-buffer processing for the remaining buffers. */
  while (n_left > 0)
    {
      const load_balance_t *lb0;
      const ip6_header_t *ip0;
      const dpo_id_t *dpo0;
      u32 lbi0, hc0;

      ip0 = vlib_buffer_get_current (b[0]);
      lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];

      lb0 = load_balance_get (lbi0);

      hc0 = 0;
      if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
        {
          if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
            {
              hc0 = vnet_buffer (b[0])->ip.flow_hash =
                vnet_buffer (b[0])->ip.flow_hash >> 1;
            }
          else
            {
              hc0 = vnet_buffer (b[0])->ip.flow_hash =
                ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
            }
          dpo0 = load_balance_get_fwd_bucket
            (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
        }
      else
        {
          dpo0 = load_balance_get_bucket_i (lb0, 0);
        }

      next[0] = dpo0->dpoi_next_node;
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;

      /* Only process the HBH Option Header if explicitly configured to do so */
      if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
        {
          next[0] = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
            (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[0];
        }

      vlib_increment_combined_counter
        (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));

      b += 1;
      next += 1;
      n_left -= 1;
    }

  /* Hand every buffer to the next node recorded for it in nexts[]. */
  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    ip6_forward_next_trace (vm, node, frame, VLIB_TX);

  return frame->n_vectors;
}

/* Registration of the "ip6-load-balance" node; it is declared a sibling of
   "ip6-lookup" and uses the same trace formatter. */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_load_balance_node) =
{
  .name = "ip6-load-balance",
  .vector_size = sizeof (u32),
  .sibling_of = "ip6-lookup",
  .format_trace = format_ip6_lookup_trace,
};
/* *INDENT-ON* */
747
/* Per-packet trace record filled in by ip6_forward_next_trace() and read
   by the trace formatters in this file. */
typedef struct
{
  /* Adjacency taken. */
  u32 adj_index;
  /* Flow hash copied from the buffer metadata. */
  u32 flow_hash;
  /* The buffer's TX sw_if_index when that is set (not ~0), otherwise the
     FIB index of the RX interface (see ip6_forward_next_trace). */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite.
     NOTE(review): the size subtracts a single u32 although three u32 fields
     precede this array - confirm whether a 128-byte record was intended. */
  u8 packet_data[128 - 1 * sizeof (u32)];
}
ip6_forward_next_trace_t;
759
760 #ifndef CLIB_MARCH_VARIANT
761 u8 *
762 format_ip6_forward_next_trace (u8 * s, va_list * args)
763 {
764   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
765   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
766   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
767   u32 indent = format_get_indent (s);
768
769   s = format (s, "%U%U",
770               format_white_space, indent,
771               format_ip6_header, t->packet_data, sizeof (t->packet_data));
772   return s;
773 }
774 #endif
775
776 static u8 *
777 format_ip6_lookup_trace (u8 * s, va_list * args)
778 {
779   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
780   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
781   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
782   u32 indent = format_get_indent (s);
783
784   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
785               t->fib_index, t->adj_index, t->flow_hash);
786   s = format (s, "\n%U%U",
787               format_white_space, indent,
788               format_ip6_header, t->packet_data, sizeof (t->packet_data));
789   return s;
790 }
791
792
793 static u8 *
794 format_ip6_rewrite_trace (u8 * s, va_list * args)
795 {
796   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
797   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
798   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
799   u32 indent = format_get_indent (s);
800
801   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
802               t->fib_index, t->adj_index, format_ip_adjacency,
803               t->adj_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
804   s = format (s, "\n%U%U",
805               format_white_space, indent,
806               format_ip_adjacency_packet_data,
807               t->adj_index, t->packet_data, sizeof (t->packet_data));
808   return s;
809 }
810
811 /* Common trace function for all ip6-forward next nodes. */
812 #ifndef CLIB_MARCH_VARIANT
813 void
814 ip6_forward_next_trace (vlib_main_t * vm,
815                         vlib_node_runtime_t * node,
816                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
817 {
818   u32 *from, n_left;
819   ip6_main_t *im = &ip6_main;
820
821   n_left = frame->n_vectors;
822   from = vlib_frame_vector_args (frame);
823
824   while (n_left >= 4)
825     {
826       u32 bi0, bi1;
827       vlib_buffer_t *b0, *b1;
828       ip6_forward_next_trace_t *t0, *t1;
829
830       /* Prefetch next iteration. */
831       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
832       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
833
834       bi0 = from[0];
835       bi1 = from[1];
836
837       b0 = vlib_get_buffer (vm, bi0);
838       b1 = vlib_get_buffer (vm, bi1);
839
840       if (b0->flags & VLIB_BUFFER_IS_TRACED)
841         {
842           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
843           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
844           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
845           t0->fib_index =
846             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
847              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
848             vec_elt (im->fib_index_by_sw_if_index,
849                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
850
851           clib_memcpy_fast (t0->packet_data,
852                             vlib_buffer_get_current (b0),
853                             sizeof (t0->packet_data));
854         }
855       if (b1->flags & VLIB_BUFFER_IS_TRACED)
856         {
857           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
858           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
859           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
860           t1->fib_index =
861             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
862              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
863             vec_elt (im->fib_index_by_sw_if_index,
864                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
865
866           clib_memcpy_fast (t1->packet_data,
867                             vlib_buffer_get_current (b1),
868                             sizeof (t1->packet_data));
869         }
870       from += 2;
871       n_left -= 2;
872     }
873
874   while (n_left >= 1)
875     {
876       u32 bi0;
877       vlib_buffer_t *b0;
878       ip6_forward_next_trace_t *t0;
879
880       bi0 = from[0];
881
882       b0 = vlib_get_buffer (vm, bi0);
883
884       if (b0->flags & VLIB_BUFFER_IS_TRACED)
885         {
886           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
887           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
888           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
889           t0->fib_index =
890             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
891              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
892             vec_elt (im->fib_index_by_sw_if_index,
893                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
894
895           clib_memcpy_fast (t0->packet_data,
896                             vlib_buffer_get_current (b0),
897                             sizeof (t0->packet_data));
898         }
899       from += 1;
900       n_left -= 1;
901     }
902 }
903
904 /* Compute TCP/UDP/ICMP6 checksum in software. */
905 u16
906 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
907                                    ip6_header_t * ip0, int *bogus_lengthp)
908 {
909   ip_csum_t sum0;
910   u16 sum16, payload_length_host_byte_order;
911   u32 i, n_this_buffer, n_bytes_left;
912   u32 headers_size = sizeof (ip0[0]);
913   void *data_this_buffer;
914
915   ASSERT (bogus_lengthp);
916   *bogus_lengthp = 0;
917
918   /* Initialize checksum with ip header. */
919   sum0 = ip0->payload_length + clib_host_to_net_u16 (ip0->protocol);
920   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
921   data_this_buffer = (void *) (ip0 + 1);
922
923   for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++)
924     {
925       sum0 = ip_csum_with_carry (sum0,
926                                  clib_mem_unaligned (&ip0->
927                                                      src_address.as_uword[i],
928                                                      uword));
929       sum0 =
930         ip_csum_with_carry (sum0,
931                             clib_mem_unaligned (&ip0->dst_address.as_uword[i],
932                                                 uword));
933     }
934
935   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets)
936    * or UDP-Ping packets */
937   if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
938     {
939       u32 skip_bytes;
940       ip6_hop_by_hop_ext_t *ext_hdr =
941         (ip6_hop_by_hop_ext_t *) data_this_buffer;
942
943       /* validate really icmp6 next */
944       ASSERT ((ext_hdr->next_hdr == IP_PROTOCOL_ICMP6)
945               || (ext_hdr->next_hdr == IP_PROTOCOL_UDP));
946
947       skip_bytes = 8 * (1 + ext_hdr->n_data_u64s);
948       data_this_buffer = (void *) ((u8 *) data_this_buffer + skip_bytes);
949
950       payload_length_host_byte_order -= skip_bytes;
951       headers_size += skip_bytes;
952     }
953
954   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
955   if (p0 && n_this_buffer + headers_size > p0->current_length)
956     n_this_buffer =
957       p0->current_length >
958       headers_size ? p0->current_length - headers_size : 0;
959   while (1)
960     {
961       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
962       n_bytes_left -= n_this_buffer;
963       if (n_bytes_left == 0)
964         break;
965
966       if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
967         {
968           *bogus_lengthp = 1;
969           return 0xfefe;
970         }
971       p0 = vlib_get_buffer (vm, p0->next_buffer);
972       data_this_buffer = vlib_buffer_get_current (p0);
973       n_this_buffer = clib_min (p0->current_length, n_bytes_left);
974     }
975
976   sum16 = ~ip_csum_fold (sum0);
977
978   return sum16;
979 }
980
981 u32
982 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
983 {
984   ip6_header_t *ip0 = vlib_buffer_get_current (p0);
985   udp_header_t *udp0;
986   u16 sum16;
987   int bogus_length;
988
989   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
990   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
991           || ip0->protocol == IP_PROTOCOL_ICMP6
992           || ip0->protocol == IP_PROTOCOL_UDP
993           || ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS);
994
995   udp0 = (void *) (ip0 + 1);
996   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
997     {
998       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
999                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1000       return p0->flags;
1001     }
1002
1003   sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, &bogus_length);
1004
1005   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1006                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1007
1008   return p0->flags;
1009 }
1010 #endif
1011
1012 /**
1013  * @brief returns number of links on which src is reachable.
1014  */
1015 always_inline int
1016 ip6_urpf_loose_check (ip6_main_t * im, vlib_buffer_t * b, ip6_header_t * i)
1017 {
1018   const load_balance_t *lb0;
1019   index_t lbi;
1020   u32 fib_index;
1021
1022   fib_index = vec_elt (im->fib_index_by_sw_if_index,
1023                        vnet_buffer (b)->sw_if_index[VLIB_RX]);
1024   fib_index =
1025     (vnet_buffer (b)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
1026     fib_index : vnet_buffer (b)->sw_if_index[VLIB_TX];
1027
1028   lbi = ip6_fib_table_fwding_lookup (im, fib_index, &i->src_address);
1029   lb0 = load_balance_get (lbi);
1030
1031   return (fib_urpf_check_size (lb0->lb_urpf));
1032 }
1033
1034 always_inline u8
1035 ip6_next_proto_is_tcp_udp (vlib_buffer_t * p0, ip6_header_t * ip0,
1036                            u32 * udp_offset0)
1037 {
1038   u32 proto0;
1039   proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_UDP, udp_offset0);
1040   if (proto0 != IP_PROTOCOL_UDP)
1041     {
1042       proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_TCP, udp_offset0);
1043       proto0 = (proto0 == IP_PROTOCOL_TCP) ? proto0 : 0;
1044     }
1045   return proto0;
1046 }
1047
1048 /* *INDENT-OFF* */
1049 VNET_FEATURE_ARC_INIT (ip6_local) =
1050 {
1051   .arc_name  = "ip6-local",
1052   .start_nodes = VNET_FEATURES ("ip6-local"),
1053 };
1054 /* *INDENT-ON* */
1055
1056 always_inline uword
1057 ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
1058                   vlib_frame_t * frame, int head_of_feature_arc)
1059 {
1060   ip6_main_t *im = &ip6_main;
1061   ip_lookup_main_t *lm = &im->lookup_main;
1062   u32 *from, n_left_from;
1063   vlib_node_runtime_t *error_node =
1064     vlib_node_get_runtime (vm, ip6_input_node.index);
1065   u8 arc_index = vnet_feat_arc_ip6_local.feature_arc_index;
1066   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1067   u16 nexts[VLIB_FRAME_SIZE], *next;
1068
1069   from = vlib_frame_vector_args (frame);
1070   n_left_from = frame->n_vectors;
1071
1072   if (node->flags & VLIB_NODE_FLAG_TRACE)
1073     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1074
1075   vlib_get_buffers (vm, from, bufs, n_left_from);
1076   b = bufs;
1077   next = nexts;
1078
1079   while (n_left_from > 2)
1080     {
1081       /* Prefetch next iteration. */
1082       if (n_left_from >= 6)
1083         {
1084           vlib_prefetch_buffer_header (b[4], STORE);
1085           vlib_prefetch_buffer_header (b[5], STORE);
1086           vlib_prefetch_buffer_data (b[2], LOAD);
1087           vlib_prefetch_buffer_data (b[3], LOAD);
1088         }
1089
1090       u8 error[2];
1091       error[0] = IP6_ERROR_UNKNOWN_PROTOCOL;
1092       error[1] = IP6_ERROR_UNKNOWN_PROTOCOL;
1093
1094       ip6_header_t *ip[2];
1095       ip[0] = vlib_buffer_get_current (b[0]);
1096       ip[1] = vlib_buffer_get_current (b[1]);
1097
1098       if (head_of_feature_arc)
1099         {
1100           vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1101           vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1102
1103           u8 type[2];
1104           type[0] = lm->builtin_protocol_by_ip_protocol[ip[0]->protocol];
1105           type[1] = lm->builtin_protocol_by_ip_protocol[ip[1]->protocol];
1106
1107           u32 flags[2];
1108           flags[0] = b[0]->flags;
1109           flags[1] = b[1]->flags;
1110
1111           u32 good_l4_csum[2];
1112           good_l4_csum[0] =
1113             flags[0] & (VNET_BUFFER_F_L4_CHECKSUM_CORRECT |
1114                         VNET_BUFFER_F_OFFLOAD_TCP_CKSUM |
1115                         VNET_BUFFER_F_OFFLOAD_UDP_CKSUM);
1116           good_l4_csum[1] =
1117             flags[1] & (VNET_BUFFER_F_L4_CHECKSUM_CORRECT |
1118                         VNET_BUFFER_F_OFFLOAD_TCP_CKSUM |
1119                         VNET_BUFFER_F_OFFLOAD_UDP_CKSUM);
1120
1121           u32 udp_offset[2] = { };
1122           u8 is_tcp_udp[2];
1123           is_tcp_udp[0] =
1124             ip6_next_proto_is_tcp_udp (b[0], ip[0], &udp_offset[0]);
1125           is_tcp_udp[1] =
1126             ip6_next_proto_is_tcp_udp (b[1], ip[1], &udp_offset[1]);
1127           i16 len_diff[2] = { 0 };
1128           if (PREDICT_TRUE (is_tcp_udp[0]))
1129             {
1130               udp_header_t *udp =
1131                 (udp_header_t *) ((u8 *) ip[0] + udp_offset[0]);
1132               good_l4_csum[0] |= type[0] == IP_BUILTIN_PROTOCOL_UDP
1133                 && udp->checksum == 0;
1134               /* optimistically verify UDP length. */
1135               u16 ip_len, udp_len;
1136               ip_len = clib_net_to_host_u16 (ip[0]->payload_length);
1137               udp_len = clib_net_to_host_u16 (udp->length);
1138               len_diff[0] = ip_len - udp_len;
1139             }
1140           if (PREDICT_TRUE (is_tcp_udp[1]))
1141             {
1142               udp_header_t *udp =
1143                 (udp_header_t *) ((u8 *) ip[1] + udp_offset[1]);
1144               good_l4_csum[1] |= type[1] == IP_BUILTIN_PROTOCOL_UDP
1145                 && udp->checksum == 0;
1146               /* optimistically verify UDP length. */
1147               u16 ip_len, udp_len;
1148               ip_len = clib_net_to_host_u16 (ip[1]->payload_length);
1149               udp_len = clib_net_to_host_u16 (udp->length);
1150               len_diff[1] = ip_len - udp_len;
1151             }
1152
1153           good_l4_csum[0] |= type[0] == IP_BUILTIN_PROTOCOL_UNKNOWN;
1154           good_l4_csum[1] |= type[1] == IP_BUILTIN_PROTOCOL_UNKNOWN;
1155
1156           len_diff[0] = type[0] == IP_BUILTIN_PROTOCOL_UDP ? len_diff[0] : 0;
1157           len_diff[1] = type[1] == IP_BUILTIN_PROTOCOL_UDP ? len_diff[1] : 0;
1158
1159           u8 need_csum[2];
1160           need_csum[0] = type[0] != IP_BUILTIN_PROTOCOL_UNKNOWN
1161             && !good_l4_csum[0]
1162             && !(flags[0] & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
1163           need_csum[1] = type[1] != IP_BUILTIN_PROTOCOL_UNKNOWN
1164             && !good_l4_csum[1]
1165             && !(flags[1] & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
1166           if (PREDICT_FALSE (need_csum[0]))
1167             {
1168               flags[0] = ip6_tcp_udp_icmp_validate_checksum (vm, b[0]);
1169               good_l4_csum[0] = flags[0] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
1170             }
1171           if (PREDICT_FALSE (need_csum[1]))
1172             {
1173               flags[1] = ip6_tcp_udp_icmp_validate_checksum (vm, b[1]);
1174               good_l4_csum[1] = flags[1] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
1175             }
1176
1177           error[0] = IP6_ERROR_UNKNOWN_PROTOCOL;
1178           error[0] = len_diff[0] < 0 ? IP6_ERROR_UDP_LENGTH : error[0];
1179           error[1] = IP6_ERROR_UNKNOWN_PROTOCOL;
1180           error[1] = len_diff[1] < 0 ? IP6_ERROR_UDP_LENGTH : error[1];
1181
1182           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
1183                          IP6_ERROR_UDP_CHECKSUM,
1184                          "Wrong IP6 errors constants");
1185           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
1186                          IP6_ERROR_ICMP_CHECKSUM,
1187                          "Wrong IP6 errors constants");
1188
1189           error[0] =
1190             !good_l4_csum[0] ? IP6_ERROR_UDP_CHECKSUM + type[0] : error[0];
1191           error[1] =
1192             !good_l4_csum[1] ? IP6_ERROR_UDP_CHECKSUM + type[1] : error[1];
1193
1194           /* Drop packets from unroutable hosts. */
1195           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1196           u8 unroutable[2];
1197           unroutable[0] = error[0] == IP6_ERROR_UNKNOWN_PROTOCOL
1198             && type[0] != IP_BUILTIN_PROTOCOL_ICMP
1199             && !ip6_address_is_link_local_unicast (&ip[0]->src_address);
1200           unroutable[1] = error[1] == IP6_ERROR_UNKNOWN_PROTOCOL
1201             && type[1] != IP_BUILTIN_PROTOCOL_ICMP
1202             && !ip6_address_is_link_local_unicast (&ip[1]->src_address);
1203           if (PREDICT_FALSE (unroutable[0]))
1204             {
1205               error[0] =
1206                 !ip6_urpf_loose_check (im, b[0],
1207                                        ip[0]) ? IP6_ERROR_SRC_LOOKUP_MISS
1208                 : error[0];
1209             }
1210           if (PREDICT_FALSE (unroutable[1]))
1211             {
1212               error[1] =
1213                 !ip6_urpf_loose_check (im, b[1],
1214                                        ip[1]) ? IP6_ERROR_SRC_LOOKUP_MISS
1215                 : error[1];
1216             }
1217
1218           vnet_buffer (b[0])->ip.fib_index =
1219             vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1220             vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1221             vnet_buffer (b[0])->ip.fib_index;
1222           vnet_buffer (b[1])->ip.fib_index =
1223             vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1224             vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1225             vnet_buffer (b[1])->ip.fib_index;
1226         }                       /* head_of_feature_arc */
1227
1228       next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
1229       next[0] =
1230         error[0] != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[0];
1231       next[1] = lm->local_next_by_ip_protocol[ip[1]->protocol];
1232       next[1] =
1233         error[1] != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[1];
1234
1235       b[0]->error = error_node->errors[0];
1236       b[1]->error = error_node->errors[1];
1237
1238       if (head_of_feature_arc)
1239         {
1240           u8 ip6_unknown[2];
1241           ip6_unknown[0] = error[0] == (u8) IP6_ERROR_UNKNOWN_PROTOCOL;
1242           ip6_unknown[1] = error[1] == (u8) IP6_ERROR_UNKNOWN_PROTOCOL;
1243           if (PREDICT_TRUE (ip6_unknown[0]))
1244             {
1245               u32 next32 = next[0];
1246               vnet_feature_arc_start (arc_index,
1247                                       vnet_buffer (b[0])->sw_if_index
1248                                       [VLIB_RX], &next32, b[0]);
1249               next[0] = next32;
1250             }
1251           if (PREDICT_TRUE (ip6_unknown[1]))
1252             {
1253               u32 next32 = next[1];
1254               vnet_feature_arc_start (arc_index,
1255                                       vnet_buffer (b[1])->sw_if_index
1256                                       [VLIB_RX], &next32, b[1]);
1257               next[1] = next32;
1258             }
1259         }
1260
1261       /* next */
1262       b += 2;
1263       next += 2;
1264       n_left_from -= 2;
1265     }
1266
1267   while (n_left_from)
1268     {
1269       u8 error;
1270       error = IP6_ERROR_UNKNOWN_PROTOCOL;
1271
1272       ip6_header_t *ip;
1273       ip = vlib_buffer_get_current (b[0]);
1274
1275       if (head_of_feature_arc)
1276         {
1277           vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1278           u8 type = lm->builtin_protocol_by_ip_protocol[ip->protocol];
1279
1280           u32 flags = b[0]->flags;
1281           u32 good_l4_csum =
1282             flags & (VNET_BUFFER_F_L4_CHECKSUM_CORRECT |
1283                      VNET_BUFFER_F_OFFLOAD_TCP_CKSUM |
1284                      VNET_BUFFER_F_OFFLOAD_UDP_CKSUM);
1285
1286           u32 udp_offset;
1287           i16 len_diff = 0;
1288           u8 is_tcp_udp = ip6_next_proto_is_tcp_udp (b[0], ip, &udp_offset);
1289           if (PREDICT_TRUE (is_tcp_udp))
1290             {
1291               udp_header_t *udp = (udp_header_t *) ((u8 *) ip + udp_offset);
1292               /* Don't verify UDP checksum for packets with explicit zero checksum. */
1293               good_l4_csum |= type == IP_BUILTIN_PROTOCOL_UDP
1294                 && udp->checksum == 0;
1295               /* optimistically verify UDP length. */
1296               u16 ip_len, udp_len;
1297               ip_len = clib_net_to_host_u16 (ip->payload_length);
1298               udp_len = clib_net_to_host_u16 (udp->length);
1299               len_diff = ip_len - udp_len;
1300             }
1301
1302           good_l4_csum |= type == IP_BUILTIN_PROTOCOL_UNKNOWN;
1303           len_diff = type == IP_BUILTIN_PROTOCOL_UDP ? len_diff : 0;
1304
1305           u8 need_csum = type != IP_BUILTIN_PROTOCOL_UNKNOWN && !good_l4_csum
1306             && !(flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
1307           if (PREDICT_FALSE (need_csum))
1308             {
1309               flags = ip6_tcp_udp_icmp_validate_checksum (vm, b[0]);
1310               good_l4_csum = flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
1311             }
1312
1313           error = IP6_ERROR_UNKNOWN_PROTOCOL;
1314           error = len_diff < 0 ? IP6_ERROR_UDP_LENGTH : error;
1315
1316           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
1317                          IP6_ERROR_UDP_CHECKSUM,
1318                          "Wrong IP6 errors constants");
1319           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
1320                          IP6_ERROR_ICMP_CHECKSUM,
1321                          "Wrong IP6 errors constants");
1322
1323           error = !good_l4_csum ? IP6_ERROR_UDP_CHECKSUM + type : error;
1324
1325           /* Drop packets from unroutable hosts. */
1326           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1327           u8 unroutable = error == IP6_ERROR_UNKNOWN_PROTOCOL
1328             && type != IP_BUILTIN_PROTOCOL_ICMP
1329             && !ip6_address_is_link_local_unicast (&ip->src_address);
1330           if (PREDICT_FALSE (unroutable))
1331             {
1332               error =
1333                 !ip6_urpf_loose_check (im, b[0],
1334                                        ip) ? IP6_ERROR_SRC_LOOKUP_MISS :
1335                 error;
1336             }
1337
1338           vnet_buffer (b[0])->ip.fib_index =
1339             vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1340             vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1341             vnet_buffer (b[0])->ip.fib_index;
1342         }                       /* head_of_feature_arc */
1343
1344       next[0] = lm->local_next_by_ip_protocol[ip->protocol];
1345       next[0] =
1346         error != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[0];
1347
1348       b[0]->error = error_node->errors[0];
1349
1350       if (head_of_feature_arc)
1351         {
1352           if (PREDICT_TRUE (error == (u8) IP6_ERROR_UNKNOWN_PROTOCOL))
1353             {
1354               u32 next32 = next[0];
1355               vnet_feature_arc_start (arc_index,
1356                                       vnet_buffer (b[0])->sw_if_index
1357                                       [VLIB_RX], &next32, b[0]);
1358               next[0] = next32;
1359             }
1360         }
1361
1362       /* next */
1363       b += 1;
1364       next += 1;
1365       n_left_from -= 1;
1366     }
1367
1368   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1369   return frame->n_vectors;
1370 }
1371
1372 VLIB_NODE_FN (ip6_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1373                                vlib_frame_t * frame)
1374 {
1375   return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1376 }
1377
1378 /* *INDENT-OFF* */
1379 VLIB_REGISTER_NODE (ip6_local_node) =
1380 {
1381   .name = "ip6-local",
1382   .vector_size = sizeof (u32),
1383   .format_trace = format_ip6_forward_next_trace,
1384   .n_next_nodes = IP_LOCAL_N_NEXT,
1385   .next_nodes =
1386   {
1387     [IP_LOCAL_NEXT_DROP] = "ip6-drop",
1388     [IP_LOCAL_NEXT_PUNT] = "ip6-punt",
1389     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
1390     [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input",
1391     [IP_LOCAL_NEXT_REASSEMBLY] = "ip6-reassembly",
1392   },
1393 };
1394 /* *INDENT-ON* */
1395
1396 VLIB_NODE_FN (ip6_local_end_of_arc_node) (vlib_main_t * vm,
1397                                           vlib_node_runtime_t * node,
1398                                           vlib_frame_t * frame)
1399 {
1400   return ip6_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1401 }
1402
1403 /* *INDENT-OFF* */
1404 VLIB_REGISTER_NODE (ip6_local_end_of_arc_node) = {
1405   .name = "ip6-local-end-of-arc",
1406   .vector_size = sizeof (u32),
1407
1408   .format_trace = format_ip6_forward_next_trace,
1409   .sibling_of = "ip6-local",
1410 };
1411
1412 VNET_FEATURE_INIT (ip6_local_end_of_arc, static) = {
1413   .arc_name = "ip6-local",
1414   .node_name = "ip6-local-end-of-arc",
1415   .runs_before = 0, /* not before any other features */
1416 };
1417 /* *INDENT-ON* */
1418
1419 #ifdef CLIB_MARCH_VARIANT
1420 extern vlib_node_registration_t ip6_local_node;
1421
1422 #else
1423
1424 void
1425 ip6_register_protocol (u32 protocol, u32 node_index)
1426 {
1427   vlib_main_t *vm = vlib_get_main ();
1428   ip6_main_t *im = &ip6_main;
1429   ip_lookup_main_t *lm = &im->lookup_main;
1430
1431   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1432   lm->local_next_by_ip_protocol[protocol] =
1433     vlib_node_add_next (vm, ip6_local_node.index, node_index);
1434 }
1435
1436 void
1437 ip6_unregister_protocol (u32 protocol)
1438 {
1439   ip6_main_t *im = &ip6_main;
1440   ip_lookup_main_t *lm = &im->lookup_main;
1441
1442   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1443   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1444 }
1445
1446 clib_error_t *
1447 ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index,
1448                     u8 refresh)
1449 {
1450   vnet_main_t *vnm = vnet_get_main ();
1451   ip6_main_t *im = &ip6_main;
1452   icmp6_neighbor_solicitation_header_t *h;
1453   ip6_address_t *src;
1454   ip_interface_address_t *ia;
1455   ip_adjacency_t *adj;
1456   vnet_hw_interface_t *hi;
1457   vnet_sw_interface_t *si;
1458   vlib_buffer_t *b;
1459   adj_index_t ai;
1460   u32 bi = 0;
1461   int bogus_length;
1462
1463   si = vnet_get_sw_interface (vnm, sw_if_index);
1464
1465   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
1466     {
1467       return clib_error_return (0, "%U: interface %U down",
1468                                 format_ip6_address, dst,
1469                                 format_vnet_sw_if_index_name, vnm,
1470                                 sw_if_index);
1471     }
1472
1473   src =
1474     ip6_interface_address_matching_destination (im, dst, sw_if_index, &ia);
1475   if (!src)
1476     {
1477       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
1478       return clib_error_return
1479         (0, "no matching interface address for destination %U (interface %U)",
1480          format_ip6_address, dst,
1481          format_vnet_sw_if_index_name, vnm, sw_if_index);
1482     }
1483
1484   h =
1485     vlib_packet_template_get_packet (vm,
1486                                      &im->discover_neighbor_packet_template,
1487                                      &bi);
1488   if (!h)
1489     return clib_error_return (0, "ICMP6 NS packet allocation failed");
1490
1491   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
1492
1493   /* Destination address is a solicited node multicast address.  We need to fill in
1494      the low 24 bits with low 24 bits of target's address. */
1495   h->ip.dst_address.as_u8[13] = dst->as_u8[13];
1496   h->ip.dst_address.as_u8[14] = dst->as_u8[14];
1497   h->ip.dst_address.as_u8[15] = dst->as_u8[15];
1498
1499   h->ip.src_address = src[0];
1500   h->neighbor.target_address = dst[0];
1501
1502   if (PREDICT_FALSE (!hi->hw_address))
1503     {
1504       return clib_error_return (0, "%U: interface %U do not support ip probe",
1505                                 format_ip6_address, dst,
1506                                 format_vnet_sw_if_index_name, vnm,
1507                                 sw_if_index);
1508     }
1509
1510   clib_memcpy_fast (h->link_layer_option.ethernet_address, hi->hw_address,
1511                     vec_len (hi->hw_address));
1512
1513   h->neighbor.icmp.checksum =
1514     ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length);
1515   ASSERT (bogus_length == 0);
1516
1517   b = vlib_get_buffer (vm, bi);
1518   vnet_buffer (b)->sw_if_index[VLIB_RX] =
1519     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
1520
1521   /* Add encapsulation string for software interface (e.g. ethernet header). */
1522   ip46_address_t nh = {
1523     .ip6 = *dst,
1524   };
1525
1526   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6,
1527                             VNET_LINK_IP6, &nh, sw_if_index);
1528   adj = adj_get (ai);
1529
1530   /* Peer has been previously resolved, retrieve glean adj instead */
1531   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE && refresh == 0)
1532     {
1533       adj_unlock (ai);
1534       ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP6,
1535                                   VNET_LINK_IP6, sw_if_index, &nh);
1536       adj = adj_get (ai);
1537     }
1538
1539   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
1540   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
1541
1542   {
1543     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
1544     u32 *to_next = vlib_frame_vector_args (f);
1545     to_next[0] = bi;
1546     f->n_vectors = 1;
1547     vlib_put_frame_to_node (vm, hi->output_node_index, f);
1548   }
1549
1550   adj_unlock (ai);
1551   return /* no error */ 0;
1552 }
1553 #endif
1554
/* Next-node indices used by the ip6 rewrite code. */
typedef enum
{
  /* Initial next index of every packet. */
  IP6_REWRITE_NEXT_DROP = 0,
  /* Selected for hop limit expiry and for "packet too big". */
  IP6_REWRITE_NEXT_ICMP_ERROR = 1,
  /* Selected for locally generated packets exceeding the MTU. */
  IP6_REWRITE_NEXT_FRAGMENT = 2,
  IP6_REWRITE_N_NEXT = 3        /* Last */
} ip6_rewrite_next_t;
1562
/**
 * The bits of an IPv6 address to keep (mask) when constructing a
 * multicast MAC address
 */
#define IP6_MCAST_ADDR_MASK 0xffffffff
1568
1569 always_inline void
1570 ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes,
1571                u16 adj_packet_bytes, bool is_locally_generated,
1572                u32 * next, u32 * error)
1573 {
1574   if (adj_packet_bytes >= 1280 && packet_bytes > adj_packet_bytes)
1575     {
1576       if (is_locally_generated)
1577         {
1578           /* IP fragmentation */
1579           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
1580                                    IP6_FRAG_NEXT_IP6_REWRITE, 0);
1581           *next = IP6_REWRITE_NEXT_FRAGMENT;
1582           *error = IP6_ERROR_MTU_EXCEEDED;
1583         }
1584       else
1585         {
1586           *error = IP6_ERROR_MTU_EXCEEDED;
1587           icmp6_error_set_vnet_buffer (b, ICMP6_packet_too_big, 0,
1588                                        adj_packet_bytes);
1589           *next = IP6_REWRITE_NEXT_ICMP_ERROR;
1590         }
1591     }
1592 }
1593
1594 always_inline uword
1595 ip6_rewrite_inline_with_gso (vlib_main_t * vm,
1596                              vlib_node_runtime_t * node,
1597                              vlib_frame_t * frame,
1598                              int do_counters, int is_midchain, int is_mcast,
1599                              int do_gso)
1600 {
1601   ip_lookup_main_t *lm = &ip6_main.lookup_main;
1602   u32 *from = vlib_frame_vector_args (frame);
1603   u32 n_left_from, n_left_to_next, *to_next, next_index;
1604   vlib_node_runtime_t *error_node =
1605     vlib_node_get_runtime (vm, ip6_input_node.index);
1606
1607   n_left_from = frame->n_vectors;
1608   next_index = node->cached_next_index;
1609   u32 thread_index = vm->thread_index;
1610
1611   while (n_left_from > 0)
1612     {
1613       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1614
1615       while (n_left_from >= 4 && n_left_to_next >= 2)
1616         {
1617           ip_adjacency_t *adj0, *adj1;
1618           vlib_buffer_t *p0, *p1;
1619           ip6_header_t *ip0, *ip1;
1620           u32 pi0, rw_len0, next0, error0, adj_index0;
1621           u32 pi1, rw_len1, next1, error1, adj_index1;
1622           u32 tx_sw_if_index0, tx_sw_if_index1;
1623           bool is_locally_originated0, is_locally_originated1;
1624
1625           /* Prefetch next iteration. */
1626           {
1627             vlib_buffer_t *p2, *p3;
1628
1629             p2 = vlib_get_buffer (vm, from[2]);
1630             p3 = vlib_get_buffer (vm, from[3]);
1631
1632             vlib_prefetch_buffer_header (p2, LOAD);
1633             vlib_prefetch_buffer_header (p3, LOAD);
1634
1635             CLIB_PREFETCH (p2->pre_data, 32, STORE);
1636             CLIB_PREFETCH (p3->pre_data, 32, STORE);
1637
1638             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
1639             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
1640           }
1641
1642           pi0 = to_next[0] = from[0];
1643           pi1 = to_next[1] = from[1];
1644
1645           from += 2;
1646           n_left_from -= 2;
1647           to_next += 2;
1648           n_left_to_next -= 2;
1649
1650           p0 = vlib_get_buffer (vm, pi0);
1651           p1 = vlib_get_buffer (vm, pi1);
1652
1653           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1654           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
1655
1656           ip0 = vlib_buffer_get_current (p0);
1657           ip1 = vlib_buffer_get_current (p1);
1658
1659           error0 = error1 = IP6_ERROR_NONE;
1660           next0 = next1 = IP6_REWRITE_NEXT_DROP;
1661
1662           is_locally_originated0 =
1663             p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
1664           if (PREDICT_TRUE (!is_locally_originated0))
1665             {
1666               i32 hop_limit0 = ip0->hop_limit;
1667
1668               /* Input node should have reject packets with hop limit 0. */
1669               ASSERT (ip0->hop_limit > 0);
1670
1671               hop_limit0 -= 1;
1672
1673               ip0->hop_limit = hop_limit0;
1674
1675               /*
1676                * If the hop count drops below 1 when forwarding, generate
1677                * an ICMP response.
1678                */
1679               if (PREDICT_FALSE (hop_limit0 <= 0))
1680                 {
1681                   error0 = IP6_ERROR_TIME_EXPIRED;
1682                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
1683                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1684                   icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
1685                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
1686                                                0);
1687                 }
1688             }
1689           else
1690             {
1691               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
1692             }
1693           is_locally_originated1 =
1694             p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
1695           if (PREDICT_TRUE (!is_locally_originated1))
1696             {
1697               i32 hop_limit1 = ip1->hop_limit;
1698
1699               /* Input node should have reject packets with hop limit 0. */
1700               ASSERT (ip1->hop_limit > 0);
1701
1702               hop_limit1 -= 1;
1703
1704               ip1->hop_limit = hop_limit1;
1705
1706               /*
1707                * If the hop count drops below 1 when forwarding, generate
1708                * an ICMP response.
1709                */
1710               if (PREDICT_FALSE (hop_limit1 <= 0))
1711                 {
1712                   error1 = IP6_ERROR_TIME_EXPIRED;
1713                   next1 = IP6_REWRITE_NEXT_ICMP_ERROR;
1714                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1715                   icmp6_error_set_vnet_buffer (p1, ICMP6_time_exceeded,
1716                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
1717                                                0);
1718                 }
1719             }
1720           else
1721             {
1722               p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
1723             }
1724           adj0 = adj_get (adj_index0);
1725           adj1 = adj_get (adj_index1);
1726
1727           rw_len0 = adj0[0].rewrite_header.data_bytes;
1728           rw_len1 = adj1[0].rewrite_header.data_bytes;
1729           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
1730           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
1731
1732           if (do_counters)
1733             {
1734               vlib_increment_combined_counter
1735                 (&adjacency_counters,
1736                  thread_index, adj_index0, 1,
1737                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
1738               vlib_increment_combined_counter
1739                 (&adjacency_counters,
1740                  thread_index, adj_index1, 1,
1741                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
1742             }
1743
1744           /* Check MTU of outgoing interface. */
1745           u16 ip0_len =
1746             clib_net_to_host_u16 (ip0->payload_length) +
1747             sizeof (ip6_header_t);
1748           u16 ip1_len =
1749             clib_net_to_host_u16 (ip1->payload_length) +
1750             sizeof (ip6_header_t);
1751           if (do_gso && (p0->flags & VNET_BUFFER_F_GSO))
1752             ip0_len = gso_mtu_sz (p0);
1753           if (do_gso && (p1->flags & VNET_BUFFER_F_GSO))
1754             ip1_len = gso_mtu_sz (p1);
1755
1756
1757
1758           ip6_mtu_check (p0, ip0_len,
1759                          adj0[0].rewrite_header.max_l3_packet_bytes,
1760                          is_locally_originated0, &next0, &error0);
1761           ip6_mtu_check (p1, ip1_len,
1762                          adj1[0].rewrite_header.max_l3_packet_bytes,
1763                          is_locally_originated1, &next1, &error1);
1764
1765           /* Don't adjust the buffer for hop count issue; icmp-error node
1766            * wants to see the IP header */
1767           if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
1768             {
1769               p0->current_data -= rw_len0;
1770               p0->current_length += rw_len0;
1771
1772               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
1773               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
1774               next0 = adj0[0].rewrite_header.next_index;
1775
1776               if (PREDICT_FALSE
1777                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
1778                 vnet_feature_arc_start (lm->output_feature_arc_index,
1779                                         tx_sw_if_index0, &next0, p0);
1780             }
1781           else
1782             {
1783               p0->error = error_node->errors[error0];
1784             }
1785           if (PREDICT_TRUE (error1 == IP6_ERROR_NONE))
1786             {
1787               p1->current_data -= rw_len1;
1788               p1->current_length += rw_len1;
1789
1790               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
1791               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
1792               next1 = adj1[0].rewrite_header.next_index;
1793
1794               if (PREDICT_FALSE
1795                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
1796                 vnet_feature_arc_start (lm->output_feature_arc_index,
1797                                         tx_sw_if_index1, &next1, p1);
1798             }
1799           else
1800             {
1801               p1->error = error_node->errors[error1];
1802             }
1803
1804           if (is_midchain)
1805             {
1806               /* before we paint on the next header, update the L4
1807                * checksums if required, since there's no offload on a tunnel */
1808               calc_checksums (vm, p0);
1809               calc_checksums (vm, p1);
1810             }
1811
1812           /* Guess we are only writing on simple Ethernet header. */
1813           vnet_rewrite_two_headers (adj0[0], adj1[0],
1814                                     ip0, ip1, sizeof (ethernet_header_t));
1815
1816           if (is_midchain)
1817             {
1818               if (adj0->sub_type.midchain.fixup_func)
1819                 adj0->sub_type.midchain.fixup_func
1820                   (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
1821               if (adj1->sub_type.midchain.fixup_func)
1822                 adj1->sub_type.midchain.fixup_func
1823                   (vm, adj1, p1, adj1->sub_type.midchain.fixup_data);
1824             }
1825           if (is_mcast)
1826             {
1827               /*
1828                * copy bytes from the IP address into the MAC rewrite
1829                */
1830               vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
1831                                           adj0->
1832                                           rewrite_header.dst_mcast_offset,
1833                                           &ip0->dst_address.as_u32[3],
1834                                           (u8 *) ip0);
1835               vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
1836                                           adj1->
1837                                           rewrite_header.dst_mcast_offset,
1838                                           &ip1->dst_address.as_u32[3],
1839                                           (u8 *) ip1);
1840             }
1841
1842           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1843                                            to_next, n_left_to_next,
1844                                            pi0, pi1, next0, next1);
1845         }
1846
1847       while (n_left_from > 0 && n_left_to_next > 0)
1848         {
1849           ip_adjacency_t *adj0;
1850           vlib_buffer_t *p0;
1851           ip6_header_t *ip0;
1852           u32 pi0, rw_len0;
1853           u32 adj_index0, next0, error0;
1854           u32 tx_sw_if_index0;
1855           bool is_locally_originated0;
1856
1857           pi0 = to_next[0] = from[0];
1858
1859           p0 = vlib_get_buffer (vm, pi0);
1860
1861           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1862
1863           adj0 = adj_get (adj_index0);
1864
1865           ip0 = vlib_buffer_get_current (p0);
1866
1867           error0 = IP6_ERROR_NONE;
1868           next0 = IP6_REWRITE_NEXT_DROP;
1869
1870           /* Check hop limit */
1871           is_locally_originated0 =
1872             p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
1873           if (PREDICT_TRUE (!is_locally_originated0))
1874             {
1875               i32 hop_limit0 = ip0->hop_limit;
1876
1877               ASSERT (ip0->hop_limit > 0);
1878
1879               hop_limit0 -= 1;
1880
1881               ip0->hop_limit = hop_limit0;
1882
1883               if (PREDICT_FALSE (hop_limit0 <= 0))
1884                 {
1885                   /*
1886                    * If the hop count drops below 1 when forwarding, generate
1887                    * an ICMP response.
1888                    */
1889                   error0 = IP6_ERROR_TIME_EXPIRED;
1890                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
1891                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1892                   icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
1893                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
1894                                                0);
1895                 }
1896             }
1897           else
1898             {
1899               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
1900             }
1901
1902           if (is_midchain)
1903             {
1904               calc_checksums (vm, p0);
1905             }
1906
1907           /* Guess we are only writing on simple Ethernet header. */
1908           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
1909
1910           /* Update packet buffer attributes/set output interface. */
1911           rw_len0 = adj0[0].rewrite_header.data_bytes;
1912           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
1913
1914           if (do_counters)
1915             {
1916               vlib_increment_combined_counter
1917                 (&adjacency_counters,
1918                  thread_index, adj_index0, 1,
1919                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
1920             }
1921
1922           /* Check MTU of outgoing interface. */
1923           u16 ip0_len =
1924             clib_net_to_host_u16 (ip0->payload_length) +
1925             sizeof (ip6_header_t);
1926           if (do_gso && (p0->flags & VNET_BUFFER_F_GSO))
1927             ip0_len = gso_mtu_sz (p0);
1928
1929           ip6_mtu_check (p0, ip0_len,
1930                          adj0[0].rewrite_header.max_l3_packet_bytes,
1931                          is_locally_originated0, &next0, &error0);
1932
1933           /* Don't adjust the buffer for hop count issue; icmp-error node
1934            * wants to see the IP header */
1935           if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
1936             {
1937               p0->current_data -= rw_len0;
1938               p0->current_length += rw_len0;
1939
1940               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
1941
1942               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
1943               next0 = adj0[0].rewrite_header.next_index;
1944
1945               if (PREDICT_FALSE
1946                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
1947                 vnet_feature_arc_start (lm->output_feature_arc_index,
1948                                         tx_sw_if_index0, &next0, p0);
1949             }
1950           else
1951             {
1952               p0->error = error_node->errors[error0];
1953             }
1954
1955           if (is_midchain)
1956             {
1957               if (adj0->sub_type.midchain.fixup_func)
1958                 adj0->sub_type.midchain.fixup_func
1959                   (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
1960             }
1961           if (is_mcast)
1962             {
1963               vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
1964                                           adj0->
1965                                           rewrite_header.dst_mcast_offset,
1966                                           &ip0->dst_address.as_u32[3],
1967                                           (u8 *) ip0);
1968             }
1969
1970           from += 1;
1971           n_left_from -= 1;
1972           to_next += 1;
1973           n_left_to_next -= 1;
1974
1975           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1976                                            to_next, n_left_to_next,
1977                                            pi0, next0);
1978         }
1979
1980       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1981     }
1982
1983   /* Need to do trace after rewrites to pick up new packet data. */
1984   if (node->flags & VLIB_NODE_FLAG_TRACE)
1985     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1986
1987   return frame->n_vectors;
1988 }
1989
1990 always_inline uword
1991 ip6_rewrite_inline (vlib_main_t * vm,
1992                     vlib_node_runtime_t * node,
1993                     vlib_frame_t * frame,
1994                     int do_counters, int is_midchain, int is_mcast)
1995 {
1996   vnet_main_t *vnm = vnet_get_main ();
1997   if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
1998     return ip6_rewrite_inline_with_gso (vm, node, frame, do_counters,
1999                                         is_midchain, is_mcast,
2000                                         1 /* do_gso */ );
2001   else
2002     return ip6_rewrite_inline_with_gso (vm, node, frame, do_counters,
2003                                         is_midchain, is_mcast,
2004                                         0 /* no do_gso */ );
2005 }
2006
2007 VLIB_NODE_FN (ip6_rewrite_node) (vlib_main_t * vm,
2008                                  vlib_node_runtime_t * node,
2009                                  vlib_frame_t * frame)
2010 {
2011   if (adj_are_counters_enabled ())
2012     return ip6_rewrite_inline (vm, node, frame, 1, 0, 0);
2013   else
2014     return ip6_rewrite_inline (vm, node, frame, 0, 0, 0);
2015 }
2016
2017 VLIB_NODE_FN (ip6_rewrite_bcast_node) (vlib_main_t * vm,
2018                                        vlib_node_runtime_t * node,
2019                                        vlib_frame_t * frame)
2020 {
2021   if (adj_are_counters_enabled ())
2022     return ip6_rewrite_inline (vm, node, frame, 1, 0, 0);
2023   else
2024     return ip6_rewrite_inline (vm, node, frame, 0, 0, 0);
2025 }
2026
2027 VLIB_NODE_FN (ip6_rewrite_mcast_node) (vlib_main_t * vm,
2028                                        vlib_node_runtime_t * node,
2029                                        vlib_frame_t * frame)
2030 {
2031   if (adj_are_counters_enabled ())
2032     return ip6_rewrite_inline (vm, node, frame, 1, 0, 1);
2033   else
2034     return ip6_rewrite_inline (vm, node, frame, 0, 0, 1);
2035 }
2036
2037 VLIB_NODE_FN (ip6_midchain_node) (vlib_main_t * vm,
2038                                   vlib_node_runtime_t * node,
2039                                   vlib_frame_t * frame)
2040 {
2041   if (adj_are_counters_enabled ())
2042     return ip6_rewrite_inline (vm, node, frame, 1, 1, 0);
2043   else
2044     return ip6_rewrite_inline (vm, node, frame, 0, 1, 0);
2045 }
2046
2047 VLIB_NODE_FN (ip6_mcast_midchain_node) (vlib_main_t * vm,
2048                                         vlib_node_runtime_t * node,
2049                                         vlib_frame_t * frame)
2050 {
2051   if (adj_are_counters_enabled ())
2052     return ip6_rewrite_inline (vm, node, frame, 1, 1, 1);
2053   else
2054     return ip6_rewrite_inline (vm, node, frame, 0, 1, 1);
2055 }
2056
2057 /* *INDENT-OFF* */
2058 VLIB_REGISTER_NODE (ip6_midchain_node) =
2059 {
2060   .name = "ip6-midchain",
2061   .vector_size = sizeof (u32),
2062   .format_trace = format_ip6_forward_next_trace,
2063   .sibling_of = "ip6-rewrite",
2064   };
2065
2066 VLIB_REGISTER_NODE (ip6_rewrite_node) =
2067 {
2068   .name = "ip6-rewrite",
2069   .vector_size = sizeof (u32),
2070   .format_trace = format_ip6_rewrite_trace,
2071   .n_next_nodes = IP6_REWRITE_N_NEXT,
2072   .next_nodes =
2073   {
2074     [IP6_REWRITE_NEXT_DROP] = "ip6-drop",
2075     [IP6_REWRITE_NEXT_ICMP_ERROR] = "ip6-icmp-error",
2076     [IP6_REWRITE_NEXT_FRAGMENT] = "ip6-frag",
2077   },
2078 };
2079
2080 VLIB_REGISTER_NODE (ip6_rewrite_bcast_node) = {
2081   .name = "ip6-rewrite-bcast",
2082   .vector_size = sizeof (u32),
2083
2084   .format_trace = format_ip6_rewrite_trace,
2085   .sibling_of = "ip6-rewrite",
2086 };
2087
2088 VLIB_REGISTER_NODE (ip6_rewrite_mcast_node) =
2089 {
2090   .name = "ip6-rewrite-mcast",
2091   .vector_size = sizeof (u32),
2092   .format_trace = format_ip6_rewrite_trace,
2093   .sibling_of = "ip6-rewrite",
2094 };
2095
2096
2097 VLIB_REGISTER_NODE (ip6_mcast_midchain_node) =
2098 {
2099   .name = "ip6-mcast-midchain",
2100   .vector_size = sizeof (u32),
2101   .format_trace = format_ip6_rewrite_trace,
2102   .sibling_of = "ip6-rewrite",
2103 };
2104
2105 /* *INDENT-ON* */
2106
2107 /*
2108  * Hop-by-Hop handling
2109  */
#ifndef CLIB_MARCH_VARIANT
/*
 * Global hop-by-hop state.  Code in this file reads its per-option-type
 * handler table (options[]), per-option-type trace formatter table
 * (trace[]) and next_override.  Defined only when CLIB_MARCH_VARIANT is
 * not set, so the object is emitted once rather than per march variant.
 */
ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
#endif /* CLIB_MARCH_VARIANT */
2113
/*
 * X-macro list of the hop-by-hop node's errors: _(symbol, description).
 * It is expanded twice below, once to build ip6_hop_by_hop_error_t and
 * once to build ip6_hop_by_hop_error_strings[], so the enum values and
 * the string table stay in the same order.
 */
#define foreach_ip6_hop_by_hop_error \
_(PROCESSED, "pkts with ip6 hop-by-hop options") \
_(FORMAT, "incorrectly formatted hop-by-hop options") \
_(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options")
2118
2119 /* *INDENT-OFF* */
/*
 * Error codes of the hop-by-hop node, generated from
 * foreach_ip6_hop_by_hop_error as IP6_HOP_BY_HOP_ERROR_<sym>.
 * PROCESSED is the first entry and therefore has value 0, which is what
 * the node uses as "no error".  IP6_HOP_BY_HOP_N_ERROR is the entry count.
 */
typedef enum
{
#define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
  foreach_ip6_hop_by_hop_error
#undef _
  IP6_HOP_BY_HOP_N_ERROR,
} ip6_hop_by_hop_error_t;
2127 /* *INDENT-ON* */
2128
2129 /*
2130  * Primary h-b-h handler trace support
2131  * We work pretty hard on the problem for obvious reasons
2132  */
/* Per-packet trace record written by the hop-by-hop node. */
typedef struct
{
  /* Next index chosen for the packet at the time it was traced */
  u32 next_index;
  /* Number of valid bytes in option_data (clamped to its size by the node) */
  u32 trace_len;
  /* Verbatim copy of the hop-by-hop header, starting at the header itself */
  u8 option_data[256];
} ip6_hop_by_hop_trace_t;
2139
/* Forward declaration: the node function refers to its own registration. */
extern vlib_node_registration_t ip6_hop_by_hop_node;

/*
 * Human-readable error descriptions, one per foreach_ip6_hop_by_hop_error
 * entry and in the same order as ip6_hop_by_hop_error_t.
 */
static char *ip6_hop_by_hop_error_strings[] = {
#define _(sym,string) string,
  foreach_ip6_hop_by_hop_error
#undef _
};
2147
2148 #ifndef CLIB_MARCH_VARIANT
2149 u8 *
2150 format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args)
2151 {
2152   ip6_hop_by_hop_header_t *hbh0 = va_arg (*args, ip6_hop_by_hop_header_t *);
2153   int total_len = va_arg (*args, int);
2154   ip6_hop_by_hop_option_t *opt0, *limit0;
2155   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2156   u8 type0;
2157
2158   s = format (s, "IP6_HOP_BY_HOP: next protocol %d len %d total %d",
2159               hbh0->protocol, (hbh0->length + 1) << 3, total_len);
2160
2161   opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2162   limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 + total_len);
2163
2164   while (opt0 < limit0)
2165     {
2166       type0 = opt0->type;
2167       switch (type0)
2168         {
2169         case 0:         /* Pad, just stop */
2170           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1);
2171           break;
2172
2173         default:
2174           if (hm->trace[type0])
2175             {
2176               s = (*hm->trace[type0]) (s, opt0);
2177             }
2178           else
2179             {
2180               s =
2181                 format (s, "\n    unrecognized option %d length %d", type0,
2182                         opt0->length);
2183             }
2184           opt0 =
2185             (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2186                                          sizeof (ip6_hop_by_hop_option_t));
2187           break;
2188         }
2189     }
2190   return s;
2191 }
2192 #endif
2193
2194 static u8 *
2195 format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
2196 {
2197   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
2198   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
2199   ip6_hop_by_hop_trace_t *t = va_arg (*args, ip6_hop_by_hop_trace_t *);
2200   ip6_hop_by_hop_header_t *hbh0;
2201   ip6_hop_by_hop_option_t *opt0, *limit0;
2202   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2203
2204   u8 type0;
2205
2206   hbh0 = (ip6_hop_by_hop_header_t *) t->option_data;
2207
2208   s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d",
2209               t->next_index, (hbh0->length + 1) << 3, t->trace_len);
2210
2211   opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2212   limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0) + t->trace_len;
2213
2214   while (opt0 < limit0)
2215     {
2216       type0 = opt0->type;
2217       switch (type0)
2218         {
2219         case 0:         /* Pad, just stop */
2220           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1;
2221           break;
2222
2223         default:
2224           if (hm->trace[type0])
2225             {
2226               s = (*hm->trace[type0]) (s, opt0);
2227             }
2228           else
2229             {
2230               s =
2231                 format (s, "\n    unrecognized option %d length %d", type0,
2232                         opt0->length);
2233             }
2234           opt0 =
2235             (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2236                                          sizeof (ip6_hop_by_hop_option_t));
2237           break;
2238         }
2239     }
2240   return s;
2241 }
2242
2243 always_inline u8
2244 ip6_scan_hbh_options (vlib_buffer_t * b0,
2245                       ip6_header_t * ip0,
2246                       ip6_hop_by_hop_header_t * hbh0,
2247                       ip6_hop_by_hop_option_t * opt0,
2248                       ip6_hop_by_hop_option_t * limit0, u32 * next0)
2249 {
2250   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2251   u8 type0;
2252   u8 error0 = 0;
2253
2254   while (opt0 < limit0)
2255     {
2256       type0 = opt0->type;
2257       switch (type0)
2258         {
2259         case 0:         /* Pad1 */
2260           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1;
2261           continue;
2262         case 1:         /* PadN */
2263           break;
2264         default:
2265           if (hm->options[type0])
2266             {
2267               if ((*hm->options[type0]) (b0, ip0, opt0) < 0)
2268                 {
2269                   error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2270                   return (error0);
2271                 }
2272             }
2273           else
2274             {
2275               /* Unrecognized mandatory option, check the two high order bits */
2276               switch (opt0->type & HBH_OPTION_TYPE_HIGH_ORDER_BITS)
2277                 {
2278                 case HBH_OPTION_TYPE_SKIP_UNKNOWN:
2279                   break;
2280                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN:
2281                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2282                   *next0 = IP_LOOKUP_NEXT_DROP;
2283                   break;
2284                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP:
2285                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2286                   *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
2287                   icmp6_error_set_vnet_buffer (b0, ICMP6_parameter_problem,
2288                                                ICMP6_parameter_problem_unrecognized_option,
2289                                                (u8 *) opt0 - (u8 *) ip0);
2290                   break;
2291                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST:
2292                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2293                   if (!ip6_address_is_multicast (&ip0->dst_address))
2294                     {
2295                       *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
2296                       icmp6_error_set_vnet_buffer (b0,
2297                                                    ICMP6_parameter_problem,
2298                                                    ICMP6_parameter_problem_unrecognized_option,
2299                                                    (u8 *) opt0 - (u8 *) ip0);
2300                     }
2301                   else
2302                     {
2303                       *next0 = IP_LOOKUP_NEXT_DROP;
2304                     }
2305                   break;
2306                 }
2307               return (error0);
2308             }
2309         }
2310       opt0 =
2311         (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2312                                      sizeof (ip6_hop_by_hop_option_t));
2313     }
2314   return (error0);
2315 }
2316
2317 /*
2318  * Process the Hop-by-Hop Options header
2319  */
2320 VLIB_NODE_FN (ip6_hop_by_hop_node) (vlib_main_t * vm,
2321                                     vlib_node_runtime_t * node,
2322                                     vlib_frame_t * frame)
2323 {
2324   vlib_node_runtime_t *error_node =
2325     vlib_node_get_runtime (vm, ip6_hop_by_hop_node.index);
2326   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2327   u32 n_left_from, *from, *to_next;
2328   ip_lookup_next_t next_index;
2329
2330   from = vlib_frame_vector_args (frame);
2331   n_left_from = frame->n_vectors;
2332   next_index = node->cached_next_index;
2333
2334   while (n_left_from > 0)
2335     {
2336       u32 n_left_to_next;
2337
2338       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2339
2340       while (n_left_from >= 4 && n_left_to_next >= 2)
2341         {
2342           u32 bi0, bi1;
2343           vlib_buffer_t *b0, *b1;
2344           u32 next0, next1;
2345           ip6_header_t *ip0, *ip1;
2346           ip6_hop_by_hop_header_t *hbh0, *hbh1;
2347           ip6_hop_by_hop_option_t *opt0, *limit0, *opt1, *limit1;
2348           u8 error0 = 0, error1 = 0;
2349
2350           /* Prefetch next iteration. */
2351           {
2352             vlib_buffer_t *p2, *p3;
2353
2354             p2 = vlib_get_buffer (vm, from[2]);
2355             p3 = vlib_get_buffer (vm, from[3]);
2356
2357             vlib_prefetch_buffer_header (p2, LOAD);
2358             vlib_prefetch_buffer_header (p3, LOAD);
2359
2360             CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
2361             CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
2362           }
2363
2364           /* Speculatively enqueue b0, b1 to the current next frame */
2365           to_next[0] = bi0 = from[0];
2366           to_next[1] = bi1 = from[1];
2367           from += 2;
2368           to_next += 2;
2369           n_left_from -= 2;
2370           n_left_to_next -= 2;
2371
2372           b0 = vlib_get_buffer (vm, bi0);
2373           b1 = vlib_get_buffer (vm, bi1);
2374
2375           /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2376           u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
2377           ip_adjacency_t *adj0 = adj_get (adj_index0);
2378           u32 adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
2379           ip_adjacency_t *adj1 = adj_get (adj_index1);
2380
2381           /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2382           next0 = adj0->lookup_next_index;
2383           next1 = adj1->lookup_next_index;
2384
2385           ip0 = vlib_buffer_get_current (b0);
2386           ip1 = vlib_buffer_get_current (b1);
2387           hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
2388           hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
2389           opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2390           opt1 = (ip6_hop_by_hop_option_t *) (hbh1 + 1);
2391           limit0 =
2392             (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
2393                                          ((hbh0->length + 1) << 3));
2394           limit1 =
2395             (ip6_hop_by_hop_option_t *) ((u8 *) hbh1 +
2396                                          ((hbh1->length + 1) << 3));
2397
2398           /*
2399            * Basic validity checks
2400            */
2401           if ((hbh0->length + 1) << 3 >
2402               clib_net_to_host_u16 (ip0->payload_length))
2403             {
2404               error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2405               next0 = IP_LOOKUP_NEXT_DROP;
2406               goto outdual;
2407             }
2408           /* Scan the set of h-b-h options, process ones that we understand */
2409           error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
2410
2411           if ((hbh1->length + 1) << 3 >
2412               clib_net_to_host_u16 (ip1->payload_length))
2413             {
2414               error1 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2415               next1 = IP_LOOKUP_NEXT_DROP;
2416               goto outdual;
2417             }
2418           /* Scan the set of h-b-h options, process ones that we understand */
2419           error1 = ip6_scan_hbh_options (b1, ip1, hbh1, opt1, limit1, &next1);
2420
2421         outdual:
2422           /* Has the classifier flagged this buffer for special treatment? */
2423           if (PREDICT_FALSE
2424               ((error0 == 0)
2425                && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
2426             next0 = hm->next_override;
2427
2428           /* Has the classifier flagged this buffer for special treatment? */
2429           if (PREDICT_FALSE
2430               ((error1 == 0)
2431                && (vnet_buffer (b1)->l2_classify.opaque_index & OI_DECAP)))
2432             next1 = hm->next_override;
2433
2434           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
2435             {
2436               if (b0->flags & VLIB_BUFFER_IS_TRACED)
2437                 {
2438                   ip6_hop_by_hop_trace_t *t =
2439                     vlib_add_trace (vm, node, b0, sizeof (*t));
2440                   u32 trace_len = (hbh0->length + 1) << 3;
2441                   t->next_index = next0;
2442                   /* Capture the h-b-h option verbatim */
2443                   trace_len =
2444                     trace_len <
2445                     ARRAY_LEN (t->option_data) ? trace_len :
2446                     ARRAY_LEN (t->option_data);
2447                   t->trace_len = trace_len;
2448                   clib_memcpy_fast (t->option_data, hbh0, trace_len);
2449                 }
2450               if (b1->flags & VLIB_BUFFER_IS_TRACED)
2451                 {
2452                   ip6_hop_by_hop_trace_t *t =
2453                     vlib_add_trace (vm, node, b1, sizeof (*t));
2454                   u32 trace_len = (hbh1->length + 1) << 3;
2455                   t->next_index = next1;
2456                   /* Capture the h-b-h option verbatim */
2457                   trace_len =
2458                     trace_len <
2459                     ARRAY_LEN (t->option_data) ? trace_len :
2460                     ARRAY_LEN (t->option_data);
2461                   t->trace_len = trace_len;
2462                   clib_memcpy_fast (t->option_data, hbh1, trace_len);
2463                 }
2464
2465             }
2466
2467           b0->error = error_node->errors[error0];
2468           b1->error = error_node->errors[error1];
2469
2470           /* verify speculative enqueue, maybe switch current next frame */
2471           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
2472                                            n_left_to_next, bi0, bi1, next0,
2473                                            next1);
2474         }
2475
2476       while (n_left_from > 0 && n_left_to_next > 0)
2477         {
2478           u32 bi0;
2479           vlib_buffer_t *b0;
2480           u32 next0;
2481           ip6_header_t *ip0;
2482           ip6_hop_by_hop_header_t *hbh0;
2483           ip6_hop_by_hop_option_t *opt0, *limit0;
2484           u8 error0 = 0;
2485
2486           /* Speculatively enqueue b0 to the current next frame */
2487           bi0 = from[0];
2488           to_next[0] = bi0;
2489           from += 1;
2490           to_next += 1;
2491           n_left_from -= 1;
2492           n_left_to_next -= 1;
2493
2494           b0 = vlib_get_buffer (vm, bi0);
2495           /*
2496            * Default use the next_index from the adjacency.
2497            * A HBH option rarely redirects to a different node
2498            */
2499           u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
2500           ip_adjacency_t *adj0 = adj_get (adj_index0);
2501           next0 = adj0->lookup_next_index;
2502
2503           ip0 = vlib_buffer_get_current (b0);
2504           hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
2505           opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2506           limit0 =
2507             (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
2508                                          ((hbh0->length + 1) << 3));
2509
2510           /*
2511            * Basic validity checks
2512            */
2513           if ((hbh0->length + 1) << 3 >
2514               clib_net_to_host_u16 (ip0->payload_length))
2515             {
2516               error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2517               next0 = IP_LOOKUP_NEXT_DROP;
2518               goto out0;
2519             }
2520
2521           /* Scan the set of h-b-h options, process ones that we understand */
2522           error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
2523
2524         out0:
2525           /* Has the classifier flagged this buffer for special treatment? */
2526           if (PREDICT_FALSE
2527               ((error0 == 0)
2528                && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
2529             next0 = hm->next_override;
2530
2531           if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
2532             {
2533               ip6_hop_by_hop_trace_t *t =
2534                 vlib_add_trace (vm, node, b0, sizeof (*t));
2535               u32 trace_len = (hbh0->length + 1) << 3;
2536               t->next_index = next0;
2537               /* Capture the h-b-h option verbatim */
2538               trace_len =
2539                 trace_len <
2540                 ARRAY_LEN (t->option_data) ? trace_len :
2541                 ARRAY_LEN (t->option_data);
2542               t->trace_len = trace_len;
2543               clib_memcpy_fast (t->option_data, hbh0, trace_len);
2544             }
2545
2546           b0->error = error_node->errors[error0];
2547
2548           /* verify speculative enqueue, maybe switch current next frame */
2549           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
2550                                            n_left_to_next, bi0, next0);
2551         }
2552       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2553     }
2554   return frame->n_vectors;
2555 }
2556
2557 /* *INDENT-OFF* */
2558 VLIB_REGISTER_NODE (ip6_hop_by_hop_node) =
2559 {
2560   .name = "ip6-hop-by-hop",
2561   .sibling_of = "ip6-lookup",
2562   .vector_size = sizeof (u32),
2563   .format_trace = format_ip6_hop_by_hop_trace,
2564   .type = VLIB_NODE_TYPE_INTERNAL,
2565   .n_errors = ARRAY_LEN (ip6_hop_by_hop_error_strings),
2566   .error_strings = ip6_hop_by_hop_error_strings,
2567   .n_next_nodes = 0,
2568 };
2569 /* *INDENT-ON* */
2570
2571 static clib_error_t *
2572 ip6_hop_by_hop_init (vlib_main_t * vm)
2573 {
2574   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2575   clib_memset (hm->options, 0, sizeof (hm->options));
2576   clib_memset (hm->trace, 0, sizeof (hm->trace));
2577   hm->next_override = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP;
2578   return (0);
2579 }
2580
2581 VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
2582
2583 #ifndef CLIB_MARCH_VARIANT
2584 void
2585 ip6_hbh_set_next_override (uword next)
2586 {
2587   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2588
2589   hm->next_override = next;
2590 }
2591
2592 int
2593 ip6_hbh_register_option (u8 option,
2594                          int options (vlib_buffer_t * b, ip6_header_t * ip,
2595                                       ip6_hop_by_hop_option_t * opt),
2596                          u8 * trace (u8 * s, ip6_hop_by_hop_option_t * opt))
2597 {
2598   ip6_main_t *im = &ip6_main;
2599   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2600
2601   ASSERT ((u32) option < ARRAY_LEN (hm->options));
2602
2603   /* Already registered */
2604   if (hm->options[option])
2605     return (-1);
2606
2607   hm->options[option] = options;
2608   hm->trace[option] = trace;
2609
2610   /* Set global variable */
2611   im->hbh_enabled = 1;
2612
2613   return (0);
2614 }
2615
2616 int
2617 ip6_hbh_unregister_option (u8 option)
2618 {
2619   ip6_main_t *im = &ip6_main;
2620   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2621
2622   ASSERT ((u32) option < ARRAY_LEN (hm->options));
2623
2624   /* Not registered */
2625   if (!hm->options[option])
2626     return (-1);
2627
2628   hm->options[option] = NULL;
2629   hm->trace[option] = NULL;
2630
2631   /* Disable global knob if this was the last option configured */
2632   int i;
2633   bool found = false;
2634   for (i = 0; i < 256; i++)
2635     {
2636       if (hm->options[option])
2637         {
2638           found = true;
2639           break;
2640         }
2641     }
2642   if (!found)
2643     im->hbh_enabled = 0;
2644
2645   return (0);
2646 }
2647
2648 /* Global IP6 main. */
2649 ip6_main_t ip6_main;
2650 #endif
2651
2652 static clib_error_t *
2653 ip6_lookup_init (vlib_main_t * vm)
2654 {
2655   ip6_main_t *im = &ip6_main;
2656   clib_error_t *error;
2657   uword i;
2658
2659   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
2660     return error;
2661
2662   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
2663     {
2664       u32 j, i0, i1;
2665
2666       i0 = i / 32;
2667       i1 = i % 32;
2668
2669       for (j = 0; j < i0; j++)
2670         im->fib_masks[i].as_u32[j] = ~0;
2671
2672       if (i1)
2673         im->fib_masks[i].as_u32[i0] =
2674           clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
2675     }
2676
2677   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 1);
2678
2679   if (im->lookup_table_nbuckets == 0)
2680     im->lookup_table_nbuckets = IP6_FIB_DEFAULT_HASH_NUM_BUCKETS;
2681
2682   im->lookup_table_nbuckets = 1 << max_log2 (im->lookup_table_nbuckets);
2683
2684   if (im->lookup_table_size == 0)
2685     im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE;
2686
2687   clib_bihash_init_24_8 (&(im->ip6_table[IP6_FIB_TABLE_FWDING].ip6_hash),
2688                          "ip6 FIB fwding table",
2689                          im->lookup_table_nbuckets, im->lookup_table_size);
2690   clib_bihash_init_24_8 (&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash,
2691                          "ip6 FIB non-fwding table",
2692                          im->lookup_table_nbuckets, im->lookup_table_size);
2693   clib_bihash_init_40_8 (&im->ip6_mtable.ip6_mhash,
2694                          "ip6 mFIB table",
2695                          im->lookup_table_nbuckets, im->lookup_table_size);
2696
2697   /* Create FIB with index 0 and table id of 0. */
2698   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
2699                                      FIB_SOURCE_DEFAULT_ROUTE);
2700   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
2701                                       MFIB_SOURCE_DEFAULT_ROUTE);
2702
2703   {
2704     pg_node_t *pn;
2705     pn = pg_get_node (ip6_lookup_node.index);
2706     pn->unformat_edit = unformat_pg_ip6_header;
2707   }
2708
2709   /* Unless explicitly configured, don't process HBH options */
2710   im->hbh_enabled = 0;
2711
2712   {
2713     icmp6_neighbor_solicitation_header_t p;
2714
2715     clib_memset (&p, 0, sizeof (p));
2716
2717     p.ip.ip_version_traffic_class_and_flow_label =
2718       clib_host_to_net_u32 (0x6 << 28);
2719     p.ip.payload_length =
2720       clib_host_to_net_u16 (sizeof (p) -
2721                             STRUCT_OFFSET_OF
2722                             (icmp6_neighbor_solicitation_header_t, neighbor));
2723     p.ip.protocol = IP_PROTOCOL_ICMP6;
2724     p.ip.hop_limit = 255;
2725     ip6_set_solicited_node_multicast_address (&p.ip.dst_address, 0);
2726
2727     p.neighbor.icmp.type = ICMP6_neighbor_solicitation;
2728
2729     p.link_layer_option.header.type =
2730       ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
2731     p.link_layer_option.header.n_data_u64s =
2732       sizeof (p.link_layer_option) / sizeof (u64);
2733
2734     vlib_packet_template_init (vm,
2735                                &im->discover_neighbor_packet_template,
2736                                &p, sizeof (p),
2737                                /* alloc chunk size */ 8,
2738                                "ip6 neighbor discovery");
2739   }
2740
2741   return error;
2742 }
2743
2744 VLIB_INIT_FUNCTION (ip6_lookup_init);
2745
2746 static clib_error_t *
2747 test_ip6_link_command_fn (vlib_main_t * vm,
2748                           unformat_input_t * input, vlib_cli_command_t * cmd)
2749 {
2750   u8 mac[6];
2751   ip6_address_t _a, *a = &_a;
2752
2753   if (unformat (input, "%U", unformat_ethernet_address, mac))
2754     {
2755       ip6_link_local_address_from_ethernet_mac_address (a, mac);
2756       vlib_cli_output (vm, "Link local address: %U", format_ip6_address, a);
2757       ip6_ethernet_mac_address_from_link_local_address (mac, a);
2758       vlib_cli_output (vm, "Original MAC address: %U",
2759                        format_ethernet_address, mac);
2760     }
2761
2762   return 0;
2763 }
2764
2765 /*?
2766  * This command converts the given MAC Address into an IPv6 link-local
2767  * address.
2768  *
2769  * @cliexpar
2770  * Example of how to create an IPv6 link-local address:
2771  * @cliexstart{test ip6 link 16:d9:e0:91:79:86}
2772  * Link local address: fe80::14d9:e0ff:fe91:7986
2773  * Original MAC address: 16:d9:e0:91:79:86
2774  * @cliexend
2775 ?*/
2776 /* *INDENT-OFF* */
2777 VLIB_CLI_COMMAND (test_link_command, static) =
2778 {
2779   .path = "test ip6 link",
2780   .function = test_ip6_link_command_fn,
2781   .short_help = "test ip6 link <mac-address>",
2782 };
2783 /* *INDENT-ON* */
2784
2785 #ifndef CLIB_MARCH_VARIANT
2786 int
2787 vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config)
2788 {
2789   u32 fib_index;
2790
2791   fib_index = fib_table_find (FIB_PROTOCOL_IP6, table_id);
2792
2793   if (~0 == fib_index)
2794     return VNET_API_ERROR_NO_SUCH_FIB;
2795
2796   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP6,
2797                                   flow_hash_config);
2798
2799   return 0;
2800 }
2801 #endif
2802
2803 static clib_error_t *
2804 set_ip6_flow_hash_command_fn (vlib_main_t * vm,
2805                               unformat_input_t * input,
2806                               vlib_cli_command_t * cmd)
2807 {
2808   int matched = 0;
2809   u32 table_id = 0;
2810   u32 flow_hash_config = 0;
2811   int rv;
2812
2813   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2814     {
2815       if (unformat (input, "table %d", &table_id))
2816         matched = 1;
2817 #define _(a,v) \
2818     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2819       foreach_flow_hash_bit
2820 #undef _
2821         else
2822         break;
2823     }
2824
2825   if (matched == 0)
2826     return clib_error_return (0, "unknown input `%U'",
2827                               format_unformat_error, input);
2828
2829   rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config);
2830   switch (rv)
2831     {
2832     case 0:
2833       break;
2834
2835     case -1:
2836       return clib_error_return (0, "no such FIB table %d", table_id);
2837
2838     default:
2839       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2840       break;
2841     }
2842
2843   return 0;
2844 }
2845
2846 /*?
2847  * Configure the set of IPv6 fields used by the flow hash.
2848  *
2849  * @cliexpar
2850  * @parblock
2851  * Example of how to set the flow hash on a given table:
2852  * @cliexcmd{set ip6 flow-hash table 8 dst sport dport proto}
2853  *
2854  * Example of how to display the configured flow hash:
2855  * @cliexstart{show ip6 fib}
2856  * ipv6-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2857  * @::/0
2858  *   unicast-ip6-chain
2859  *   [@0]: dpo-load-balance: [index:5 buckets:1 uRPF:5 to:[0:0]]
2860  *     [0] [@0]: dpo-drop ip6
2861  * fe80::/10
2862  *   unicast-ip6-chain
2863  *   [@0]: dpo-load-balance: [index:10 buckets:1 uRPF:10 to:[0:0]]
2864  *     [0] [@2]: dpo-receive
2865  * ff02::1/128
2866  *   unicast-ip6-chain
2867  *   [@0]: dpo-load-balance: [index:8 buckets:1 uRPF:8 to:[0:0]]
2868  *     [0] [@2]: dpo-receive
2869  * ff02::2/128
2870  *   unicast-ip6-chain
2871  *   [@0]: dpo-load-balance: [index:7 buckets:1 uRPF:7 to:[0:0]]
2872  *     [0] [@2]: dpo-receive
2873  * ff02::16/128
2874  *   unicast-ip6-chain
2875  *   [@0]: dpo-load-balance: [index:9 buckets:1 uRPF:9 to:[0:0]]
2876  *     [0] [@2]: dpo-receive
2877  * ff02::1:ff00:0/104
2878  *   unicast-ip6-chain
2879  *   [@0]: dpo-load-balance: [index:6 buckets:1 uRPF:6 to:[0:0]]
2880  *     [0] [@2]: dpo-receive
2881  * ipv6-VRF:8, fib_index 1, flow hash: dst sport dport proto
2882  * @::/0
2883  *   unicast-ip6-chain
2884  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2885  *     [0] [@0]: dpo-drop ip6
2886  * @::a:1:1:0:4/126
2887  *   unicast-ip6-chain
2888  *   [@0]: dpo-load-balance: [index:27 buckets:1 uRPF:26 to:[0:0]]
2889  *     [0] [@4]: ipv6-glean: af_packet0
2890  * @::a:1:1:0:7/128
2891  *   unicast-ip6-chain
2892  *   [@0]: dpo-load-balance: [index:28 buckets:1 uRPF:27 to:[0:0]]
2893  *     [0] [@2]: dpo-receive: @::a:1:1:0:7 on af_packet0
2894  * fe80::/10
2895  *   unicast-ip6-chain
2896  *   [@0]: dpo-load-balance: [index:26 buckets:1 uRPF:25 to:[0:0]]
2897  *     [0] [@2]: dpo-receive
2898  * fe80::fe:3eff:fe3e:9222/128
2899  *   unicast-ip6-chain
2900  *   [@0]: dpo-load-balance: [index:29 buckets:1 uRPF:28 to:[0:0]]
2901  *     [0] [@2]: dpo-receive: fe80::fe:3eff:fe3e:9222 on af_packet0
2902  * ff02::1/128
2903  *   unicast-ip6-chain
2904  *   [@0]: dpo-load-balance: [index:24 buckets:1 uRPF:23 to:[0:0]]
2905  *     [0] [@2]: dpo-receive
2906  * ff02::2/128
2907  *   unicast-ip6-chain
2908  *   [@0]: dpo-load-balance: [index:23 buckets:1 uRPF:22 to:[0:0]]
2909  *     [0] [@2]: dpo-receive
2910  * ff02::16/128
2911  *   unicast-ip6-chain
2912  *   [@0]: dpo-load-balance: [index:25 buckets:1 uRPF:24 to:[0:0]]
2913  *     [0] [@2]: dpo-receive
2914  * ff02::1:ff00:0/104
2915  *   unicast-ip6-chain
2916  *   [@0]: dpo-load-balance: [index:22 buckets:1 uRPF:21 to:[0:0]]
2917  *     [0] [@2]: dpo-receive
2918  * @cliexend
2919  * @endparblock
2920 ?*/
2921 /* *INDENT-OFF* */
2922 VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) =
2923 {
2924   .path = "set ip6 flow-hash",
2925   .short_help =
2926   "set ip6 flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2927   .function = set_ip6_flow_hash_command_fn,
2928 };
2929 /* *INDENT-ON* */
2930
2931 static clib_error_t *
2932 show_ip6_local_command_fn (vlib_main_t * vm,
2933                            unformat_input_t * input, vlib_cli_command_t * cmd)
2934 {
2935   ip6_main_t *im = &ip6_main;
2936   ip_lookup_main_t *lm = &im->lookup_main;
2937   int i;
2938
2939   vlib_cli_output (vm, "Protocols handled by ip6_local");
2940   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
2941     {
2942       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2943         {
2944
2945           u32 node_index = vlib_get_node (vm,
2946                                           ip6_local_node.index)->
2947             next_nodes[lm->local_next_by_ip_protocol[i]];
2948           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
2949                            node_index);
2950         }
2951     }
2952   return 0;
2953 }
2954
2955
2956
2957 /*?
2958  * Display the set of protocols handled by the local IPv6 stack.
2959  *
2960  * @cliexpar
2961  * Example of how to display local protocol table:
2962  * @cliexstart{show ip6 local}
2963  * Protocols handled by ip6_local
2964  * 17
2965  * 43
2966  * 58
2967  * 115
2968  * @cliexend
2969 ?*/
2970 /* *INDENT-OFF* */
2971 VLIB_CLI_COMMAND (show_ip6_local, static) =
2972 {
2973   .path = "show ip6 local",
2974   .function = show_ip6_local_command_fn,
2975   .short_help = "show ip6 local",
2976 };
2977 /* *INDENT-ON* */
2978
2979 #ifndef CLIB_MARCH_VARIANT
2980 int
2981 vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2982                              u32 table_index)
2983 {
2984   vnet_main_t *vnm = vnet_get_main ();
2985   vnet_interface_main_t *im = &vnm->interface_main;
2986   ip6_main_t *ipm = &ip6_main;
2987   ip_lookup_main_t *lm = &ipm->lookup_main;
2988   vnet_classify_main_t *cm = &vnet_classify_main;
2989   ip6_address_t *if_addr;
2990
2991   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2992     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2993
2994   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2995     return VNET_API_ERROR_NO_SUCH_ENTRY;
2996
2997   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2998   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2999
3000   if_addr = ip6_interface_first_address (ipm, sw_if_index);
3001
3002   if (NULL != if_addr)
3003     {
3004       fib_prefix_t pfx = {
3005         .fp_len = 128,
3006         .fp_proto = FIB_PROTOCOL_IP6,
3007         .fp_addr.ip6 = *if_addr,
3008       };
3009       u32 fib_index;
3010
3011       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3012                                                        sw_if_index);
3013
3014
3015       if (table_index != (u32) ~ 0)
3016         {
3017           dpo_id_t dpo = DPO_INVALID;
3018
3019           dpo_set (&dpo,
3020                    DPO_CLASSIFY,
3021                    DPO_PROTO_IP6,
3022                    classify_dpo_create (DPO_PROTO_IP6, table_index));
3023
3024           fib_table_entry_special_dpo_add (fib_index,
3025                                            &pfx,
3026                                            FIB_SOURCE_CLASSIFY,
3027                                            FIB_ENTRY_FLAG_NONE, &dpo);
3028           dpo_reset (&dpo);
3029         }
3030       else
3031         {
3032           fib_table_entry_special_remove (fib_index,
3033                                           &pfx, FIB_SOURCE_CLASSIFY);
3034         }
3035     }
3036
3037   return 0;
3038 }
3039 #endif
3040
3041 static clib_error_t *
3042 set_ip6_classify_command_fn (vlib_main_t * vm,
3043                              unformat_input_t * input,
3044                              vlib_cli_command_t * cmd)
3045 {
3046   u32 table_index = ~0;
3047   int table_index_set = 0;
3048   u32 sw_if_index = ~0;
3049   int rv;
3050
3051   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3052     {
3053       if (unformat (input, "table-index %d", &table_index))
3054         table_index_set = 1;
3055       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3056                          vnet_get_main (), &sw_if_index))
3057         ;
3058       else
3059         break;
3060     }
3061
3062   if (table_index_set == 0)
3063     return clib_error_return (0, "classify table-index must be specified");
3064
3065   if (sw_if_index == ~0)
3066     return clib_error_return (0, "interface / subif must be specified");
3067
3068   rv = vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index);
3069
3070   switch (rv)
3071     {
3072     case 0:
3073       break;
3074
3075     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3076       return clib_error_return (0, "No such interface");
3077
3078     case VNET_API_ERROR_NO_SUCH_ENTRY:
3079       return clib_error_return (0, "No such classifier table");
3080     }
3081   return 0;
3082 }
3083
3084 /*?
3085  * Assign a classification table to an interface. The classification
3086  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3087  * commands. Once the table is created, use this command to filter packets
3088  * on an interface.
3089  *
3090  * @cliexpar
3091  * Example of how to assign a classification table to an interface:
3092  * @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1}
3093 ?*/
3094 /* *INDENT-OFF* */
3095 VLIB_CLI_COMMAND (set_ip6_classify_command, static) =
3096 {
3097   .path = "set ip6 classify",
3098   .short_help =
3099   "set ip6 classify intfc <interface> table-index <classify-idx>",
3100   .function = set_ip6_classify_command_fn,
3101 };
3102 /* *INDENT-ON* */
3103
3104 static clib_error_t *
3105 ip6_config (vlib_main_t * vm, unformat_input_t * input)
3106 {
3107   ip6_main_t *im = &ip6_main;
3108   uword heapsize = 0;
3109   u32 tmp;
3110   u32 nbuckets = 0;
3111
3112   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3113     {
3114       if (unformat (input, "hash-buckets %d", &tmp))
3115         nbuckets = tmp;
3116       else if (unformat (input, "heap-size %U",
3117                          unformat_memory_size, &heapsize))
3118         ;
3119       else
3120         return clib_error_return (0, "unknown input '%U'",
3121                                   format_unformat_error, input);
3122     }
3123
3124   im->lookup_table_nbuckets = nbuckets;
3125   im->lookup_table_size = heapsize;
3126
3127   return 0;
3128 }
3129
3130 VLIB_EARLY_CONFIG_FUNCTION (ip6_config, "ip6");
3131
3132 /*
3133  * fd.io coding-style-patch-verification: ON
3134  *
3135  * Local Variables:
3136  * eval: (c-set-style "gnu")
3137  * End:
3138  */