ip: fix ip6/udp checksum for pkts using buffer chaining
[vpp.git] / src / vnet / ip / ip6_forward.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip6_forward.c: IP v6 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ip/ip6_neighbor.h>
44 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vppinfra/cache.h>
47 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
48 #include <vnet/fib/ip6_fib.h>
49 #include <vnet/mfib/ip6_mfib.h>
50 #include <vnet/dpo/load_balance_map.h>
51 #include <vnet/dpo/classify_dpo.h>
52
53 #ifndef CLIB_MARCH_VARIANT
54 #include <vppinfra/bihash_template.c>
55 #endif
56 #include <vnet/ip/ip6_forward.h>
57 #include <vnet/interface_output.h>
58
59 /* Flag used by IOAM code: the classifier sets it and pop-hop-by-hop checks it. */
60 #define OI_DECAP   0x80000000
61
62 static void
63 ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
64                           ip6_main_t * im, u32 fib_index,
65                           ip_interface_address_t * a)
66 {
67   ip_lookup_main_t *lm = &im->lookup_main;
68   ip6_address_t *address = ip_interface_address_get_address (lm, a);
69   fib_prefix_t pfx = {
70     .fp_len = a->address_length,
71     .fp_proto = FIB_PROTOCOL_IP6,
72     .fp_addr.ip6 = *address,
73   };
74
75   if (a->address_length < 128)
76     {
77       fib_table_entry_update_one_path (fib_index,
78                                        &pfx,
79                                        FIB_SOURCE_INTERFACE,
80                                        (FIB_ENTRY_FLAG_CONNECTED |
81                                         FIB_ENTRY_FLAG_ATTACHED),
82                                        DPO_PROTO_IP6,
83                                        /* No next-hop address */
84                                        NULL, sw_if_index,
85                                        /* invalid FIB index */
86                                        ~0, 1,
87                                        /* no label stack */
88                                        NULL, FIB_ROUTE_PATH_FLAG_NONE);
89     }
90
91   pfx.fp_len = 128;
92   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
93     {
94       u32 classify_table_index =
95         lm->classify_table_index_by_sw_if_index[sw_if_index];
96       if (classify_table_index != (u32) ~ 0)
97         {
98           dpo_id_t dpo = DPO_INVALID;
99
100           dpo_set (&dpo,
101                    DPO_CLASSIFY,
102                    DPO_PROTO_IP6,
103                    classify_dpo_create (DPO_PROTO_IP6, classify_table_index));
104
105           fib_table_entry_special_dpo_add (fib_index,
106                                            &pfx,
107                                            FIB_SOURCE_CLASSIFY,
108                                            FIB_ENTRY_FLAG_NONE, &dpo);
109           dpo_reset (&dpo);
110         }
111     }
112
113   fib_table_entry_update_one_path (fib_index, &pfx,
114                                    FIB_SOURCE_INTERFACE,
115                                    (FIB_ENTRY_FLAG_CONNECTED |
116                                     FIB_ENTRY_FLAG_LOCAL),
117                                    DPO_PROTO_IP6,
118                                    &pfx.fp_addr,
119                                    sw_if_index, ~0,
120                                    1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
121 }
122
123 static void
124 ip6_del_interface_routes (ip6_main_t * im,
125                           u32 fib_index,
126                           ip6_address_t * address, u32 address_length)
127 {
128   fib_prefix_t pfx = {
129     .fp_len = address_length,
130     .fp_proto = FIB_PROTOCOL_IP6,
131     .fp_addr.ip6 = *address,
132   };
133
134   if (pfx.fp_len < 128)
135     {
136       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
137
138     }
139
140   pfx.fp_len = 128;
141   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
142 }
143
144 #ifndef CLIB_MARCH_VARIANT
145 void
146 ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
147 {
148   ip6_main_t *im = &ip6_main;
149
150   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
151
152   /*
153    * enable/disable only on the 1<->0 transition
154    */
155   if (is_enable)
156     {
157       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
158         return;
159     }
160   else
161     {
162       /* The ref count is 0 when an address is removed from an interface that has
163        * no address - this is not a ciritical error */
164       if (0 == im->ip_enabled_by_sw_if_index[sw_if_index] ||
165           0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
166         return;
167     }
168
169   vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index,
170                                !is_enable, 0, 0);
171
172   vnet_feature_enable_disable ("ip6-multicast", "ip6-not-enabled",
173                                sw_if_index, !is_enable, 0, 0);
174 }
175
176 /* get first interface address */
177 ip6_address_t *
178 ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index)
179 {
180   ip_lookup_main_t *lm = &im->lookup_main;
181   ip_interface_address_t *ia = 0;
182   ip6_address_t *result = 0;
183
184   /* *INDENT-OFF* */
185   foreach_ip_interface_address (lm, ia, sw_if_index,
186                                 1 /* honor unnumbered */,
187   ({
188     ip6_address_t * a = ip_interface_address_get_address (lm, ia);
189     result = a;
190     break;
191   }));
192   /* *INDENT-ON* */
193   return result;
194 }
195
196 clib_error_t *
197 ip6_add_del_interface_address (vlib_main_t * vm,
198                                u32 sw_if_index,
199                                ip6_address_t * address,
200                                u32 address_length, u32 is_del)
201 {
202   vnet_main_t *vnm = vnet_get_main ();
203   ip6_main_t *im = &ip6_main;
204   ip_lookup_main_t *lm = &im->lookup_main;
205   clib_error_t *error;
206   u32 if_address_index;
207   ip6_address_fib_t ip6_af, *addr_fib = 0;
208   ip6_address_t ll_addr;
209
210   /* local0 interface doesn't support IP addressing */
211   if (sw_if_index == 0)
212     {
213       return
214         clib_error_create ("local0 interface doesn't support IP addressing");
215     }
216
217   if (ip6_address_is_link_local_unicast (address))
218     {
219       if (address_length != 128)
220         {
221           vnm->api_errno = VNET_API_ERROR_ADDRESS_LENGTH_MISMATCH;
222           return
223             clib_error_create
224             ("prefix length of link-local address must be 128");
225         }
226       if (!is_del)
227         {
228           return ip6_neighbor_set_link_local_address (vm, sw_if_index,
229                                                       address);
230         }
231       else
232         {
233           ll_addr = ip6_neighbor_get_link_local_address (sw_if_index);
234           if (ip6_address_is_equal (&ll_addr, address))
235             {
236               vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_DELETABLE;
237               return clib_error_create ("address not deletable");
238             }
239           else
240             {
241               vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
242               return clib_error_create ("address not found");
243             }
244         }
245     }
246
247   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
248   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
249
250   ip6_addr_fib_init (&ip6_af, address,
251                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
252   vec_add1 (addr_fib, ip6_af);
253
254   /* *INDENT-OFF* */
255   if (!is_del)
256     {
257       /* When adding an address check that it does not conflict
258          with an existing address on any interface in this table. */
259       ip_interface_address_t *ia;
260       vnet_sw_interface_t *sif;
261
262       pool_foreach(sif, vnm->interface_main.sw_interfaces,
263       ({
264           if (im->fib_index_by_sw_if_index[sw_if_index] ==
265               im->fib_index_by_sw_if_index[sif->sw_if_index])
266             {
267               foreach_ip_interface_address
268                 (&im->lookup_main, ia, sif->sw_if_index,
269                  0 /* honor unnumbered */ ,
270                  ({
271                    ip6_address_t * x =
272                      ip_interface_address_get_address
273                      (&im->lookup_main, ia);
274                    if (ip6_destination_matches_route
275                        (im, address, x, ia->address_length) ||
276                        ip6_destination_matches_route (im,
277                                                       x,
278                                                       address,
279                                                       address_length))
280                      {
281                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
282                        return
283                          clib_error_create
284                          ("failed to add %U which conflicts with %U for interface %U",
285                           format_ip6_address_and_length, address,
286                           address_length,
287                           format_ip6_address_and_length, x,
288                           ia->address_length,
289                           format_vnet_sw_if_index_name, vnm,
290                           sif->sw_if_index);
291                      }
292                  }));
293             }
294       }));
295     }
296   /* *INDENT-ON* */
297
298   {
299     uword elts_before = pool_elts (lm->if_address_pool);
300
301     error = ip_interface_address_add_del
302       (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
303     if (error)
304       goto done;
305
306     /* Pool did not grow: add duplicate address. */
307     if (elts_before == pool_elts (lm->if_address_pool))
308       goto done;
309   }
310
311   ip6_sw_interface_enable_disable (sw_if_index, !is_del);
312
313   if (is_del)
314     ip6_del_interface_routes (im, ip6_af.fib_index, address, address_length);
315   else
316     ip6_add_interface_routes (vnm, sw_if_index,
317                               im, ip6_af.fib_index,
318                               pool_elt_at_index (lm->if_address_pool,
319                                                  if_address_index));
320
321   {
322     ip6_add_del_interface_address_callback_t *cb;
323     vec_foreach (cb, im->add_del_interface_address_callbacks)
324       cb->function (im, cb->function_opaque, sw_if_index,
325                     address, address_length, if_address_index, is_del);
326   }
327
328 done:
329   vec_free (addr_fib);
330   return error;
331 }
332
333 #endif
334
335 static clib_error_t *
336 ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
337 {
338   ip6_main_t *im = &ip6_main;
339   ip_interface_address_t *ia;
340   ip6_address_t *a;
341   u32 is_admin_up, fib_index;
342
343   /* Fill in lookup tables with default table (0). */
344   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
345
346   vec_validate_init_empty (im->
347                            lookup_main.if_address_pool_index_by_sw_if_index,
348                            sw_if_index, ~0);
349
350   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
351
352   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
353
354   /* *INDENT-OFF* */
355   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
356                                 0 /* honor unnumbered */,
357   ({
358     a = ip_interface_address_get_address (&im->lookup_main, ia);
359     if (is_admin_up)
360       ip6_add_interface_routes (vnm, sw_if_index,
361                                 im, fib_index,
362                                 ia);
363     else
364       ip6_del_interface_routes (im, fib_index,
365                                 a, ia->address_length);
366   }));
367   /* *INDENT-ON* */
368
369   return 0;
370 }
371
372 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
373
374 /* Built-in ip6 unicast rx feature path definition */
375 /* *INDENT-OFF* */
376 VNET_FEATURE_ARC_INIT (ip6_unicast, static) =
377 {
378   .arc_name  = "ip6-unicast",
379   .start_nodes = VNET_FEATURES ("ip6-input"),
380   .last_in_arc = "ip6-lookup",
381   .arc_index_ptr = &ip6_main.lookup_main.ucast_feature_arc_index,
382 };
383
384 VNET_FEATURE_INIT (ip6_flow_classify, static) =
385 {
386   .arc_name = "ip6-unicast",
387   .node_name = "ip6-flow-classify",
388   .runs_before = VNET_FEATURES ("ip6-inacl"),
389 };
390
391 VNET_FEATURE_INIT (ip6_inacl, static) =
392 {
393   .arc_name = "ip6-unicast",
394   .node_name = "ip6-inacl",
395   .runs_before = VNET_FEATURES ("ip6-policer-classify"),
396 };
397
398 VNET_FEATURE_INIT (ip6_policer_classify, static) =
399 {
400   .arc_name = "ip6-unicast",
401   .node_name = "ip6-policer-classify",
402   .runs_before = VNET_FEATURES ("ipsec6-input-feature"),
403 };
404
405 VNET_FEATURE_INIT (ip6_ipsec, static) =
406 {
407   .arc_name = "ip6-unicast",
408   .node_name = "ipsec6-input-feature",
409   .runs_before = VNET_FEATURES ("l2tp-decap"),
410 };
411
412 VNET_FEATURE_INIT (ip6_l2tp, static) =
413 {
414   .arc_name = "ip6-unicast",
415   .node_name = "l2tp-decap",
416   .runs_before = VNET_FEATURES ("vpath-input-ip6"),
417 };
418
419 VNET_FEATURE_INIT (ip6_vpath, static) =
420 {
421   .arc_name = "ip6-unicast",
422   .node_name = "vpath-input-ip6",
423   .runs_before = VNET_FEATURES ("ip6-vxlan-bypass"),
424 };
425
426 VNET_FEATURE_INIT (ip6_vxlan_bypass, static) =
427 {
428   .arc_name = "ip6-unicast",
429   .node_name = "ip6-vxlan-bypass",
430   .runs_before = VNET_FEATURES ("ip6-lookup"),
431 };
432
433 VNET_FEATURE_INIT (ip6_not_enabled, static) =
434 {
435   .arc_name = "ip6-unicast",
436   .node_name = "ip6-not-enabled",
437   .runs_before = VNET_FEATURES ("ip6-lookup"),
438 };
439
440 VNET_FEATURE_INIT (ip6_lookup, static) =
441 {
442   .arc_name = "ip6-unicast",
443   .node_name = "ip6-lookup",
444   .runs_before = 0,  /*last feature*/
445 };
446
447 /* Built-in ip6 multicast rx feature path definition (none now) */
448 VNET_FEATURE_ARC_INIT (ip6_multicast, static) =
449 {
450   .arc_name  = "ip6-multicast",
451   .start_nodes = VNET_FEATURES ("ip6-input"),
452   .last_in_arc = "ip6-mfib-forward-lookup",
453   .arc_index_ptr = &ip6_main.lookup_main.mcast_feature_arc_index,
454 };
455
456 VNET_FEATURE_INIT (ip6_vpath_mc, static) = {
457   .arc_name = "ip6-multicast",
458   .node_name = "vpath-input-ip6",
459   .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
460 };
461
462 VNET_FEATURE_INIT (ip6_not_enabled_mc, static) = {
463   .arc_name = "ip6-multicast",
464   .node_name = "ip6-not-enabled",
465   .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
466 };
467
468 VNET_FEATURE_INIT (ip6_mc_lookup, static) = {
469   .arc_name = "ip6-multicast",
470   .node_name = "ip6-mfib-forward-lookup",
471   .runs_before = 0, /* last feature */
472 };
473
474 /* Built-in ip4 tx feature path definition */
475 VNET_FEATURE_ARC_INIT (ip6_output, static) =
476 {
477   .arc_name  = "ip6-output",
478   .start_nodes = VNET_FEATURES ("ip6-rewrite", "ip6-midchain", "ip6-dvr-dpo"),
479   .last_in_arc = "interface-output",
480   .arc_index_ptr = &ip6_main.lookup_main.output_feature_arc_index,
481 };
482
483 VNET_FEATURE_INIT (ip6_outacl, static) = {
484   .arc_name = "ip6-output",
485   .node_name = "ip6-outacl",
486   .runs_before = VNET_FEATURES ("ipsec6-output-feature"),
487 };
488
489 VNET_FEATURE_INIT (ip6_ipsec_output, static) = {
490   .arc_name = "ip6-output",
491   .node_name = "ipsec6-output-feature",
492   .runs_before = VNET_FEATURES ("interface-output"),
493 };
494
495 VNET_FEATURE_INIT (ip6_interface_output, static) = {
496   .arc_name = "ip6-output",
497   .node_name = "interface-output",
498   .runs_before = 0, /* not before any other features */
499 };
500 /* *INDENT-ON* */
501
502 static clib_error_t *
503 ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
504 {
505   ip6_main_t *im = &ip6_main;
506
507   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
508   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
509
510   if (!is_add)
511     {
512       /* Ensure that IPv6 is disabled */
513       ip6_main_t *im6 = &ip6_main;
514       ip_lookup_main_t *lm6 = &im6->lookup_main;
515       ip_interface_address_t *ia = 0;
516       ip6_address_t *address;
517       vlib_main_t *vm = vlib_get_main ();
518
519       ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, 0 /* is_add */ );
520       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
521       /* *INDENT-OFF* */
522       foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
523       ({
524         address = ip_interface_address_get_address (lm6, ia);
525         ip6_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
526       }));
527       /* *INDENT-ON* */
528       ip6_mfib_interface_enable_disable (sw_if_index, 0);
529     }
530
531   vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index,
532                                is_add, 0, 0);
533
534   vnet_feature_enable_disable ("ip6-multicast", "ip6-not-enabled",
535                                sw_if_index, is_add, 0, 0);
536
537   return /* no error */ 0;
538 }
539
540 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del);
541
542 VLIB_NODE_FN (ip6_lookup_node) (vlib_main_t * vm,
543                                 vlib_node_runtime_t * node,
544                                 vlib_frame_t * frame)
545 {
546   return ip6_lookup_inline (vm, node, frame);
547 }
548
549 static u8 *format_ip6_lookup_trace (u8 * s, va_list * args);
550
551 /* *INDENT-OFF* */
552 VLIB_REGISTER_NODE (ip6_lookup_node) =
553 {
554   .name = "ip6-lookup",
555   .vector_size = sizeof (u32),
556   .format_trace = format_ip6_lookup_trace,
557   .n_next_nodes = IP6_LOOKUP_N_NEXT,
558   .next_nodes = IP6_LOOKUP_NEXT_NODES,
559 };
560 /* *INDENT-ON* */
561
562 VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm,
563                                       vlib_node_runtime_t * node,
564                                       vlib_frame_t * frame)
565 {
566   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
567   u32 n_left, *from;
568   u32 thread_index = vm->thread_index;
569   ip6_main_t *im = &ip6_main;
570   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
571   u16 nexts[VLIB_FRAME_SIZE], *next;
572
573   from = vlib_frame_vector_args (frame);
574   n_left = frame->n_vectors;
575   next = nexts;
576
577   vlib_get_buffers (vm, from, bufs, n_left);
578
579   while (n_left >= 4)
580     {
581       const load_balance_t *lb0, *lb1;
582       const ip6_header_t *ip0, *ip1;
583       u32 lbi0, hc0, lbi1, hc1;
584       const dpo_id_t *dpo0, *dpo1;
585
586       /* Prefetch next iteration. */
587       {
588         vlib_prefetch_buffer_header (b[2], STORE);
589         vlib_prefetch_buffer_header (b[3], STORE);
590
591         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), STORE);
592         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), STORE);
593       }
594
595       ip0 = vlib_buffer_get_current (b[0]);
596       ip1 = vlib_buffer_get_current (b[1]);
597       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
598       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
599
600       lb0 = load_balance_get (lbi0);
601       lb1 = load_balance_get (lbi1);
602
603       /*
604        * this node is for via FIBs we can re-use the hash value from the
605        * to node if present.
606        * We don't want to use the same hash value at each level in the recursion
607        * graph as that would lead to polarisation
608        */
609       hc0 = hc1 = 0;
610
611       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
612         {
613           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
614             {
615               hc0 = vnet_buffer (b[0])->ip.flow_hash =
616                 vnet_buffer (b[0])->ip.flow_hash >> 1;
617             }
618           else
619             {
620               hc0 = vnet_buffer (b[0])->ip.flow_hash =
621                 ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
622             }
623           dpo0 = load_balance_get_fwd_bucket
624             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
625         }
626       else
627         {
628           dpo0 = load_balance_get_bucket_i (lb0, 0);
629         }
630       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
631         {
632           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
633             {
634               hc1 = vnet_buffer (b[1])->ip.flow_hash =
635                 vnet_buffer (b[1])->ip.flow_hash >> 1;
636             }
637           else
638             {
639               hc1 = vnet_buffer (b[1])->ip.flow_hash =
640                 ip6_compute_flow_hash (ip1, lb1->lb_hash_config);
641             }
642           dpo1 = load_balance_get_fwd_bucket
643             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
644         }
645       else
646         {
647           dpo1 = load_balance_get_bucket_i (lb1, 0);
648         }
649
650       next[0] = dpo0->dpoi_next_node;
651       next[1] = dpo1->dpoi_next_node;
652
653       /* Only process the HBH Option Header if explicitly configured to do so */
654       if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
655         {
656           next[0] = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
657             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[0];
658         }
659       /* Only process the HBH Option Header if explicitly configured to do so */
660       if (PREDICT_FALSE (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
661         {
662           next[1] = (dpo_is_adj (dpo1) && im->hbh_enabled) ?
663             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[1];
664         }
665
666       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
667       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
668
669       vlib_increment_combined_counter
670         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
671       vlib_increment_combined_counter
672         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
673
674       b += 2;
675       next += 2;
676       n_left -= 2;
677     }
678
679   while (n_left > 0)
680     {
681       const load_balance_t *lb0;
682       const ip6_header_t *ip0;
683       const dpo_id_t *dpo0;
684       u32 lbi0, hc0;
685
686       ip0 = vlib_buffer_get_current (b[0]);
687       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
688
689       lb0 = load_balance_get (lbi0);
690
691       hc0 = 0;
692       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
693         {
694           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
695             {
696               hc0 = vnet_buffer (b[0])->ip.flow_hash =
697                 vnet_buffer (b[0])->ip.flow_hash >> 1;
698             }
699           else
700             {
701               hc0 = vnet_buffer (b[0])->ip.flow_hash =
702                 ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
703             }
704           dpo0 = load_balance_get_fwd_bucket
705             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
706         }
707       else
708         {
709           dpo0 = load_balance_get_bucket_i (lb0, 0);
710         }
711
712       next[0] = dpo0->dpoi_next_node;
713       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
714
715       /* Only process the HBH Option Header if explicitly configured to do so */
716       if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
717         {
718           next[0] = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
719             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[0];
720         }
721
722       vlib_increment_combined_counter
723         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
724
725       b += 1;
726       next += 1;
727       n_left -= 1;
728     }
729
730   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
731
732   if (node->flags & VLIB_NODE_FLAG_TRACE)
733     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
734
735   return frame->n_vectors;
736 }
737
738 /* *INDENT-OFF* */
739 VLIB_REGISTER_NODE (ip6_load_balance_node) =
740 {
741   .name = "ip6-load-balance",
742   .vector_size = sizeof (u32),
743   .sibling_of = "ip6-lookup",
744   .format_trace = format_ip6_lookup_trace,
745 };
746 /* *INDENT-ON* */
747
748 typedef struct
749 {
750   /* Adjacency taken. */
751   u32 adj_index;
752   u32 flow_hash;
753   u32 fib_index;
754
755   /* Packet data, possibly *after* rewrite. */
756   u8 packet_data[128 - 1 * sizeof (u32)];
757 }
758 ip6_forward_next_trace_t;
759
760 #ifndef CLIB_MARCH_VARIANT
761 u8 *
762 format_ip6_forward_next_trace (u8 * s, va_list * args)
763 {
764   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
765   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
766   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
767   u32 indent = format_get_indent (s);
768
769   s = format (s, "%U%U",
770               format_white_space, indent,
771               format_ip6_header, t->packet_data, sizeof (t->packet_data));
772   return s;
773 }
774 #endif
775
776 static u8 *
777 format_ip6_lookup_trace (u8 * s, va_list * args)
778 {
779   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
780   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
781   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
782   u32 indent = format_get_indent (s);
783
784   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
785               t->fib_index, t->adj_index, t->flow_hash);
786   s = format (s, "\n%U%U",
787               format_white_space, indent,
788               format_ip6_header, t->packet_data, sizeof (t->packet_data));
789   return s;
790 }
791
792
793 static u8 *
794 format_ip6_rewrite_trace (u8 * s, va_list * args)
795 {
796   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
797   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
798   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
799   u32 indent = format_get_indent (s);
800
801   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
802               t->fib_index, t->adj_index, format_ip_adjacency,
803               t->adj_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
804   s = format (s, "\n%U%U",
805               format_white_space, indent,
806               format_ip_adjacency_packet_data,
807               t->adj_index, t->packet_data, sizeof (t->packet_data));
808   return s;
809 }
810
811 /* Common trace function for all ip6-forward next nodes. */
812 #ifndef CLIB_MARCH_VARIANT
813 void
814 ip6_forward_next_trace (vlib_main_t * vm,
815                         vlib_node_runtime_t * node,
816                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
817 {
818   u32 *from, n_left;
819   ip6_main_t *im = &ip6_main;
820
821   n_left = frame->n_vectors;
822   from = vlib_frame_vector_args (frame);
823
824   while (n_left >= 4)
825     {
826       u32 bi0, bi1;
827       vlib_buffer_t *b0, *b1;
828       ip6_forward_next_trace_t *t0, *t1;
829
830       /* Prefetch next iteration. */
831       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
832       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
833
834       bi0 = from[0];
835       bi1 = from[1];
836
837       b0 = vlib_get_buffer (vm, bi0);
838       b1 = vlib_get_buffer (vm, bi1);
839
840       if (b0->flags & VLIB_BUFFER_IS_TRACED)
841         {
842           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
843           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
844           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
845           t0->fib_index =
846             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
847              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
848             vec_elt (im->fib_index_by_sw_if_index,
849                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
850
851           clib_memcpy_fast (t0->packet_data,
852                             vlib_buffer_get_current (b0),
853                             sizeof (t0->packet_data));
854         }
855       if (b1->flags & VLIB_BUFFER_IS_TRACED)
856         {
857           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
858           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
859           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
860           t1->fib_index =
861             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
862              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
863             vec_elt (im->fib_index_by_sw_if_index,
864                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
865
866           clib_memcpy_fast (t1->packet_data,
867                             vlib_buffer_get_current (b1),
868                             sizeof (t1->packet_data));
869         }
870       from += 2;
871       n_left -= 2;
872     }
873
874   while (n_left >= 1)
875     {
876       u32 bi0;
877       vlib_buffer_t *b0;
878       ip6_forward_next_trace_t *t0;
879
880       bi0 = from[0];
881
882       b0 = vlib_get_buffer (vm, bi0);
883
884       if (b0->flags & VLIB_BUFFER_IS_TRACED)
885         {
886           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
887           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
888           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
889           t0->fib_index =
890             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
891              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
892             vec_elt (im->fib_index_by_sw_if_index,
893                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
894
895           clib_memcpy_fast (t0->packet_data,
896                             vlib_buffer_get_current (b0),
897                             sizeof (t0->packet_data));
898         }
899       from += 1;
900       n_left -= 1;
901     }
902 }
903
904 /* Compute TCP/UDP/ICMP6 checksum in software. */
905 u16
906 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
907                                    ip6_header_t * ip0, int *bogus_lengthp)
908 {
909   ip_csum_t sum0;
910   u16 sum16, payload_length_host_byte_order;
911   u32 i, n_this_buffer, n_bytes_left;
912   u32 headers_size = sizeof (ip0[0]);
913   void *data_this_buffer;
914
915   ASSERT (bogus_lengthp);
916   *bogus_lengthp = 0;
917
918   /* Initialize checksum with ip header. */
919   sum0 = ip0->payload_length + clib_host_to_net_u16 (ip0->protocol);
920   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
921   data_this_buffer = (void *) (ip0 + 1);
922
923   for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++)
924     {
925       sum0 = ip_csum_with_carry (sum0,
926                                  clib_mem_unaligned (&ip0->
927                                                      src_address.as_uword[i],
928                                                      uword));
929       sum0 =
930         ip_csum_with_carry (sum0,
931                             clib_mem_unaligned (&ip0->dst_address.as_uword[i],
932                                                 uword));
933     }
934
935   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets)
936    * or UDP-Ping packets */
937   if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
938     {
939       u32 skip_bytes;
940       ip6_hop_by_hop_ext_t *ext_hdr =
941         (ip6_hop_by_hop_ext_t *) data_this_buffer;
942
943       /* validate really icmp6 next */
944       ASSERT ((ext_hdr->next_hdr == IP_PROTOCOL_ICMP6)
945               || (ext_hdr->next_hdr == IP_PROTOCOL_UDP));
946
947       skip_bytes = 8 * (1 + ext_hdr->n_data_u64s);
948       data_this_buffer = (void *) ((u8 *) data_this_buffer + skip_bytes);
949
950       payload_length_host_byte_order -= skip_bytes;
951       headers_size += skip_bytes;
952     }
953
954   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
955
956   if (p0)
957     {
958       u32 n_ip_bytes_this_buffer =
959         p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
960       if (n_this_buffer + headers_size > n_ip_bytes_this_buffer)
961         {
962           n_this_buffer = p0->current_length > headers_size ?
963             n_ip_bytes_this_buffer - headers_size : 0;
964         }
965     }
966
967   while (1)
968     {
969       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
970       n_bytes_left -= n_this_buffer;
971       if (n_bytes_left == 0)
972         break;
973
974       if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
975         {
976           *bogus_lengthp = 1;
977           return 0xfefe;
978         }
979       p0 = vlib_get_buffer (vm, p0->next_buffer);
980       data_this_buffer = vlib_buffer_get_current (p0);
981       n_this_buffer = clib_min (p0->current_length, n_bytes_left);
982     }
983
984   sum16 = ~ip_csum_fold (sum0);
985
986   return sum16;
987 }
988
989 u32
990 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
991 {
992   ip6_header_t *ip0 = vlib_buffer_get_current (p0);
993   udp_header_t *udp0;
994   u16 sum16;
995   int bogus_length;
996
997   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
998   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
999           || ip0->protocol == IP_PROTOCOL_ICMP6
1000           || ip0->protocol == IP_PROTOCOL_UDP
1001           || ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS);
1002
1003   udp0 = (void *) (ip0 + 1);
1004   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1005     {
1006       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1007                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1008       return p0->flags;
1009     }
1010
1011   sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, &bogus_length);
1012
1013   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1014                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1015
1016   return p0->flags;
1017 }
1018 #endif
1019
1020 /**
1021  * @brief returns number of links on which src is reachable.
1022  */
1023 always_inline int
1024 ip6_urpf_loose_check (ip6_main_t * im, vlib_buffer_t * b, ip6_header_t * i)
1025 {
1026   const load_balance_t *lb0;
1027   index_t lbi;
1028   u32 fib_index;
1029
1030   fib_index = vec_elt (im->fib_index_by_sw_if_index,
1031                        vnet_buffer (b)->sw_if_index[VLIB_RX]);
1032   fib_index =
1033     (vnet_buffer (b)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
1034     fib_index : vnet_buffer (b)->sw_if_index[VLIB_TX];
1035
1036   lbi = ip6_fib_table_fwding_lookup (im, fib_index, &i->src_address);
1037   lb0 = load_balance_get (lbi);
1038
1039   return (fib_urpf_check_size (lb0->lb_urpf));
1040 }
1041
1042 always_inline u8
1043 ip6_next_proto_is_tcp_udp (vlib_buffer_t * p0, ip6_header_t * ip0,
1044                            u32 * udp_offset0)
1045 {
1046   u32 proto0;
1047   proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_UDP, udp_offset0);
1048   if (proto0 != IP_PROTOCOL_UDP)
1049     {
1050       proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_TCP, udp_offset0);
1051       proto0 = (proto0 == IP_PROTOCOL_TCP) ? proto0 : 0;
1052     }
1053   return proto0;
1054 }
1055
1056 /* *INDENT-OFF* */
1057 VNET_FEATURE_ARC_INIT (ip6_local) =
1058 {
1059   .arc_name  = "ip6-local",
1060   .start_nodes = VNET_FEATURES ("ip6-local"),
1061 };
1062 /* *INDENT-ON* */
1063
1064 always_inline uword
1065 ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
1066                   vlib_frame_t * frame, int head_of_feature_arc)
1067 {
1068   ip6_main_t *im = &ip6_main;
1069   ip_lookup_main_t *lm = &im->lookup_main;
1070   u32 *from, n_left_from;
1071   vlib_node_runtime_t *error_node =
1072     vlib_node_get_runtime (vm, ip6_input_node.index);
1073   u8 arc_index = vnet_feat_arc_ip6_local.feature_arc_index;
1074   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1075   u16 nexts[VLIB_FRAME_SIZE], *next;
1076
1077   from = vlib_frame_vector_args (frame);
1078   n_left_from = frame->n_vectors;
1079
1080   if (node->flags & VLIB_NODE_FLAG_TRACE)
1081     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1082
1083   vlib_get_buffers (vm, from, bufs, n_left_from);
1084   b = bufs;
1085   next = nexts;
1086
1087   while (n_left_from > 2)
1088     {
1089       /* Prefetch next iteration. */
1090       if (n_left_from >= 6)
1091         {
1092           vlib_prefetch_buffer_header (b[4], STORE);
1093           vlib_prefetch_buffer_header (b[5], STORE);
1094           vlib_prefetch_buffer_data (b[2], LOAD);
1095           vlib_prefetch_buffer_data (b[3], LOAD);
1096         }
1097
1098       u8 error[2];
1099       error[0] = IP6_ERROR_UNKNOWN_PROTOCOL;
1100       error[1] = IP6_ERROR_UNKNOWN_PROTOCOL;
1101
1102       ip6_header_t *ip[2];
1103       ip[0] = vlib_buffer_get_current (b[0]);
1104       ip[1] = vlib_buffer_get_current (b[1]);
1105
1106       if (head_of_feature_arc)
1107         {
1108           vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1109           vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1110
1111           u8 type[2];
1112           type[0] = lm->builtin_protocol_by_ip_protocol[ip[0]->protocol];
1113           type[1] = lm->builtin_protocol_by_ip_protocol[ip[1]->protocol];
1114
1115           u32 flags[2];
1116           flags[0] = b[0]->flags;
1117           flags[1] = b[1]->flags;
1118
1119           u32 good_l4_csum[2];
1120           good_l4_csum[0] =
1121             flags[0] & (VNET_BUFFER_F_L4_CHECKSUM_CORRECT |
1122                         VNET_BUFFER_F_OFFLOAD_TCP_CKSUM |
1123                         VNET_BUFFER_F_OFFLOAD_UDP_CKSUM);
1124           good_l4_csum[1] =
1125             flags[1] & (VNET_BUFFER_F_L4_CHECKSUM_CORRECT |
1126                         VNET_BUFFER_F_OFFLOAD_TCP_CKSUM |
1127                         VNET_BUFFER_F_OFFLOAD_UDP_CKSUM);
1128
1129           u32 udp_offset[2] = { };
1130           u8 is_tcp_udp[2];
1131           is_tcp_udp[0] =
1132             ip6_next_proto_is_tcp_udp (b[0], ip[0], &udp_offset[0]);
1133           is_tcp_udp[1] =
1134             ip6_next_proto_is_tcp_udp (b[1], ip[1], &udp_offset[1]);
1135           i16 len_diff[2] = { 0 };
1136           if (PREDICT_TRUE (is_tcp_udp[0]))
1137             {
1138               udp_header_t *udp =
1139                 (udp_header_t *) ((u8 *) ip[0] + udp_offset[0]);
1140               good_l4_csum[0] |= type[0] == IP_BUILTIN_PROTOCOL_UDP
1141                 && udp->checksum == 0;
1142               /* optimistically verify UDP length. */
1143               u16 ip_len, udp_len;
1144               ip_len = clib_net_to_host_u16 (ip[0]->payload_length);
1145               udp_len = clib_net_to_host_u16 (udp->length);
1146               len_diff[0] = ip_len - udp_len;
1147             }
1148           if (PREDICT_TRUE (is_tcp_udp[1]))
1149             {
1150               udp_header_t *udp =
1151                 (udp_header_t *) ((u8 *) ip[1] + udp_offset[1]);
1152               good_l4_csum[1] |= type[1] == IP_BUILTIN_PROTOCOL_UDP
1153                 && udp->checksum == 0;
1154               /* optimistically verify UDP length. */
1155               u16 ip_len, udp_len;
1156               ip_len = clib_net_to_host_u16 (ip[1]->payload_length);
1157               udp_len = clib_net_to_host_u16 (udp->length);
1158               len_diff[1] = ip_len - udp_len;
1159             }
1160
1161           good_l4_csum[0] |= type[0] == IP_BUILTIN_PROTOCOL_UNKNOWN;
1162           good_l4_csum[1] |= type[1] == IP_BUILTIN_PROTOCOL_UNKNOWN;
1163
1164           len_diff[0] = type[0] == IP_BUILTIN_PROTOCOL_UDP ? len_diff[0] : 0;
1165           len_diff[1] = type[1] == IP_BUILTIN_PROTOCOL_UDP ? len_diff[1] : 0;
1166
1167           u8 need_csum[2];
1168           need_csum[0] = type[0] != IP_BUILTIN_PROTOCOL_UNKNOWN
1169             && !good_l4_csum[0]
1170             && !(flags[0] & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
1171           need_csum[1] = type[1] != IP_BUILTIN_PROTOCOL_UNKNOWN
1172             && !good_l4_csum[1]
1173             && !(flags[1] & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
1174           if (PREDICT_FALSE (need_csum[0]))
1175             {
1176               flags[0] = ip6_tcp_udp_icmp_validate_checksum (vm, b[0]);
1177               good_l4_csum[0] = flags[0] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
1178             }
1179           if (PREDICT_FALSE (need_csum[1]))
1180             {
1181               flags[1] = ip6_tcp_udp_icmp_validate_checksum (vm, b[1]);
1182               good_l4_csum[1] = flags[1] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
1183             }
1184
1185           error[0] = IP6_ERROR_UNKNOWN_PROTOCOL;
1186           error[0] = len_diff[0] < 0 ? IP6_ERROR_UDP_LENGTH : error[0];
1187           error[1] = IP6_ERROR_UNKNOWN_PROTOCOL;
1188           error[1] = len_diff[1] < 0 ? IP6_ERROR_UDP_LENGTH : error[1];
1189
1190           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
1191                          IP6_ERROR_UDP_CHECKSUM,
1192                          "Wrong IP6 errors constants");
1193           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
1194                          IP6_ERROR_ICMP_CHECKSUM,
1195                          "Wrong IP6 errors constants");
1196
1197           error[0] =
1198             !good_l4_csum[0] ? IP6_ERROR_UDP_CHECKSUM + type[0] : error[0];
1199           error[1] =
1200             !good_l4_csum[1] ? IP6_ERROR_UDP_CHECKSUM + type[1] : error[1];
1201
1202           /* Drop packets from unroutable hosts. */
1203           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1204           u8 unroutable[2];
1205           unroutable[0] = error[0] == IP6_ERROR_UNKNOWN_PROTOCOL
1206             && type[0] != IP_BUILTIN_PROTOCOL_ICMP
1207             && !ip6_address_is_link_local_unicast (&ip[0]->src_address);
1208           unroutable[1] = error[1] == IP6_ERROR_UNKNOWN_PROTOCOL
1209             && type[1] != IP_BUILTIN_PROTOCOL_ICMP
1210             && !ip6_address_is_link_local_unicast (&ip[1]->src_address);
1211           if (PREDICT_FALSE (unroutable[0]))
1212             {
1213               error[0] =
1214                 !ip6_urpf_loose_check (im, b[0],
1215                                        ip[0]) ? IP6_ERROR_SRC_LOOKUP_MISS
1216                 : error[0];
1217             }
1218           if (PREDICT_FALSE (unroutable[1]))
1219             {
1220               error[1] =
1221                 !ip6_urpf_loose_check (im, b[1],
1222                                        ip[1]) ? IP6_ERROR_SRC_LOOKUP_MISS
1223                 : error[1];
1224             }
1225
1226           vnet_buffer (b[0])->ip.fib_index =
1227             vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1228             vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1229             vnet_buffer (b[0])->ip.fib_index;
1230           vnet_buffer (b[1])->ip.fib_index =
1231             vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1232             vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1233             vnet_buffer (b[1])->ip.fib_index;
1234         }                       /* head_of_feature_arc */
1235
1236       next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
1237       next[0] =
1238         error[0] != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[0];
1239       next[1] = lm->local_next_by_ip_protocol[ip[1]->protocol];
1240       next[1] =
1241         error[1] != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[1];
1242
1243       b[0]->error = error_node->errors[0];
1244       b[1]->error = error_node->errors[1];
1245
1246       if (head_of_feature_arc)
1247         {
1248           u8 ip6_unknown[2];
1249           ip6_unknown[0] = error[0] == (u8) IP6_ERROR_UNKNOWN_PROTOCOL;
1250           ip6_unknown[1] = error[1] == (u8) IP6_ERROR_UNKNOWN_PROTOCOL;
1251           if (PREDICT_TRUE (ip6_unknown[0]))
1252             {
1253               u32 next32 = next[0];
1254               vnet_feature_arc_start (arc_index,
1255                                       vnet_buffer (b[0])->sw_if_index
1256                                       [VLIB_RX], &next32, b[0]);
1257               next[0] = next32;
1258             }
1259           if (PREDICT_TRUE (ip6_unknown[1]))
1260             {
1261               u32 next32 = next[1];
1262               vnet_feature_arc_start (arc_index,
1263                                       vnet_buffer (b[1])->sw_if_index
1264                                       [VLIB_RX], &next32, b[1]);
1265               next[1] = next32;
1266             }
1267         }
1268
1269       /* next */
1270       b += 2;
1271       next += 2;
1272       n_left_from -= 2;
1273     }
1274
1275   while (n_left_from)
1276     {
1277       u8 error;
1278       error = IP6_ERROR_UNKNOWN_PROTOCOL;
1279
1280       ip6_header_t *ip;
1281       ip = vlib_buffer_get_current (b[0]);
1282
1283       if (head_of_feature_arc)
1284         {
1285           vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1286           u8 type = lm->builtin_protocol_by_ip_protocol[ip->protocol];
1287
1288           u32 flags = b[0]->flags;
1289           u32 good_l4_csum =
1290             flags & (VNET_BUFFER_F_L4_CHECKSUM_CORRECT |
1291                      VNET_BUFFER_F_OFFLOAD_TCP_CKSUM |
1292                      VNET_BUFFER_F_OFFLOAD_UDP_CKSUM);
1293
1294           u32 udp_offset;
1295           i16 len_diff = 0;
1296           u8 is_tcp_udp = ip6_next_proto_is_tcp_udp (b[0], ip, &udp_offset);
1297           if (PREDICT_TRUE (is_tcp_udp))
1298             {
1299               udp_header_t *udp = (udp_header_t *) ((u8 *) ip + udp_offset);
1300               /* Don't verify UDP checksum for packets with explicit zero checksum. */
1301               good_l4_csum |= type == IP_BUILTIN_PROTOCOL_UDP
1302                 && udp->checksum == 0;
1303               /* optimistically verify UDP length. */
1304               u16 ip_len, udp_len;
1305               ip_len = clib_net_to_host_u16 (ip->payload_length);
1306               udp_len = clib_net_to_host_u16 (udp->length);
1307               len_diff = ip_len - udp_len;
1308             }
1309
1310           good_l4_csum |= type == IP_BUILTIN_PROTOCOL_UNKNOWN;
1311           len_diff = type == IP_BUILTIN_PROTOCOL_UDP ? len_diff : 0;
1312
1313           u8 need_csum = type != IP_BUILTIN_PROTOCOL_UNKNOWN && !good_l4_csum
1314             && !(flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
1315           if (PREDICT_FALSE (need_csum))
1316             {
1317               flags = ip6_tcp_udp_icmp_validate_checksum (vm, b[0]);
1318               good_l4_csum = flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
1319             }
1320
1321           error = IP6_ERROR_UNKNOWN_PROTOCOL;
1322           error = len_diff < 0 ? IP6_ERROR_UDP_LENGTH : error;
1323
1324           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
1325                          IP6_ERROR_UDP_CHECKSUM,
1326                          "Wrong IP6 errors constants");
1327           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
1328                          IP6_ERROR_ICMP_CHECKSUM,
1329                          "Wrong IP6 errors constants");
1330
1331           error = !good_l4_csum ? IP6_ERROR_UDP_CHECKSUM + type : error;
1332
1333           /* Drop packets from unroutable hosts. */
1334           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1335           u8 unroutable = error == IP6_ERROR_UNKNOWN_PROTOCOL
1336             && type != IP_BUILTIN_PROTOCOL_ICMP
1337             && !ip6_address_is_link_local_unicast (&ip->src_address);
1338           if (PREDICT_FALSE (unroutable))
1339             {
1340               error =
1341                 !ip6_urpf_loose_check (im, b[0],
1342                                        ip) ? IP6_ERROR_SRC_LOOKUP_MISS :
1343                 error;
1344             }
1345
1346           vnet_buffer (b[0])->ip.fib_index =
1347             vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1348             vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1349             vnet_buffer (b[0])->ip.fib_index;
1350         }                       /* head_of_feature_arc */
1351
1352       next[0] = lm->local_next_by_ip_protocol[ip->protocol];
1353       next[0] =
1354         error != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[0];
1355
1356       b[0]->error = error_node->errors[0];
1357
1358       if (head_of_feature_arc)
1359         {
1360           if (PREDICT_TRUE (error == (u8) IP6_ERROR_UNKNOWN_PROTOCOL))
1361             {
1362               u32 next32 = next[0];
1363               vnet_feature_arc_start (arc_index,
1364                                       vnet_buffer (b[0])->sw_if_index
1365                                       [VLIB_RX], &next32, b[0]);
1366               next[0] = next32;
1367             }
1368         }
1369
1370       /* next */
1371       b += 1;
1372       next += 1;
1373       n_left_from -= 1;
1374     }
1375
1376   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1377   return frame->n_vectors;
1378 }
1379
1380 VLIB_NODE_FN (ip6_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1381                                vlib_frame_t * frame)
1382 {
1383   return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1384 }
1385
1386 /* *INDENT-OFF* */
1387 VLIB_REGISTER_NODE (ip6_local_node) =
1388 {
1389   .name = "ip6-local",
1390   .vector_size = sizeof (u32),
1391   .format_trace = format_ip6_forward_next_trace,
1392   .n_next_nodes = IP_LOCAL_N_NEXT,
1393   .next_nodes =
1394   {
1395     [IP_LOCAL_NEXT_DROP] = "ip6-drop",
1396     [IP_LOCAL_NEXT_PUNT] = "ip6-punt",
1397     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
1398     [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input",
1399     [IP_LOCAL_NEXT_REASSEMBLY] = "ip6-reassembly",
1400   },
1401 };
1402 /* *INDENT-ON* */
1403
1404 VLIB_NODE_FN (ip6_local_end_of_arc_node) (vlib_main_t * vm,
1405                                           vlib_node_runtime_t * node,
1406                                           vlib_frame_t * frame)
1407 {
1408   return ip6_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1409 }
1410
1411 /* *INDENT-OFF* */
1412 VLIB_REGISTER_NODE (ip6_local_end_of_arc_node) = {
1413   .name = "ip6-local-end-of-arc",
1414   .vector_size = sizeof (u32),
1415
1416   .format_trace = format_ip6_forward_next_trace,
1417   .sibling_of = "ip6-local",
1418 };
1419
1420 VNET_FEATURE_INIT (ip6_local_end_of_arc, static) = {
1421   .arc_name = "ip6-local",
1422   .node_name = "ip6-local-end-of-arc",
1423   .runs_before = 0, /* not before any other features */
1424 };
1425 /* *INDENT-ON* */
1426
1427 #ifdef CLIB_MARCH_VARIANT
1428 extern vlib_node_registration_t ip6_local_node;
1429
1430 #else
1431
1432 void
1433 ip6_register_protocol (u32 protocol, u32 node_index)
1434 {
1435   vlib_main_t *vm = vlib_get_main ();
1436   ip6_main_t *im = &ip6_main;
1437   ip_lookup_main_t *lm = &im->lookup_main;
1438
1439   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1440   lm->local_next_by_ip_protocol[protocol] =
1441     vlib_node_add_next (vm, ip6_local_node.index, node_index);
1442 }
1443
1444 void
1445 ip6_unregister_protocol (u32 protocol)
1446 {
1447   ip6_main_t *im = &ip6_main;
1448   ip_lookup_main_t *lm = &im->lookup_main;
1449
1450   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1451   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1452 }
1453
1454 clib_error_t *
1455 ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index,
1456                     u8 refresh)
1457 {
1458   vnet_main_t *vnm = vnet_get_main ();
1459   ip6_main_t *im = &ip6_main;
1460   icmp6_neighbor_solicitation_header_t *h;
1461   ip6_address_t *src;
1462   ip_interface_address_t *ia;
1463   ip_adjacency_t *adj;
1464   vnet_hw_interface_t *hi;
1465   vnet_sw_interface_t *si;
1466   vlib_buffer_t *b;
1467   adj_index_t ai;
1468   u32 bi = 0;
1469   int bogus_length;
1470
1471   si = vnet_get_sw_interface (vnm, sw_if_index);
1472
1473   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
1474     {
1475       return clib_error_return (0, "%U: interface %U down",
1476                                 format_ip6_address, dst,
1477                                 format_vnet_sw_if_index_name, vnm,
1478                                 sw_if_index);
1479     }
1480
1481   src =
1482     ip6_interface_address_matching_destination (im, dst, sw_if_index, &ia);
1483   if (!src)
1484     {
1485       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
1486       return clib_error_return
1487         (0, "no matching interface address for destination %U (interface %U)",
1488          format_ip6_address, dst,
1489          format_vnet_sw_if_index_name, vnm, sw_if_index);
1490     }
1491
1492   h =
1493     vlib_packet_template_get_packet (vm,
1494                                      &im->discover_neighbor_packet_template,
1495                                      &bi);
1496   if (!h)
1497     return clib_error_return (0, "ICMP6 NS packet allocation failed");
1498
1499   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
1500
1501   /* Destination address is a solicited node multicast address.  We need to fill in
1502      the low 24 bits with low 24 bits of target's address. */
1503   h->ip.dst_address.as_u8[13] = dst->as_u8[13];
1504   h->ip.dst_address.as_u8[14] = dst->as_u8[14];
1505   h->ip.dst_address.as_u8[15] = dst->as_u8[15];
1506
1507   h->ip.src_address = src[0];
1508   h->neighbor.target_address = dst[0];
1509
1510   if (PREDICT_FALSE (!hi->hw_address))
1511     {
1512       return clib_error_return (0, "%U: interface %U do not support ip probe",
1513                                 format_ip6_address, dst,
1514                                 format_vnet_sw_if_index_name, vnm,
1515                                 sw_if_index);
1516     }
1517
1518   clib_memcpy_fast (h->link_layer_option.ethernet_address, hi->hw_address,
1519                     vec_len (hi->hw_address));
1520
1521   h->neighbor.icmp.checksum =
1522     ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length);
1523   ASSERT (bogus_length == 0);
1524
1525   b = vlib_get_buffer (vm, bi);
1526   vnet_buffer (b)->sw_if_index[VLIB_RX] =
1527     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
1528
1529   /* Add encapsulation string for software interface (e.g. ethernet header). */
1530   ip46_address_t nh = {
1531     .ip6 = *dst,
1532   };
1533
1534   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6,
1535                             VNET_LINK_IP6, &nh, sw_if_index);
1536   adj = adj_get (ai);
1537
1538   /* Peer has been previously resolved, retrieve glean adj instead */
1539   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE && refresh == 0)
1540     {
1541       adj_unlock (ai);
1542       ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP6,
1543                                   VNET_LINK_IP6, sw_if_index, &nh);
1544       adj = adj_get (ai);
1545     }
1546
1547   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
1548   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
1549
1550   {
1551     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
1552     u32 *to_next = vlib_frame_vector_args (f);
1553     to_next[0] = bi;
1554     f->n_vectors = 1;
1555     vlib_put_frame_to_node (vm, hi->output_node_index, f);
1556   }
1557
1558   adj_unlock (ai);
1559   return /* no error */ 0;
1560 }
1561 #endif
1562
/* Next-node indices used by the ip6 rewrite code. */
typedef enum
{
  IP6_REWRITE_NEXT_DROP = 0,
  IP6_REWRITE_NEXT_ICMP_ERROR = 1,
  IP6_REWRITE_NEXT_FRAGMENT = 2,
  IP6_REWRITE_N_NEXT = 3        /* Last: number of next indices */
} ip6_rewrite_next_t;
1570
1571 /**
 * The bits of an IPv6 address that are masked to construct a multicast
 * MAC address
1574  */
1575 #define IP6_MCAST_ADDR_MASK 0xffffffff
1576
1577 always_inline void
1578 ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes,
1579                u16 adj_packet_bytes, bool is_locally_generated,
1580                u32 * next, u32 * error)
1581 {
1582   if (adj_packet_bytes >= 1280 && packet_bytes > adj_packet_bytes)
1583     {
1584       if (is_locally_generated)
1585         {
1586           /* IP fragmentation */
1587           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
1588                                    IP6_FRAG_NEXT_IP6_REWRITE, 0);
1589           *next = IP6_REWRITE_NEXT_FRAGMENT;
1590           *error = IP6_ERROR_MTU_EXCEEDED;
1591         }
1592       else
1593         {
1594           *error = IP6_ERROR_MTU_EXCEEDED;
1595           icmp6_error_set_vnet_buffer (b, ICMP6_packet_too_big, 0,
1596                                        adj_packet_bytes);
1597           *next = IP6_REWRITE_NEXT_ICMP_ERROR;
1598         }
1599     }
1600 }
1601
1602 always_inline uword
1603 ip6_rewrite_inline_with_gso (vlib_main_t * vm,
1604                              vlib_node_runtime_t * node,
1605                              vlib_frame_t * frame,
1606                              int do_counters, int is_midchain, int is_mcast,
1607                              int do_gso)
1608 {
1609   ip_lookup_main_t *lm = &ip6_main.lookup_main;
1610   u32 *from = vlib_frame_vector_args (frame);
1611   u32 n_left_from, n_left_to_next, *to_next, next_index;
1612   vlib_node_runtime_t *error_node =
1613     vlib_node_get_runtime (vm, ip6_input_node.index);
1614
1615   n_left_from = frame->n_vectors;
1616   next_index = node->cached_next_index;
1617   u32 thread_index = vm->thread_index;
1618
1619   while (n_left_from > 0)
1620     {
1621       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1622
1623       while (n_left_from >= 4 && n_left_to_next >= 2)
1624         {
1625           ip_adjacency_t *adj0, *adj1;
1626           vlib_buffer_t *p0, *p1;
1627           ip6_header_t *ip0, *ip1;
1628           u32 pi0, rw_len0, next0, error0, adj_index0;
1629           u32 pi1, rw_len1, next1, error1, adj_index1;
1630           u32 tx_sw_if_index0, tx_sw_if_index1;
1631           bool is_locally_originated0, is_locally_originated1;
1632
1633           /* Prefetch next iteration. */
1634           {
1635             vlib_buffer_t *p2, *p3;
1636
1637             p2 = vlib_get_buffer (vm, from[2]);
1638             p3 = vlib_get_buffer (vm, from[3]);
1639
1640             vlib_prefetch_buffer_header (p2, LOAD);
1641             vlib_prefetch_buffer_header (p3, LOAD);
1642
1643             CLIB_PREFETCH (p2->pre_data, 32, STORE);
1644             CLIB_PREFETCH (p3->pre_data, 32, STORE);
1645
1646             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
1647             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
1648           }
1649
1650           pi0 = to_next[0] = from[0];
1651           pi1 = to_next[1] = from[1];
1652
1653           from += 2;
1654           n_left_from -= 2;
1655           to_next += 2;
1656           n_left_to_next -= 2;
1657
1658           p0 = vlib_get_buffer (vm, pi0);
1659           p1 = vlib_get_buffer (vm, pi1);
1660
1661           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1662           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
1663
1664           ip0 = vlib_buffer_get_current (p0);
1665           ip1 = vlib_buffer_get_current (p1);
1666
1667           error0 = error1 = IP6_ERROR_NONE;
1668           next0 = next1 = IP6_REWRITE_NEXT_DROP;
1669
1670           is_locally_originated0 =
1671             p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
1672           if (PREDICT_TRUE (!is_locally_originated0))
1673             {
1674               i32 hop_limit0 = ip0->hop_limit;
1675
1676               /* Input node should have reject packets with hop limit 0. */
1677               ASSERT (ip0->hop_limit > 0);
1678
1679               hop_limit0 -= 1;
1680
1681               ip0->hop_limit = hop_limit0;
1682
1683               /*
1684                * If the hop count drops below 1 when forwarding, generate
1685                * an ICMP response.
1686                */
1687               if (PREDICT_FALSE (hop_limit0 <= 0))
1688                 {
1689                   error0 = IP6_ERROR_TIME_EXPIRED;
1690                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
1691                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1692                   icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
1693                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
1694                                                0);
1695                 }
1696             }
1697           else
1698             {
1699               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
1700             }
1701           is_locally_originated1 =
1702             p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
1703           if (PREDICT_TRUE (!is_locally_originated1))
1704             {
1705               i32 hop_limit1 = ip1->hop_limit;
1706
1707               /* Input node should have reject packets with hop limit 0. */
1708               ASSERT (ip1->hop_limit > 0);
1709
1710               hop_limit1 -= 1;
1711
1712               ip1->hop_limit = hop_limit1;
1713
1714               /*
1715                * If the hop count drops below 1 when forwarding, generate
1716                * an ICMP response.
1717                */
1718               if (PREDICT_FALSE (hop_limit1 <= 0))
1719                 {
1720                   error1 = IP6_ERROR_TIME_EXPIRED;
1721                   next1 = IP6_REWRITE_NEXT_ICMP_ERROR;
1722                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1723                   icmp6_error_set_vnet_buffer (p1, ICMP6_time_exceeded,
1724                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
1725                                                0);
1726                 }
1727             }
1728           else
1729             {
1730               p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
1731             }
1732           adj0 = adj_get (adj_index0);
1733           adj1 = adj_get (adj_index1);
1734
1735           rw_len0 = adj0[0].rewrite_header.data_bytes;
1736           rw_len1 = adj1[0].rewrite_header.data_bytes;
1737           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
1738           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
1739
1740           if (do_counters)
1741             {
1742               vlib_increment_combined_counter
1743                 (&adjacency_counters,
1744                  thread_index, adj_index0, 1,
1745                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
1746               vlib_increment_combined_counter
1747                 (&adjacency_counters,
1748                  thread_index, adj_index1, 1,
1749                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
1750             }
1751
1752           /* Check MTU of outgoing interface. */
1753           u16 ip0_len =
1754             clib_net_to_host_u16 (ip0->payload_length) +
1755             sizeof (ip6_header_t);
1756           u16 ip1_len =
1757             clib_net_to_host_u16 (ip1->payload_length) +
1758             sizeof (ip6_header_t);
1759           if (do_gso && (p0->flags & VNET_BUFFER_F_GSO))
1760             ip0_len = gso_mtu_sz (p0);
1761           if (do_gso && (p1->flags & VNET_BUFFER_F_GSO))
1762             ip1_len = gso_mtu_sz (p1);
1763
1764
1765
1766           ip6_mtu_check (p0, ip0_len,
1767                          adj0[0].rewrite_header.max_l3_packet_bytes,
1768                          is_locally_originated0, &next0, &error0);
1769           ip6_mtu_check (p1, ip1_len,
1770                          adj1[0].rewrite_header.max_l3_packet_bytes,
1771                          is_locally_originated1, &next1, &error1);
1772
1773           /* Don't adjust the buffer for hop count issue; icmp-error node
1774            * wants to see the IP header */
1775           if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
1776             {
1777               p0->current_data -= rw_len0;
1778               p0->current_length += rw_len0;
1779
1780               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
1781               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
1782               next0 = adj0[0].rewrite_header.next_index;
1783
1784               if (PREDICT_FALSE
1785                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
1786                 vnet_feature_arc_start (lm->output_feature_arc_index,
1787                                         tx_sw_if_index0, &next0, p0);
1788             }
1789           else
1790             {
1791               p0->error = error_node->errors[error0];
1792             }
1793           if (PREDICT_TRUE (error1 == IP6_ERROR_NONE))
1794             {
1795               p1->current_data -= rw_len1;
1796               p1->current_length += rw_len1;
1797
1798               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
1799               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
1800               next1 = adj1[0].rewrite_header.next_index;
1801
1802               if (PREDICT_FALSE
1803                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
1804                 vnet_feature_arc_start (lm->output_feature_arc_index,
1805                                         tx_sw_if_index1, &next1, p1);
1806             }
1807           else
1808             {
1809               p1->error = error_node->errors[error1];
1810             }
1811
1812           if (is_midchain)
1813             {
1814               /* before we paint on the next header, update the L4
1815                * checksums if required, since there's no offload on a tunnel */
1816               calc_checksums (vm, p0);
1817               calc_checksums (vm, p1);
1818             }
1819
1820           /* Guess we are only writing on simple Ethernet header. */
1821           vnet_rewrite_two_headers (adj0[0], adj1[0],
1822                                     ip0, ip1, sizeof (ethernet_header_t));
1823
1824           if (is_midchain)
1825             {
1826               if (adj0->sub_type.midchain.fixup_func)
1827                 adj0->sub_type.midchain.fixup_func
1828                   (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
1829               if (adj1->sub_type.midchain.fixup_func)
1830                 adj1->sub_type.midchain.fixup_func
1831                   (vm, adj1, p1, adj1->sub_type.midchain.fixup_data);
1832             }
1833           if (is_mcast)
1834             {
1835               /*
1836                * copy bytes from the IP address into the MAC rewrite
1837                */
1838               vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
1839                                           adj0->
1840                                           rewrite_header.dst_mcast_offset,
1841                                           &ip0->dst_address.as_u32[3],
1842                                           (u8 *) ip0);
1843               vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
1844                                           adj1->
1845                                           rewrite_header.dst_mcast_offset,
1846                                           &ip1->dst_address.as_u32[3],
1847                                           (u8 *) ip1);
1848             }
1849
1850           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1851                                            to_next, n_left_to_next,
1852                                            pi0, pi1, next0, next1);
1853         }
1854
1855       while (n_left_from > 0 && n_left_to_next > 0)
1856         {
1857           ip_adjacency_t *adj0;
1858           vlib_buffer_t *p0;
1859           ip6_header_t *ip0;
1860           u32 pi0, rw_len0;
1861           u32 adj_index0, next0, error0;
1862           u32 tx_sw_if_index0;
1863           bool is_locally_originated0;
1864
1865           pi0 = to_next[0] = from[0];
1866
1867           p0 = vlib_get_buffer (vm, pi0);
1868
1869           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1870
1871           adj0 = adj_get (adj_index0);
1872
1873           ip0 = vlib_buffer_get_current (p0);
1874
1875           error0 = IP6_ERROR_NONE;
1876           next0 = IP6_REWRITE_NEXT_DROP;
1877
1878           /* Check hop limit */
1879           is_locally_originated0 =
1880             p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
1881           if (PREDICT_TRUE (!is_locally_originated0))
1882             {
1883               i32 hop_limit0 = ip0->hop_limit;
1884
1885               ASSERT (ip0->hop_limit > 0);
1886
1887               hop_limit0 -= 1;
1888
1889               ip0->hop_limit = hop_limit0;
1890
1891               if (PREDICT_FALSE (hop_limit0 <= 0))
1892                 {
1893                   /*
1894                    * If the hop count drops below 1 when forwarding, generate
1895                    * an ICMP response.
1896                    */
1897                   error0 = IP6_ERROR_TIME_EXPIRED;
1898                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
1899                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1900                   icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
1901                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
1902                                                0);
1903                 }
1904             }
1905           else
1906             {
1907               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
1908             }
1909
1910           if (is_midchain)
1911             {
1912               calc_checksums (vm, p0);
1913             }
1914
1915           /* Guess we are only writing on simple Ethernet header. */
1916           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
1917
1918           /* Update packet buffer attributes/set output interface. */
1919           rw_len0 = adj0[0].rewrite_header.data_bytes;
1920           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
1921
1922           if (do_counters)
1923             {
1924               vlib_increment_combined_counter
1925                 (&adjacency_counters,
1926                  thread_index, adj_index0, 1,
1927                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
1928             }
1929
1930           /* Check MTU of outgoing interface. */
1931           u16 ip0_len =
1932             clib_net_to_host_u16 (ip0->payload_length) +
1933             sizeof (ip6_header_t);
1934           if (do_gso && (p0->flags & VNET_BUFFER_F_GSO))
1935             ip0_len = gso_mtu_sz (p0);
1936
1937           ip6_mtu_check (p0, ip0_len,
1938                          adj0[0].rewrite_header.max_l3_packet_bytes,
1939                          is_locally_originated0, &next0, &error0);
1940
1941           /* Don't adjust the buffer for hop count issue; icmp-error node
1942            * wants to see the IP header */
1943           if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
1944             {
1945               p0->current_data -= rw_len0;
1946               p0->current_length += rw_len0;
1947
1948               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
1949
1950               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
1951               next0 = adj0[0].rewrite_header.next_index;
1952
1953               if (PREDICT_FALSE
1954                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
1955                 vnet_feature_arc_start (lm->output_feature_arc_index,
1956                                         tx_sw_if_index0, &next0, p0);
1957             }
1958           else
1959             {
1960               p0->error = error_node->errors[error0];
1961             }
1962
1963           if (is_midchain)
1964             {
1965               if (adj0->sub_type.midchain.fixup_func)
1966                 adj0->sub_type.midchain.fixup_func
1967                   (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
1968             }
1969           if (is_mcast)
1970             {
1971               vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
1972                                           adj0->
1973                                           rewrite_header.dst_mcast_offset,
1974                                           &ip0->dst_address.as_u32[3],
1975                                           (u8 *) ip0);
1976             }
1977
1978           from += 1;
1979           n_left_from -= 1;
1980           to_next += 1;
1981           n_left_to_next -= 1;
1982
1983           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1984                                            to_next, n_left_to_next,
1985                                            pi0, next0);
1986         }
1987
1988       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1989     }
1990
1991   /* Need to do trace after rewrites to pick up new packet data. */
1992   if (node->flags & VLIB_NODE_FLAG_TRACE)
1993     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1994
1995   return frame->n_vectors;
1996 }
1997
1998 always_inline uword
1999 ip6_rewrite_inline (vlib_main_t * vm,
2000                     vlib_node_runtime_t * node,
2001                     vlib_frame_t * frame,
2002                     int do_counters, int is_midchain, int is_mcast)
2003 {
2004   vnet_main_t *vnm = vnet_get_main ();
2005   if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2006     return ip6_rewrite_inline_with_gso (vm, node, frame, do_counters,
2007                                         is_midchain, is_mcast,
2008                                         1 /* do_gso */ );
2009   else
2010     return ip6_rewrite_inline_with_gso (vm, node, frame, do_counters,
2011                                         is_midchain, is_mcast,
2012                                         0 /* no do_gso */ );
2013 }
2014
2015 VLIB_NODE_FN (ip6_rewrite_node) (vlib_main_t * vm,
2016                                  vlib_node_runtime_t * node,
2017                                  vlib_frame_t * frame)
2018 {
2019   if (adj_are_counters_enabled ())
2020     return ip6_rewrite_inline (vm, node, frame, 1, 0, 0);
2021   else
2022     return ip6_rewrite_inline (vm, node, frame, 0, 0, 0);
2023 }
2024
2025 VLIB_NODE_FN (ip6_rewrite_bcast_node) (vlib_main_t * vm,
2026                                        vlib_node_runtime_t * node,
2027                                        vlib_frame_t * frame)
2028 {
2029   if (adj_are_counters_enabled ())
2030     return ip6_rewrite_inline (vm, node, frame, 1, 0, 0);
2031   else
2032     return ip6_rewrite_inline (vm, node, frame, 0, 0, 0);
2033 }
2034
2035 VLIB_NODE_FN (ip6_rewrite_mcast_node) (vlib_main_t * vm,
2036                                        vlib_node_runtime_t * node,
2037                                        vlib_frame_t * frame)
2038 {
2039   if (adj_are_counters_enabled ())
2040     return ip6_rewrite_inline (vm, node, frame, 1, 0, 1);
2041   else
2042     return ip6_rewrite_inline (vm, node, frame, 0, 0, 1);
2043 }
2044
2045 VLIB_NODE_FN (ip6_midchain_node) (vlib_main_t * vm,
2046                                   vlib_node_runtime_t * node,
2047                                   vlib_frame_t * frame)
2048 {
2049   if (adj_are_counters_enabled ())
2050     return ip6_rewrite_inline (vm, node, frame, 1, 1, 0);
2051   else
2052     return ip6_rewrite_inline (vm, node, frame, 0, 1, 0);
2053 }
2054
2055 VLIB_NODE_FN (ip6_mcast_midchain_node) (vlib_main_t * vm,
2056                                         vlib_node_runtime_t * node,
2057                                         vlib_frame_t * frame)
2058 {
2059   if (adj_are_counters_enabled ())
2060     return ip6_rewrite_inline (vm, node, frame, 1, 1, 1);
2061   else
2062     return ip6_rewrite_inline (vm, node, frame, 0, 1, 1);
2063 }
2064
2065 /* *INDENT-OFF* */
2066 VLIB_REGISTER_NODE (ip6_midchain_node) =
2067 {
2068   .name = "ip6-midchain",
2069   .vector_size = sizeof (u32),
2070   .format_trace = format_ip6_forward_next_trace,
2071   .sibling_of = "ip6-rewrite",
2072   };
2073
2074 VLIB_REGISTER_NODE (ip6_rewrite_node) =
2075 {
2076   .name = "ip6-rewrite",
2077   .vector_size = sizeof (u32),
2078   .format_trace = format_ip6_rewrite_trace,
2079   .n_next_nodes = IP6_REWRITE_N_NEXT,
2080   .next_nodes =
2081   {
2082     [IP6_REWRITE_NEXT_DROP] = "ip6-drop",
2083     [IP6_REWRITE_NEXT_ICMP_ERROR] = "ip6-icmp-error",
2084     [IP6_REWRITE_NEXT_FRAGMENT] = "ip6-frag",
2085   },
2086 };
2087
2088 VLIB_REGISTER_NODE (ip6_rewrite_bcast_node) = {
2089   .name = "ip6-rewrite-bcast",
2090   .vector_size = sizeof (u32),
2091
2092   .format_trace = format_ip6_rewrite_trace,
2093   .sibling_of = "ip6-rewrite",
2094 };
2095
2096 VLIB_REGISTER_NODE (ip6_rewrite_mcast_node) =
2097 {
2098   .name = "ip6-rewrite-mcast",
2099   .vector_size = sizeof (u32),
2100   .format_trace = format_ip6_rewrite_trace,
2101   .sibling_of = "ip6-rewrite",
2102 };
2103
2104
2105 VLIB_REGISTER_NODE (ip6_mcast_midchain_node) =
2106 {
2107   .name = "ip6-mcast-midchain",
2108   .vector_size = sizeof (u32),
2109   .format_trace = format_ip6_rewrite_trace,
2110   .sibling_of = "ip6-rewrite",
2111 };
2112
2113 /* *INDENT-ON* */
2114
2115 /*
2116  * Hop-by-Hop handling
2117  */
/*
 * Hop-by-hop state object.  The formatters, the option scanner and the
 * ip6-hop-by-hop node in this file all reach it via
 * &ip6_hop_by_hop_main.  The definition is emitted only when
 * CLIB_MARCH_VARIANT is not defined (presumably so that per-march
 * variant builds of this file do not each define it -- confirm).
 */
2118 #ifndef CLIB_MARCH_VARIANT
2119 ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
2120 #endif /* CLIB_MARCH_VARIANT */
2121
/*
 * List of (symbol, description) pairs for the ip6-hop-by-hop error
 * codes.  It is expanded with different definitions of _() to build
 * the ip6_hop_by_hop_error_t enum and the
 * ip6_hop_by_hop_error_strings[] table, so both stay in the same order.
 */
2122 #define foreach_ip6_hop_by_hop_error \
2123 _(PROCESSED, "pkts with ip6 hop-by-hop options") \
2124 _(FORMAT, "incorrectly formatted hop-by-hop options") \
2125 _(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options")
2126
2127 /* *INDENT-OFF* */
/*
 * Error codes: one IP6_HOP_BY_HOP_ERROR_<sym> per list entry.
 * PROCESSED is the first entry and therefore has value 0, which is
 * also the value the node code initializes error0/error1 to and tests
 * with "error0 == 0".
 */
2128 typedef enum
2129 {
2130 #define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
2131   foreach_ip6_hop_by_hop_error
2132 #undef _
2133   IP6_HOP_BY_HOP_N_ERROR,       /* count of the codes above */
2134 } ip6_hop_by_hop_error_t;
2135 /* *INDENT-ON* */
2136
2137 /*
2138  * Primary h-b-h handler trace support
2139  * We work pretty hard on the problem for obvious reasons
2140  */
/*
 * Per-packet trace record written by the ip6-hop-by-hop node and
 * printed by format_ip6_hop_by_hop_trace ().
 */
2141 typedef struct
2142 {
2143   u32 next_index;       /* next index selected for the packet */
2144   u32 trace_len;        /* bytes copied into option_data; the node clamps it to the array size */
2145   u8 option_data[256];  /* verbatim copy of the start of the hop-by-hop header */
2146 } ip6_hop_by_hop_trace_t;
2147
/* Declared here so code in this file can refer to the node registration (e.g. its .index). */
2148 extern vlib_node_registration_t ip6_hop_by_hop_node;
2149
/*
 * Description strings for the error codes, generated from
 * foreach_ip6_hop_by_hop_error and therefore in the same order as the
 * ip6_hop_by_hop_error_t values.
 */
2150 static char *ip6_hop_by_hop_error_strings[] = {
2151 #define _(sym,string) string,
2152   foreach_ip6_hop_by_hop_error
2153 #undef _
2154 };
2155
2156 #ifndef CLIB_MARCH_VARIANT
2157 u8 *
2158 format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args)
2159 {
2160   ip6_hop_by_hop_header_t *hbh0 = va_arg (*args, ip6_hop_by_hop_header_t *);
2161   int total_len = va_arg (*args, int);
2162   ip6_hop_by_hop_option_t *opt0, *limit0;
2163   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2164   u8 type0;
2165
2166   s = format (s, "IP6_HOP_BY_HOP: next protocol %d len %d total %d",
2167               hbh0->protocol, (hbh0->length + 1) << 3, total_len);
2168
2169   opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2170   limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 + total_len);
2171
2172   while (opt0 < limit0)
2173     {
2174       type0 = opt0->type;
2175       switch (type0)
2176         {
2177         case 0:         /* Pad, just stop */
2178           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1);
2179           break;
2180
2181         default:
2182           if (hm->trace[type0])
2183             {
2184               s = (*hm->trace[type0]) (s, opt0);
2185             }
2186           else
2187             {
2188               s =
2189                 format (s, "\n    unrecognized option %d length %d", type0,
2190                         opt0->length);
2191             }
2192           opt0 =
2193             (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2194                                          sizeof (ip6_hop_by_hop_option_t));
2195           break;
2196         }
2197     }
2198   return s;
2199 }
2200 #endif
2201
2202 static u8 *
2203 format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
2204 {
2205   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
2206   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
2207   ip6_hop_by_hop_trace_t *t = va_arg (*args, ip6_hop_by_hop_trace_t *);
2208   ip6_hop_by_hop_header_t *hbh0;
2209   ip6_hop_by_hop_option_t *opt0, *limit0;
2210   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2211
2212   u8 type0;
2213
2214   hbh0 = (ip6_hop_by_hop_header_t *) t->option_data;
2215
2216   s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d",
2217               t->next_index, (hbh0->length + 1) << 3, t->trace_len);
2218
2219   opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2220   limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0) + t->trace_len;
2221
2222   while (opt0 < limit0)
2223     {
2224       type0 = opt0->type;
2225       switch (type0)
2226         {
2227         case 0:         /* Pad, just stop */
2228           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1;
2229           break;
2230
2231         default:
2232           if (hm->trace[type0])
2233             {
2234               s = (*hm->trace[type0]) (s, opt0);
2235             }
2236           else
2237             {
2238               s =
2239                 format (s, "\n    unrecognized option %d length %d", type0,
2240                         opt0->length);
2241             }
2242           opt0 =
2243             (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2244                                          sizeof (ip6_hop_by_hop_option_t));
2245           break;
2246         }
2247     }
2248   return s;
2249 }
2250
2251 always_inline u8
2252 ip6_scan_hbh_options (vlib_buffer_t * b0,
2253                       ip6_header_t * ip0,
2254                       ip6_hop_by_hop_header_t * hbh0,
2255                       ip6_hop_by_hop_option_t * opt0,
2256                       ip6_hop_by_hop_option_t * limit0, u32 * next0)
2257 {
2258   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2259   u8 type0;
2260   u8 error0 = 0;
2261
2262   while (opt0 < limit0)
2263     {
2264       type0 = opt0->type;
2265       switch (type0)
2266         {
2267         case 0:         /* Pad1 */
2268           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1;
2269           continue;
2270         case 1:         /* PadN */
2271           break;
2272         default:
2273           if (hm->options[type0])
2274             {
2275               if ((*hm->options[type0]) (b0, ip0, opt0) < 0)
2276                 {
2277                   error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2278                   return (error0);
2279                 }
2280             }
2281           else
2282             {
2283               /* Unrecognized mandatory option, check the two high order bits */
2284               switch (opt0->type & HBH_OPTION_TYPE_HIGH_ORDER_BITS)
2285                 {
2286                 case HBH_OPTION_TYPE_SKIP_UNKNOWN:
2287                   break;
2288                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN:
2289                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2290                   *next0 = IP_LOOKUP_NEXT_DROP;
2291                   break;
2292                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP:
2293                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2294                   *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
2295                   icmp6_error_set_vnet_buffer (b0, ICMP6_parameter_problem,
2296                                                ICMP6_parameter_problem_unrecognized_option,
2297                                                (u8 *) opt0 - (u8 *) ip0);
2298                   break;
2299                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST:
2300                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2301                   if (!ip6_address_is_multicast (&ip0->dst_address))
2302                     {
2303                       *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
2304                       icmp6_error_set_vnet_buffer (b0,
2305                                                    ICMP6_parameter_problem,
2306                                                    ICMP6_parameter_problem_unrecognized_option,
2307                                                    (u8 *) opt0 - (u8 *) ip0);
2308                     }
2309                   else
2310                     {
2311                       *next0 = IP_LOOKUP_NEXT_DROP;
2312                     }
2313                   break;
2314                 }
2315               return (error0);
2316             }
2317         }
2318       opt0 =
2319         (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2320                                      sizeof (ip6_hop_by_hop_option_t));
2321     }
2322   return (error0);
2323 }
2324
2325 /*
2326  * Process the Hop-by-Hop Options header
2327  */
2328 VLIB_NODE_FN (ip6_hop_by_hop_node) (vlib_main_t * vm,
2329                                     vlib_node_runtime_t * node,
2330                                     vlib_frame_t * frame)
2331 {
2332   vlib_node_runtime_t *error_node =
2333     vlib_node_get_runtime (vm, ip6_hop_by_hop_node.index);
2334   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2335   u32 n_left_from, *from, *to_next;
2336   ip_lookup_next_t next_index;
2337
2338   from = vlib_frame_vector_args (frame);
2339   n_left_from = frame->n_vectors;
2340   next_index = node->cached_next_index;
2341
2342   while (n_left_from > 0)
2343     {
2344       u32 n_left_to_next;
2345
2346       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2347
2348       while (n_left_from >= 4 && n_left_to_next >= 2)
2349         {
2350           u32 bi0, bi1;
2351           vlib_buffer_t *b0, *b1;
2352           u32 next0, next1;
2353           ip6_header_t *ip0, *ip1;
2354           ip6_hop_by_hop_header_t *hbh0, *hbh1;
2355           ip6_hop_by_hop_option_t *opt0, *limit0, *opt1, *limit1;
2356           u8 error0 = 0, error1 = 0;
2357
2358           /* Prefetch next iteration. */
2359           {
2360             vlib_buffer_t *p2, *p3;
2361
2362             p2 = vlib_get_buffer (vm, from[2]);
2363             p3 = vlib_get_buffer (vm, from[3]);
2364
2365             vlib_prefetch_buffer_header (p2, LOAD);
2366             vlib_prefetch_buffer_header (p3, LOAD);
2367
2368             CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
2369             CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
2370           }
2371
2372           /* Speculatively enqueue b0, b1 to the current next frame */
2373           to_next[0] = bi0 = from[0];
2374           to_next[1] = bi1 = from[1];
2375           from += 2;
2376           to_next += 2;
2377           n_left_from -= 2;
2378           n_left_to_next -= 2;
2379
2380           b0 = vlib_get_buffer (vm, bi0);
2381           b1 = vlib_get_buffer (vm, bi1);
2382
2383           /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2384           u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
2385           ip_adjacency_t *adj0 = adj_get (adj_index0);
2386           u32 adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
2387           ip_adjacency_t *adj1 = adj_get (adj_index1);
2388
2389           /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2390           next0 = adj0->lookup_next_index;
2391           next1 = adj1->lookup_next_index;
2392
2393           ip0 = vlib_buffer_get_current (b0);
2394           ip1 = vlib_buffer_get_current (b1);
2395           hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
2396           hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
2397           opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2398           opt1 = (ip6_hop_by_hop_option_t *) (hbh1 + 1);
2399           limit0 =
2400             (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
2401                                          ((hbh0->length + 1) << 3));
2402           limit1 =
2403             (ip6_hop_by_hop_option_t *) ((u8 *) hbh1 +
2404                                          ((hbh1->length + 1) << 3));
2405
2406           /*
2407            * Basic validity checks
2408            */
2409           if ((hbh0->length + 1) << 3 >
2410               clib_net_to_host_u16 (ip0->payload_length))
2411             {
2412               error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2413               next0 = IP_LOOKUP_NEXT_DROP;
2414               goto outdual;
2415             }
2416           /* Scan the set of h-b-h options, process ones that we understand */
2417           error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
2418
2419           if ((hbh1->length + 1) << 3 >
2420               clib_net_to_host_u16 (ip1->payload_length))
2421             {
2422               error1 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2423               next1 = IP_LOOKUP_NEXT_DROP;
2424               goto outdual;
2425             }
2426           /* Scan the set of h-b-h options, process ones that we understand */
2427           error1 = ip6_scan_hbh_options (b1, ip1, hbh1, opt1, limit1, &next1);
2428
2429         outdual:
2430           /* Has the classifier flagged this buffer for special treatment? */
2431           if (PREDICT_FALSE
2432               ((error0 == 0)
2433                && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
2434             next0 = hm->next_override;
2435
2436           /* Has the classifier flagged this buffer for special treatment? */
2437           if (PREDICT_FALSE
2438               ((error1 == 0)
2439                && (vnet_buffer (b1)->l2_classify.opaque_index & OI_DECAP)))
2440             next1 = hm->next_override;
2441
2442           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
2443             {
2444               if (b0->flags & VLIB_BUFFER_IS_TRACED)
2445                 {
2446                   ip6_hop_by_hop_trace_t *t =
2447                     vlib_add_trace (vm, node, b0, sizeof (*t));
2448                   u32 trace_len = (hbh0->length + 1) << 3;
2449                   t->next_index = next0;
2450                   /* Capture the h-b-h option verbatim */
2451                   trace_len =
2452                     trace_len <
2453                     ARRAY_LEN (t->option_data) ? trace_len :
2454                     ARRAY_LEN (t->option_data);
2455                   t->trace_len = trace_len;
2456                   clib_memcpy_fast (t->option_data, hbh0, trace_len);
2457                 }
2458               if (b1->flags & VLIB_BUFFER_IS_TRACED)
2459                 {
2460                   ip6_hop_by_hop_trace_t *t =
2461                     vlib_add_trace (vm, node, b1, sizeof (*t));
2462                   u32 trace_len = (hbh1->length + 1) << 3;
2463                   t->next_index = next1;
2464                   /* Capture the h-b-h option verbatim */
2465                   trace_len =
2466                     trace_len <
2467                     ARRAY_LEN (t->option_data) ? trace_len :
2468                     ARRAY_LEN (t->option_data);
2469                   t->trace_len = trace_len;
2470                   clib_memcpy_fast (t->option_data, hbh1, trace_len);
2471                 }
2472
2473             }
2474
2475           b0->error = error_node->errors[error0];
2476           b1->error = error_node->errors[error1];
2477
2478           /* verify speculative enqueue, maybe switch current next frame */
2479           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
2480                                            n_left_to_next, bi0, bi1, next0,
2481                                            next1);
2482         }
2483
2484       while (n_left_from > 0 && n_left_to_next > 0)
2485         {
2486           u32 bi0;
2487           vlib_buffer_t *b0;
2488           u32 next0;
2489           ip6_header_t *ip0;
2490           ip6_hop_by_hop_header_t *hbh0;
2491           ip6_hop_by_hop_option_t *opt0, *limit0;
2492           u8 error0 = 0;
2493
2494           /* Speculatively enqueue b0 to the current next frame */
2495           bi0 = from[0];
2496           to_next[0] = bi0;
2497           from += 1;
2498           to_next += 1;
2499           n_left_from -= 1;
2500           n_left_to_next -= 1;
2501
2502           b0 = vlib_get_buffer (vm, bi0);
2503           /*
2504            * Default use the next_index from the adjacency.
2505            * A HBH option rarely redirects to a different node
2506            */
2507           u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
2508           ip_adjacency_t *adj0 = adj_get (adj_index0);
2509           next0 = adj0->lookup_next_index;
2510
2511           ip0 = vlib_buffer_get_current (b0);
2512           hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
2513           opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2514           limit0 =
2515             (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
2516                                          ((hbh0->length + 1) << 3));
2517
2518           /*
2519            * Basic validity checks
2520            */
2521           if ((hbh0->length + 1) << 3 >
2522               clib_net_to_host_u16 (ip0->payload_length))
2523             {
2524               error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2525               next0 = IP_LOOKUP_NEXT_DROP;
2526               goto out0;
2527             }
2528
2529           /* Scan the set of h-b-h options, process ones that we understand */
2530           error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
2531
2532         out0:
2533           /* Has the classifier flagged this buffer for special treatment? */
2534           if (PREDICT_FALSE
2535               ((error0 == 0)
2536                && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
2537             next0 = hm->next_override;
2538
2539           if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
2540             {
2541               ip6_hop_by_hop_trace_t *t =
2542                 vlib_add_trace (vm, node, b0, sizeof (*t));
2543               u32 trace_len = (hbh0->length + 1) << 3;
2544               t->next_index = next0;
2545               /* Capture the h-b-h option verbatim */
2546               trace_len =
2547                 trace_len <
2548                 ARRAY_LEN (t->option_data) ? trace_len :
2549                 ARRAY_LEN (t->option_data);
2550               t->trace_len = trace_len;
2551               clib_memcpy_fast (t->option_data, hbh0, trace_len);
2552             }
2553
2554           b0->error = error_node->errors[error0];
2555
2556           /* verify speculative enqueue, maybe switch current next frame */
2557           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
2558                                            n_left_to_next, bi0, next0);
2559         }
2560       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2561     }
2562   return frame->n_vectors;
2563 }
2564
2565 /* *INDENT-OFF* */
2566 VLIB_REGISTER_NODE (ip6_hop_by_hop_node) =
2567 {
2568   .name = "ip6-hop-by-hop",
2569   .sibling_of = "ip6-lookup",
2570   .vector_size = sizeof (u32),
2571   .format_trace = format_ip6_hop_by_hop_trace,
2572   .type = VLIB_NODE_TYPE_INTERNAL,
2573   .n_errors = ARRAY_LEN (ip6_hop_by_hop_error_strings),
2574   .error_strings = ip6_hop_by_hop_error_strings,
2575   .n_next_nodes = 0,
2576 };
2577 /* *INDENT-ON* */
2578
2579 static clib_error_t *
2580 ip6_hop_by_hop_init (vlib_main_t * vm)
2581 {
2582   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2583   clib_memset (hm->options, 0, sizeof (hm->options));
2584   clib_memset (hm->trace, 0, sizeof (hm->trace));
2585   hm->next_override = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP;
2586   return (0);
2587 }
2588
2589 VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
2590
2591 #ifndef CLIB_MARCH_VARIANT
2592 void
2593 ip6_hbh_set_next_override (uword next)
2594 {
2595   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2596
2597   hm->next_override = next;
2598 }
2599
2600 int
2601 ip6_hbh_register_option (u8 option,
2602                          int options (vlib_buffer_t * b, ip6_header_t * ip,
2603                                       ip6_hop_by_hop_option_t * opt),
2604                          u8 * trace (u8 * s, ip6_hop_by_hop_option_t * opt))
2605 {
2606   ip6_main_t *im = &ip6_main;
2607   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2608
2609   ASSERT ((u32) option < ARRAY_LEN (hm->options));
2610
2611   /* Already registered */
2612   if (hm->options[option])
2613     return (-1);
2614
2615   hm->options[option] = options;
2616   hm->trace[option] = trace;
2617
2618   /* Set global variable */
2619   im->hbh_enabled = 1;
2620
2621   return (0);
2622 }
2623
2624 int
2625 ip6_hbh_unregister_option (u8 option)
2626 {
2627   ip6_main_t *im = &ip6_main;
2628   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2629
2630   ASSERT ((u32) option < ARRAY_LEN (hm->options));
2631
2632   /* Not registered */
2633   if (!hm->options[option])
2634     return (-1);
2635
2636   hm->options[option] = NULL;
2637   hm->trace[option] = NULL;
2638
2639   /* Disable global knob if this was the last option configured */
2640   int i;
2641   bool found = false;
2642   for (i = 0; i < 256; i++)
2643     {
2644       if (hm->options[option])
2645         {
2646           found = true;
2647           break;
2648         }
2649     }
2650   if (!found)
2651     im->hbh_enabled = 0;
2652
2653   return (0);
2654 }
2655
/*
 * Global IP6 main.
 * This is the single definition of the object; it sits inside the
 * "#ifndef CLIB_MARCH_VARIANT" region of this file.
 */
ip6_main_t ip6_main;
2658 #endif
2659
2660 static clib_error_t *
2661 ip6_lookup_init (vlib_main_t * vm)
2662 {
2663   ip6_main_t *im = &ip6_main;
2664   clib_error_t *error;
2665   uword i;
2666
2667   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
2668     return error;
2669
2670   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
2671     {
2672       u32 j, i0, i1;
2673
2674       i0 = i / 32;
2675       i1 = i % 32;
2676
2677       for (j = 0; j < i0; j++)
2678         im->fib_masks[i].as_u32[j] = ~0;
2679
2680       if (i1)
2681         im->fib_masks[i].as_u32[i0] =
2682           clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
2683     }
2684
2685   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 1);
2686
2687   if (im->lookup_table_nbuckets == 0)
2688     im->lookup_table_nbuckets = IP6_FIB_DEFAULT_HASH_NUM_BUCKETS;
2689
2690   im->lookup_table_nbuckets = 1 << max_log2 (im->lookup_table_nbuckets);
2691
2692   if (im->lookup_table_size == 0)
2693     im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE;
2694
2695   clib_bihash_init_24_8 (&(im->ip6_table[IP6_FIB_TABLE_FWDING].ip6_hash),
2696                          "ip6 FIB fwding table",
2697                          im->lookup_table_nbuckets, im->lookup_table_size);
2698   clib_bihash_init_24_8 (&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash,
2699                          "ip6 FIB non-fwding table",
2700                          im->lookup_table_nbuckets, im->lookup_table_size);
2701   clib_bihash_init_40_8 (&im->ip6_mtable.ip6_mhash,
2702                          "ip6 mFIB table",
2703                          im->lookup_table_nbuckets, im->lookup_table_size);
2704
2705   /* Create FIB with index 0 and table id of 0. */
2706   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
2707                                      FIB_SOURCE_DEFAULT_ROUTE);
2708   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
2709                                       MFIB_SOURCE_DEFAULT_ROUTE);
2710
2711   {
2712     pg_node_t *pn;
2713     pn = pg_get_node (ip6_lookup_node.index);
2714     pn->unformat_edit = unformat_pg_ip6_header;
2715   }
2716
2717   /* Unless explicitly configured, don't process HBH options */
2718   im->hbh_enabled = 0;
2719
2720   {
2721     icmp6_neighbor_solicitation_header_t p;
2722
2723     clib_memset (&p, 0, sizeof (p));
2724
2725     p.ip.ip_version_traffic_class_and_flow_label =
2726       clib_host_to_net_u32 (0x6 << 28);
2727     p.ip.payload_length =
2728       clib_host_to_net_u16 (sizeof (p) -
2729                             STRUCT_OFFSET_OF
2730                             (icmp6_neighbor_solicitation_header_t, neighbor));
2731     p.ip.protocol = IP_PROTOCOL_ICMP6;
2732     p.ip.hop_limit = 255;
2733     ip6_set_solicited_node_multicast_address (&p.ip.dst_address, 0);
2734
2735     p.neighbor.icmp.type = ICMP6_neighbor_solicitation;
2736
2737     p.link_layer_option.header.type =
2738       ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
2739     p.link_layer_option.header.n_data_u64s =
2740       sizeof (p.link_layer_option) / sizeof (u64);
2741
2742     vlib_packet_template_init (vm,
2743                                &im->discover_neighbor_packet_template,
2744                                &p, sizeof (p),
2745                                /* alloc chunk size */ 8,
2746                                "ip6 neighbor discovery");
2747   }
2748
2749   return error;
2750 }
2751
2752 VLIB_INIT_FUNCTION (ip6_lookup_init);
2753
2754 static clib_error_t *
2755 test_ip6_link_command_fn (vlib_main_t * vm,
2756                           unformat_input_t * input, vlib_cli_command_t * cmd)
2757 {
2758   u8 mac[6];
2759   ip6_address_t _a, *a = &_a;
2760
2761   if (unformat (input, "%U", unformat_ethernet_address, mac))
2762     {
2763       ip6_link_local_address_from_ethernet_mac_address (a, mac);
2764       vlib_cli_output (vm, "Link local address: %U", format_ip6_address, a);
2765       ip6_ethernet_mac_address_from_link_local_address (mac, a);
2766       vlib_cli_output (vm, "Original MAC address: %U",
2767                        format_ethernet_address, mac);
2768     }
2769
2770   return 0;
2771 }
2772
2773 /*?
2774  * This command converts the given MAC Address into an IPv6 link-local
2775  * address.
2776  *
2777  * @cliexpar
2778  * Example of how to create an IPv6 link-local address:
2779  * @cliexstart{test ip6 link 16:d9:e0:91:79:86}
2780  * Link local address: fe80::14d9:e0ff:fe91:7986
2781  * Original MAC address: 16:d9:e0:91:79:86
2782  * @cliexend
2783 ?*/
2784 /* *INDENT-OFF* */
2785 VLIB_CLI_COMMAND (test_link_command, static) =
2786 {
2787   .path = "test ip6 link",
2788   .function = test_ip6_link_command_fn,
2789   .short_help = "test ip6 link <mac-address>",
2790 };
2791 /* *INDENT-ON* */
2792
2793 #ifndef CLIB_MARCH_VARIANT
2794 int
2795 vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config)
2796 {
2797   u32 fib_index;
2798
2799   fib_index = fib_table_find (FIB_PROTOCOL_IP6, table_id);
2800
2801   if (~0 == fib_index)
2802     return VNET_API_ERROR_NO_SUCH_FIB;
2803
2804   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP6,
2805                                   flow_hash_config);
2806
2807   return 0;
2808 }
2809 #endif
2810
2811 static clib_error_t *
2812 set_ip6_flow_hash_command_fn (vlib_main_t * vm,
2813                               unformat_input_t * input,
2814                               vlib_cli_command_t * cmd)
2815 {
2816   int matched = 0;
2817   u32 table_id = 0;
2818   u32 flow_hash_config = 0;
2819   int rv;
2820
2821   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2822     {
2823       if (unformat (input, "table %d", &table_id))
2824         matched = 1;
2825 #define _(a,v) \
2826     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2827       foreach_flow_hash_bit
2828 #undef _
2829         else
2830         break;
2831     }
2832
2833   if (matched == 0)
2834     return clib_error_return (0, "unknown input `%U'",
2835                               format_unformat_error, input);
2836
2837   rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config);
2838   switch (rv)
2839     {
2840     case 0:
2841       break;
2842
2843     case -1:
2844       return clib_error_return (0, "no such FIB table %d", table_id);
2845
2846     default:
2847       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2848       break;
2849     }
2850
2851   return 0;
2852 }
2853
2854 /*?
2855  * Configure the set of IPv6 fields used by the flow hash.
2856  *
2857  * @cliexpar
2858  * @parblock
2859  * Example of how to set the flow hash on a given table:
2860  * @cliexcmd{set ip6 flow-hash table 8 dst sport dport proto}
2861  *
 * Example of how to display the configured flow hash:
2863  * @cliexstart{show ip6 fib}
2864  * ipv6-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2865  * @::/0
2866  *   unicast-ip6-chain
2867  *   [@0]: dpo-load-balance: [index:5 buckets:1 uRPF:5 to:[0:0]]
2868  *     [0] [@0]: dpo-drop ip6
2869  * fe80::/10
2870  *   unicast-ip6-chain
2871  *   [@0]: dpo-load-balance: [index:10 buckets:1 uRPF:10 to:[0:0]]
2872  *     [0] [@2]: dpo-receive
2873  * ff02::1/128
2874  *   unicast-ip6-chain
2875  *   [@0]: dpo-load-balance: [index:8 buckets:1 uRPF:8 to:[0:0]]
2876  *     [0] [@2]: dpo-receive
2877  * ff02::2/128
2878  *   unicast-ip6-chain
2879  *   [@0]: dpo-load-balance: [index:7 buckets:1 uRPF:7 to:[0:0]]
2880  *     [0] [@2]: dpo-receive
2881  * ff02::16/128
2882  *   unicast-ip6-chain
2883  *   [@0]: dpo-load-balance: [index:9 buckets:1 uRPF:9 to:[0:0]]
2884  *     [0] [@2]: dpo-receive
2885  * ff02::1:ff00:0/104
2886  *   unicast-ip6-chain
2887  *   [@0]: dpo-load-balance: [index:6 buckets:1 uRPF:6 to:[0:0]]
2888  *     [0] [@2]: dpo-receive
2889  * ipv6-VRF:8, fib_index 1, flow hash: dst sport dport proto
2890  * @::/0
2891  *   unicast-ip6-chain
2892  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2893  *     [0] [@0]: dpo-drop ip6
2894  * @::a:1:1:0:4/126
2895  *   unicast-ip6-chain
2896  *   [@0]: dpo-load-balance: [index:27 buckets:1 uRPF:26 to:[0:0]]
2897  *     [0] [@4]: ipv6-glean: af_packet0
2898  * @::a:1:1:0:7/128
2899  *   unicast-ip6-chain
2900  *   [@0]: dpo-load-balance: [index:28 buckets:1 uRPF:27 to:[0:0]]
2901  *     [0] [@2]: dpo-receive: @::a:1:1:0:7 on af_packet0
2902  * fe80::/10
2903  *   unicast-ip6-chain
2904  *   [@0]: dpo-load-balance: [index:26 buckets:1 uRPF:25 to:[0:0]]
2905  *     [0] [@2]: dpo-receive
2906  * fe80::fe:3eff:fe3e:9222/128
2907  *   unicast-ip6-chain
2908  *   [@0]: dpo-load-balance: [index:29 buckets:1 uRPF:28 to:[0:0]]
2909  *     [0] [@2]: dpo-receive: fe80::fe:3eff:fe3e:9222 on af_packet0
2910  * ff02::1/128
2911  *   unicast-ip6-chain
2912  *   [@0]: dpo-load-balance: [index:24 buckets:1 uRPF:23 to:[0:0]]
2913  *     [0] [@2]: dpo-receive
2914  * ff02::2/128
2915  *   unicast-ip6-chain
2916  *   [@0]: dpo-load-balance: [index:23 buckets:1 uRPF:22 to:[0:0]]
2917  *     [0] [@2]: dpo-receive
2918  * ff02::16/128
2919  *   unicast-ip6-chain
2920  *   [@0]: dpo-load-balance: [index:25 buckets:1 uRPF:24 to:[0:0]]
2921  *     [0] [@2]: dpo-receive
2922  * ff02::1:ff00:0/104
2923  *   unicast-ip6-chain
2924  *   [@0]: dpo-load-balance: [index:22 buckets:1 uRPF:21 to:[0:0]]
2925  *     [0] [@2]: dpo-receive
2926  * @cliexend
2927  * @endparblock
2928 ?*/
2929 /* *INDENT-OFF* */
2930 VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) =
2931 {
2932   .path = "set ip6 flow-hash",
2933   .short_help =
2934   "set ip6 flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2935   .function = set_ip6_flow_hash_command_fn,
2936 };
2937 /* *INDENT-ON* */
2938
2939 static clib_error_t *
2940 show_ip6_local_command_fn (vlib_main_t * vm,
2941                            unformat_input_t * input, vlib_cli_command_t * cmd)
2942 {
2943   ip6_main_t *im = &ip6_main;
2944   ip_lookup_main_t *lm = &im->lookup_main;
2945   int i;
2946
2947   vlib_cli_output (vm, "Protocols handled by ip6_local");
2948   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
2949     {
2950       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2951         {
2952
2953           u32 node_index = vlib_get_node (vm,
2954                                           ip6_local_node.index)->
2955             next_nodes[lm->local_next_by_ip_protocol[i]];
2956           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
2957                            node_index);
2958         }
2959     }
2960   return 0;
2961 }
2962
2963
2964
2965 /*?
2966  * Display the set of protocols handled by the local IPv6 stack.
2967  *
2968  * @cliexpar
2969  * Example of how to display local protocol table:
2970  * @cliexstart{show ip6 local}
2971  * Protocols handled by ip6_local
2972  * 17
2973  * 43
2974  * 58
2975  * 115
2976  * @cliexend
2977 ?*/
2978 /* *INDENT-OFF* */
2979 VLIB_CLI_COMMAND (show_ip6_local, static) =
2980 {
2981   .path = "show ip6 local",
2982   .function = show_ip6_local_command_fn,
2983   .short_help = "show ip6 local",
2984 };
2985 /* *INDENT-ON* */
2986
2987 #ifndef CLIB_MARCH_VARIANT
2988 int
2989 vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2990                              u32 table_index)
2991 {
2992   vnet_main_t *vnm = vnet_get_main ();
2993   vnet_interface_main_t *im = &vnm->interface_main;
2994   ip6_main_t *ipm = &ip6_main;
2995   ip_lookup_main_t *lm = &ipm->lookup_main;
2996   vnet_classify_main_t *cm = &vnet_classify_main;
2997   ip6_address_t *if_addr;
2998
2999   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3000     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3001
3002   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3003     return VNET_API_ERROR_NO_SUCH_ENTRY;
3004
3005   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3006   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3007
3008   if_addr = ip6_interface_first_address (ipm, sw_if_index);
3009
3010   if (NULL != if_addr)
3011     {
3012       fib_prefix_t pfx = {
3013         .fp_len = 128,
3014         .fp_proto = FIB_PROTOCOL_IP6,
3015         .fp_addr.ip6 = *if_addr,
3016       };
3017       u32 fib_index;
3018
3019       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3020                                                        sw_if_index);
3021
3022
3023       if (table_index != (u32) ~ 0)
3024         {
3025           dpo_id_t dpo = DPO_INVALID;
3026
3027           dpo_set (&dpo,
3028                    DPO_CLASSIFY,
3029                    DPO_PROTO_IP6,
3030                    classify_dpo_create (DPO_PROTO_IP6, table_index));
3031
3032           fib_table_entry_special_dpo_add (fib_index,
3033                                            &pfx,
3034                                            FIB_SOURCE_CLASSIFY,
3035                                            FIB_ENTRY_FLAG_NONE, &dpo);
3036           dpo_reset (&dpo);
3037         }
3038       else
3039         {
3040           fib_table_entry_special_remove (fib_index,
3041                                           &pfx, FIB_SOURCE_CLASSIFY);
3042         }
3043     }
3044
3045   return 0;
3046 }
3047 #endif
3048
3049 static clib_error_t *
3050 set_ip6_classify_command_fn (vlib_main_t * vm,
3051                              unformat_input_t * input,
3052                              vlib_cli_command_t * cmd)
3053 {
3054   u32 table_index = ~0;
3055   int table_index_set = 0;
3056   u32 sw_if_index = ~0;
3057   int rv;
3058
3059   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3060     {
3061       if (unformat (input, "table-index %d", &table_index))
3062         table_index_set = 1;
3063       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3064                          vnet_get_main (), &sw_if_index))
3065         ;
3066       else
3067         break;
3068     }
3069
3070   if (table_index_set == 0)
3071     return clib_error_return (0, "classify table-index must be specified");
3072
3073   if (sw_if_index == ~0)
3074     return clib_error_return (0, "interface / subif must be specified");
3075
3076   rv = vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index);
3077
3078   switch (rv)
3079     {
3080     case 0:
3081       break;
3082
3083     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3084       return clib_error_return (0, "No such interface");
3085
3086     case VNET_API_ERROR_NO_SUCH_ENTRY:
3087       return clib_error_return (0, "No such classifier table");
3088     }
3089   return 0;
3090 }
3091
3092 /*?
3093  * Assign a classification table to an interface. The classification
3094  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
3096  * on an interface.
3097  *
3098  * @cliexpar
3099  * Example of how to assign a classification table to an interface:
3100  * @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1}
3101 ?*/
3102 /* *INDENT-OFF* */
3103 VLIB_CLI_COMMAND (set_ip6_classify_command, static) =
3104 {
3105   .path = "set ip6 classify",
3106   .short_help =
3107   "set ip6 classify intfc <interface> table-index <classify-idx>",
3108   .function = set_ip6_classify_command_fn,
3109 };
3110 /* *INDENT-ON* */
3111
3112 static clib_error_t *
3113 ip6_config (vlib_main_t * vm, unformat_input_t * input)
3114 {
3115   ip6_main_t *im = &ip6_main;
3116   uword heapsize = 0;
3117   u32 tmp;
3118   u32 nbuckets = 0;
3119
3120   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3121     {
3122       if (unformat (input, "hash-buckets %d", &tmp))
3123         nbuckets = tmp;
3124       else if (unformat (input, "heap-size %U",
3125                          unformat_memory_size, &heapsize))
3126         ;
3127       else
3128         return clib_error_return (0, "unknown input '%U'",
3129                                   format_unformat_error, input);
3130     }
3131
3132   im->lookup_table_nbuckets = nbuckets;
3133   im->lookup_table_size = heapsize;
3134
3135   return 0;
3136 }
3137
3138 VLIB_EARLY_CONFIG_FUNCTION (ip6_config, "ip6");
3139
3140 /*
3141  * fd.io coding-style-patch-verification: ON
3142  *
3143  * Local Variables:
3144  * eval: (c-set-style "gnu")
3145  * End:
3146  */