IP load-balance; perf improvement using the usual recipe
[vpp.git] / src / vnet / ip / ip6_forward.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip6_forward.c: IP v6 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ip/ip6_neighbor.h>
44 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vppinfra/cache.h>
47 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
48 #include <vnet/fib/ip6_fib.h>
49 #include <vnet/mfib/ip6_mfib.h>
50 #include <vnet/dpo/load_balance_map.h>
51 #include <vnet/dpo/classify_dpo.h>
52
53 #ifndef CLIB_MARCH_VARIANT
54 #include <vppinfra/bihash_template.c>
55 #endif
56 #include <vnet/ip/ip6_forward.h>
57 #include <vnet/interface_output.h>
58
59 /* Flag used by IOAM code. Classifier sets it pop-hop-by-hop checks it */
60 #define OI_DECAP   0x80000000
61
62 static void
63 ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
64                           ip6_main_t * im, u32 fib_index,
65                           ip_interface_address_t * a)
66 {
67   ip_lookup_main_t *lm = &im->lookup_main;
68   ip6_address_t *address = ip_interface_address_get_address (lm, a);
69   fib_prefix_t pfx = {
70     .fp_len = a->address_length,
71     .fp_proto = FIB_PROTOCOL_IP6,
72     .fp_addr.ip6 = *address,
73   };
74
75   if (a->address_length < 128)
76     {
77       fib_table_entry_update_one_path (fib_index,
78                                        &pfx,
79                                        FIB_SOURCE_INTERFACE,
80                                        (FIB_ENTRY_FLAG_CONNECTED |
81                                         FIB_ENTRY_FLAG_ATTACHED),
82                                        DPO_PROTO_IP6,
83                                        /* No next-hop address */
84                                        NULL, sw_if_index,
85                                        /* invalid FIB index */
86                                        ~0, 1,
87                                        /* no label stack */
88                                        NULL, FIB_ROUTE_PATH_FLAG_NONE);
89     }
90
91   pfx.fp_len = 128;
92   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
93     {
94       u32 classify_table_index =
95         lm->classify_table_index_by_sw_if_index[sw_if_index];
96       if (classify_table_index != (u32) ~ 0)
97         {
98           dpo_id_t dpo = DPO_INVALID;
99
100           dpo_set (&dpo,
101                    DPO_CLASSIFY,
102                    DPO_PROTO_IP6,
103                    classify_dpo_create (DPO_PROTO_IP6, classify_table_index));
104
105           fib_table_entry_special_dpo_add (fib_index,
106                                            &pfx,
107                                            FIB_SOURCE_CLASSIFY,
108                                            FIB_ENTRY_FLAG_NONE, &dpo);
109           dpo_reset (&dpo);
110         }
111     }
112
113   fib_table_entry_update_one_path (fib_index, &pfx,
114                                    FIB_SOURCE_INTERFACE,
115                                    (FIB_ENTRY_FLAG_CONNECTED |
116                                     FIB_ENTRY_FLAG_LOCAL),
117                                    DPO_PROTO_IP6,
118                                    &pfx.fp_addr,
119                                    sw_if_index, ~0,
120                                    1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
121 }
122
123 static void
124 ip6_del_interface_routes (ip6_main_t * im,
125                           u32 fib_index,
126                           ip6_address_t * address, u32 address_length)
127 {
128   fib_prefix_t pfx = {
129     .fp_len = address_length,
130     .fp_proto = FIB_PROTOCOL_IP6,
131     .fp_addr.ip6 = *address,
132   };
133
134   if (pfx.fp_len < 128)
135     {
136       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
137
138     }
139
140   pfx.fp_len = 128;
141   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
142 }
143
144 #ifndef CLIB_MARCH_VARIANT
145 void
146 ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
147 {
148   ip6_main_t *im = &ip6_main;
149
150   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
151
152   /*
153    * enable/disable only on the 1<->0 transition
154    */
155   if (is_enable)
156     {
157       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
158         return;
159     }
160   else
161     {
162       /* The ref count is 0 when an address is removed from an interface that has
163        * no address - this is not a ciritical error */
164       if (0 == im->ip_enabled_by_sw_if_index[sw_if_index] ||
165           0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
166         return;
167     }
168
169   vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index,
170                                !is_enable, 0, 0);
171
172   vnet_feature_enable_disable ("ip6-multicast", "ip6-not-enabled",
173                                sw_if_index, !is_enable, 0, 0);
174 }
175
176 /* get first interface address */
177 ip6_address_t *
178 ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index)
179 {
180   ip_lookup_main_t *lm = &im->lookup_main;
181   ip_interface_address_t *ia = 0;
182   ip6_address_t *result = 0;
183
184   /* *INDENT-OFF* */
185   foreach_ip_interface_address (lm, ia, sw_if_index,
186                                 1 /* honor unnumbered */,
187   ({
188     ip6_address_t * a = ip_interface_address_get_address (lm, ia);
189     result = a;
190     break;
191   }));
192   /* *INDENT-ON* */
193   return result;
194 }
195
196 clib_error_t *
197 ip6_add_del_interface_address (vlib_main_t * vm,
198                                u32 sw_if_index,
199                                ip6_address_t * address,
200                                u32 address_length, u32 is_del)
201 {
202   vnet_main_t *vnm = vnet_get_main ();
203   ip6_main_t *im = &ip6_main;
204   ip_lookup_main_t *lm = &im->lookup_main;
205   clib_error_t *error;
206   u32 if_address_index;
207   ip6_address_fib_t ip6_af, *addr_fib = 0;
208   ip6_address_t ll_addr;
209
210   /* local0 interface doesn't support IP addressing */
211   if (sw_if_index == 0)
212     {
213       return
214         clib_error_create ("local0 interface doesn't support IP addressing");
215     }
216
217   if (ip6_address_is_link_local_unicast (address))
218     {
219       if (address_length != 128)
220         {
221           vnm->api_errno = VNET_API_ERROR_ADDRESS_LENGTH_MISMATCH;
222           return
223             clib_error_create
224             ("prefix length of link-local address must be 128");
225         }
226       if (!is_del)
227         {
228           return ip6_neighbor_set_link_local_address (vm, sw_if_index,
229                                                       address);
230         }
231       else
232         {
233           ll_addr = ip6_neighbor_get_link_local_address (sw_if_index);
234           if (ip6_address_is_equal (&ll_addr, address))
235             {
236               vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_DELETABLE;
237               return clib_error_create ("address not deletable");
238             }
239           else
240             {
241               vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
242               return clib_error_create ("address not found");
243             }
244         }
245     }
246
247   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
248   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
249
250   ip6_addr_fib_init (&ip6_af, address,
251                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
252   vec_add1 (addr_fib, ip6_af);
253
254   /* *INDENT-OFF* */
255   if (!is_del)
256     {
257       /* When adding an address check that it does not conflict
258          with an existing address on any interface in this table. */
259       ip_interface_address_t *ia;
260       vnet_sw_interface_t *sif;
261
262       pool_foreach(sif, vnm->interface_main.sw_interfaces,
263       ({
264           if (im->fib_index_by_sw_if_index[sw_if_index] ==
265               im->fib_index_by_sw_if_index[sif->sw_if_index])
266             {
267               foreach_ip_interface_address
268                 (&im->lookup_main, ia, sif->sw_if_index,
269                  0 /* honor unnumbered */ ,
270                  ({
271                    ip6_address_t * x =
272                      ip_interface_address_get_address
273                      (&im->lookup_main, ia);
274                    if (ip6_destination_matches_route
275                        (im, address, x, ia->address_length) ||
276                        ip6_destination_matches_route (im,
277                                                       x,
278                                                       address,
279                                                       address_length))
280                      {
281                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
282                        return
283                          clib_error_create
284                          ("failed to add %U which conflicts with %U for interface %U",
285                           format_ip6_address_and_length, address,
286                           address_length,
287                           format_ip6_address_and_length, x,
288                           ia->address_length,
289                           format_vnet_sw_if_index_name, vnm,
290                           sif->sw_if_index);
291                      }
292                  }));
293             }
294       }));
295     }
296   /* *INDENT-ON* */
297
298   {
299     uword elts_before = pool_elts (lm->if_address_pool);
300
301     error = ip_interface_address_add_del
302       (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
303     if (error)
304       goto done;
305
306     /* Pool did not grow: add duplicate address. */
307     if (elts_before == pool_elts (lm->if_address_pool))
308       goto done;
309   }
310
311   ip6_sw_interface_enable_disable (sw_if_index, !is_del);
312
313   if (is_del)
314     ip6_del_interface_routes (im, ip6_af.fib_index, address, address_length);
315   else
316     ip6_add_interface_routes (vnm, sw_if_index,
317                               im, ip6_af.fib_index,
318                               pool_elt_at_index (lm->if_address_pool,
319                                                  if_address_index));
320
321   {
322     ip6_add_del_interface_address_callback_t *cb;
323     vec_foreach (cb, im->add_del_interface_address_callbacks)
324       cb->function (im, cb->function_opaque, sw_if_index,
325                     address, address_length, if_address_index, is_del);
326   }
327
328 done:
329   vec_free (addr_fib);
330   return error;
331 }
332
333 #endif
334
335 static clib_error_t *
336 ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
337 {
338   ip6_main_t *im = &ip6_main;
339   ip_interface_address_t *ia;
340   ip6_address_t *a;
341   u32 is_admin_up, fib_index;
342
343   /* Fill in lookup tables with default table (0). */
344   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
345
346   vec_validate_init_empty (im->
347                            lookup_main.if_address_pool_index_by_sw_if_index,
348                            sw_if_index, ~0);
349
350   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
351
352   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
353
354   /* *INDENT-OFF* */
355   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
356                                 0 /* honor unnumbered */,
357   ({
358     a = ip_interface_address_get_address (&im->lookup_main, ia);
359     if (is_admin_up)
360       ip6_add_interface_routes (vnm, sw_if_index,
361                                 im, fib_index,
362                                 ia);
363     else
364       ip6_del_interface_routes (im, fib_index,
365                                 a, ia->address_length);
366   }));
367   /* *INDENT-ON* */
368
369   return 0;
370 }
371
372 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
373
374 /* Built-in ip6 unicast rx feature path definition */
375 /* *INDENT-OFF* */
376 VNET_FEATURE_ARC_INIT (ip6_unicast, static) =
377 {
378   .arc_name  = "ip6-unicast",
379   .start_nodes = VNET_FEATURES ("ip6-input"),
380   .last_in_arc = "ip6-lookup",
381   .arc_index_ptr = &ip6_main.lookup_main.ucast_feature_arc_index,
382 };
383
384 VNET_FEATURE_INIT (ip6_flow_classify, static) =
385 {
386   .arc_name = "ip6-unicast",
387   .node_name = "ip6-flow-classify",
388   .runs_before = VNET_FEATURES ("ip6-inacl"),
389 };
390
391 VNET_FEATURE_INIT (ip6_inacl, static) =
392 {
393   .arc_name = "ip6-unicast",
394   .node_name = "ip6-inacl",
395   .runs_before = VNET_FEATURES ("ip6-policer-classify"),
396 };
397
398 VNET_FEATURE_INIT (ip6_policer_classify, static) =
399 {
400   .arc_name = "ip6-unicast",
401   .node_name = "ip6-policer-classify",
402   .runs_before = VNET_FEATURES ("ipsec6-input-feature"),
403 };
404
405 VNET_FEATURE_INIT (ip6_ipsec, static) =
406 {
407   .arc_name = "ip6-unicast",
408   .node_name = "ipsec6-input-feature",
409   .runs_before = VNET_FEATURES ("l2tp-decap"),
410 };
411
412 VNET_FEATURE_INIT (ip6_l2tp, static) =
413 {
414   .arc_name = "ip6-unicast",
415   .node_name = "l2tp-decap",
416   .runs_before = VNET_FEATURES ("vpath-input-ip6"),
417 };
418
419 VNET_FEATURE_INIT (ip6_vpath, static) =
420 {
421   .arc_name = "ip6-unicast",
422   .node_name = "vpath-input-ip6",
423   .runs_before = VNET_FEATURES ("ip6-vxlan-bypass"),
424 };
425
426 VNET_FEATURE_INIT (ip6_vxlan_bypass, static) =
427 {
428   .arc_name = "ip6-unicast",
429   .node_name = "ip6-vxlan-bypass",
430   .runs_before = VNET_FEATURES ("ip6-lookup"),
431 };
432
433 VNET_FEATURE_INIT (ip6_not_enabled, static) =
434 {
435   .arc_name = "ip6-unicast",
436   .node_name = "ip6-not-enabled",
437   .runs_before = VNET_FEATURES ("ip6-lookup"),
438 };
439
440 VNET_FEATURE_INIT (ip6_lookup, static) =
441 {
442   .arc_name = "ip6-unicast",
443   .node_name = "ip6-lookup",
444   .runs_before = 0,  /*last feature*/
445 };
446
447 /* Built-in ip6 multicast rx feature path definition (none now) */
448 VNET_FEATURE_ARC_INIT (ip6_multicast, static) =
449 {
450   .arc_name  = "ip6-multicast",
451   .start_nodes = VNET_FEATURES ("ip6-input"),
452   .last_in_arc = "ip6-mfib-forward-lookup",
453   .arc_index_ptr = &ip6_main.lookup_main.mcast_feature_arc_index,
454 };
455
456 VNET_FEATURE_INIT (ip6_vpath_mc, static) = {
457   .arc_name = "ip6-multicast",
458   .node_name = "vpath-input-ip6",
459   .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
460 };
461
462 VNET_FEATURE_INIT (ip6_not_enabled_mc, static) = {
463   .arc_name = "ip6-multicast",
464   .node_name = "ip6-not-enabled",
465   .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
466 };
467
468 VNET_FEATURE_INIT (ip6_mc_lookup, static) = {
469   .arc_name = "ip6-multicast",
470   .node_name = "ip6-mfib-forward-lookup",
471   .runs_before = 0, /* last feature */
472 };
473
474 /* Built-in ip4 tx feature path definition */
475 VNET_FEATURE_ARC_INIT (ip6_output, static) =
476 {
477   .arc_name  = "ip6-output",
478   .start_nodes = VNET_FEATURES ("ip6-rewrite", "ip6-midchain", "ip6-dvr-dpo"),
479   .last_in_arc = "interface-output",
480   .arc_index_ptr = &ip6_main.lookup_main.output_feature_arc_index,
481 };
482
483 VNET_FEATURE_INIT (ip6_outacl, static) = {
484   .arc_name = "ip6-output",
485   .node_name = "ip6-outacl",
486   .runs_before = VNET_FEATURES ("ipsec6-output-feature"),
487 };
488
489 VNET_FEATURE_INIT (ip6_ipsec_output, static) = {
490   .arc_name = "ip6-output",
491   .node_name = "ipsec6-output-feature",
492   .runs_before = VNET_FEATURES ("interface-output"),
493 };
494
495 VNET_FEATURE_INIT (ip6_interface_output, static) = {
496   .arc_name = "ip6-output",
497   .node_name = "interface-output",
498   .runs_before = 0, /* not before any other features */
499 };
500 /* *INDENT-ON* */
501
502 static clib_error_t *
503 ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
504 {
505   ip6_main_t *im = &ip6_main;
506
507   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
508   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
509
510   if (!is_add)
511     {
512       /* Ensure that IPv6 is disabled */
513       ip6_main_t *im6 = &ip6_main;
514       ip_lookup_main_t *lm6 = &im6->lookup_main;
515       ip_interface_address_t *ia = 0;
516       ip6_address_t *address;
517       vlib_main_t *vm = vlib_get_main ();
518
519       ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, 0 /* is_add */ );
520       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
521       /* *INDENT-OFF* */
522       foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
523       ({
524         address = ip_interface_address_get_address (lm6, ia);
525         ip6_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
526       }));
527       /* *INDENT-ON* */
528       ip6_mfib_interface_enable_disable (sw_if_index, 0);
529     }
530
531   vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index,
532                                is_add, 0, 0);
533
534   vnet_feature_enable_disable ("ip6-multicast", "ip6-not-enabled",
535                                sw_if_index, is_add, 0, 0);
536
537   return /* no error */ 0;
538 }
539
540 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del);
541
542 VLIB_NODE_FN (ip6_lookup_node) (vlib_main_t * vm,
543                                 vlib_node_runtime_t * node,
544                                 vlib_frame_t * frame)
545 {
546   return ip6_lookup_inline (vm, node, frame);
547 }
548
549 static u8 *format_ip6_lookup_trace (u8 * s, va_list * args);
550
551 /* *INDENT-OFF* */
552 VLIB_REGISTER_NODE (ip6_lookup_node) =
553 {
554   .name = "ip6-lookup",
555   .vector_size = sizeof (u32),
556   .format_trace = format_ip6_lookup_trace,
557   .n_next_nodes = IP6_LOOKUP_N_NEXT,
558   .next_nodes = IP6_LOOKUP_NEXT_NODES,
559 };
560 /* *INDENT-ON* */
561
562 VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm,
563                                       vlib_node_runtime_t * node,
564                                       vlib_frame_t * frame)
565 {
566   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
567   u32 n_left, *from;
568   u32 thread_index = vm->thread_index;
569   ip6_main_t *im = &ip6_main;
570   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
571   u16 nexts[VLIB_FRAME_SIZE], *next;
572
573   from = vlib_frame_vector_args (frame);
574   n_left = frame->n_vectors;
575   next = nexts;
576
577   vlib_get_buffers (vm, from, bufs, n_left);
578
579   while (n_left >= 4)
580     {
581       const load_balance_t *lb0, *lb1;
582       const ip6_header_t *ip0, *ip1;
583       u32 lbi0, hc0, lbi1, hc1;
584       const dpo_id_t *dpo0, *dpo1;
585
586       /* Prefetch next iteration. */
587       {
588         vlib_prefetch_buffer_header (b[2], STORE);
589         vlib_prefetch_buffer_header (b[3], STORE);
590
591         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), STORE);
592         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), STORE);
593       }
594
595       ip0 = vlib_buffer_get_current (b[0]);
596       ip1 = vlib_buffer_get_current (b[1]);
597       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
598       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
599
600       lb0 = load_balance_get (lbi0);
601       lb1 = load_balance_get (lbi1);
602
603       /*
604        * this node is for via FIBs we can re-use the hash value from the
605        * to node if present.
606        * We don't want to use the same hash value at each level in the recursion
607        * graph as that would lead to polarisation
608        */
609       hc0 = hc1 = 0;
610
611       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
612         {
613           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
614             {
615               hc0 = vnet_buffer (b[0])->ip.flow_hash =
616                 vnet_buffer (b[0])->ip.flow_hash >> 1;
617             }
618           else
619             {
620               hc0 = vnet_buffer (b[0])->ip.flow_hash =
621                 ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
622             }
623           dpo0 = load_balance_get_fwd_bucket
624             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
625         }
626       else
627         {
628           dpo0 = load_balance_get_bucket_i (lb0, 0);
629         }
630       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
631         {
632           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
633             {
634               hc1 = vnet_buffer (b[1])->ip.flow_hash =
635                 vnet_buffer (b[1])->ip.flow_hash >> 1;
636             }
637           else
638             {
639               hc1 = vnet_buffer (b[1])->ip.flow_hash =
640                 ip6_compute_flow_hash (ip1, lb1->lb_hash_config);
641             }
642           dpo1 = load_balance_get_fwd_bucket
643             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
644         }
645       else
646         {
647           dpo1 = load_balance_get_bucket_i (lb1, 0);
648         }
649
650       next[0] = dpo0->dpoi_next_node;
651       next[1] = dpo1->dpoi_next_node;
652
653       /* Only process the HBH Option Header if explicitly configured to do so */
654       if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
655         {
656           next[0] = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
657             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[0];
658         }
659       /* Only process the HBH Option Header if explicitly configured to do so */
660       if (PREDICT_FALSE (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
661         {
662           next[1] = (dpo_is_adj (dpo1) && im->hbh_enabled) ?
663             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[1];
664         }
665
666       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
667       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
668
669       vlib_increment_combined_counter
670         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
671       vlib_increment_combined_counter
672         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
673
674       b += 2;
675       next += 2;
676       n_left -= 2;
677     }
678
679   while (n_left > 0)
680     {
681       const load_balance_t *lb0;
682       const ip6_header_t *ip0;
683       const dpo_id_t *dpo0;
684       u32 lbi0, hc0;
685
686       ip0 = vlib_buffer_get_current (b[0]);
687       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
688
689       lb0 = load_balance_get (lbi0);
690
691       hc0 = 0;
692       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
693         {
694           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
695             {
696               hc0 = vnet_buffer (b[0])->ip.flow_hash =
697                 vnet_buffer (b[0])->ip.flow_hash >> 1;
698             }
699           else
700             {
701               hc0 = vnet_buffer (b[0])->ip.flow_hash =
702                 ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
703             }
704           dpo0 = load_balance_get_fwd_bucket
705             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
706         }
707       else
708         {
709           dpo0 = load_balance_get_bucket_i (lb0, 0);
710         }
711
712       next[0] = dpo0->dpoi_next_node;
713       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
714
715       /* Only process the HBH Option Header if explicitly configured to do so */
716       if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
717         {
718           next[0] = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
719             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[0];
720         }
721
722       vlib_increment_combined_counter
723         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
724
725       b += 1;
726       next += 1;
727       n_left -= 1;
728     }
729
730   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
731
732   if (node->flags & VLIB_NODE_FLAG_TRACE)
733     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
734
735   return frame->n_vectors;
736 }
737
738 /* *INDENT-OFF* */
739 VLIB_REGISTER_NODE (ip6_load_balance_node) =
740 {
741   .name = "ip6-load-balance",
742   .vector_size = sizeof (u32),
743   .sibling_of = "ip6-lookup",
744   .format_trace = format_ip6_lookup_trace,
745 };
746 /* *INDENT-ON* */
747
748 typedef struct
749 {
750   /* Adjacency taken. */
751   u32 adj_index;
752   u32 flow_hash;
753   u32 fib_index;
754
755   /* Packet data, possibly *after* rewrite. */
756   u8 packet_data[128 - 1 * sizeof (u32)];
757 }
758 ip6_forward_next_trace_t;
759
760 #ifndef CLIB_MARCH_VARIANT
761 u8 *
762 format_ip6_forward_next_trace (u8 * s, va_list * args)
763 {
764   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
765   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
766   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
767   u32 indent = format_get_indent (s);
768
769   s = format (s, "%U%U",
770               format_white_space, indent,
771               format_ip6_header, t->packet_data, sizeof (t->packet_data));
772   return s;
773 }
774 #endif
775
776 static u8 *
777 format_ip6_lookup_trace (u8 * s, va_list * args)
778 {
779   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
780   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
781   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
782   u32 indent = format_get_indent (s);
783
784   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
785               t->fib_index, t->adj_index, t->flow_hash);
786   s = format (s, "\n%U%U",
787               format_white_space, indent,
788               format_ip6_header, t->packet_data, sizeof (t->packet_data));
789   return s;
790 }
791
792
793 static u8 *
794 format_ip6_rewrite_trace (u8 * s, va_list * args)
795 {
796   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
797   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
798   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
799   u32 indent = format_get_indent (s);
800
801   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
802               t->fib_index, t->adj_index, format_ip_adjacency,
803               t->adj_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
804   s = format (s, "\n%U%U",
805               format_white_space, indent,
806               format_ip_adjacency_packet_data,
807               t->adj_index, t->packet_data, sizeof (t->packet_data));
808   return s;
809 }
810
811 /* Common trace function for all ip6-forward next nodes. */
812 #ifndef CLIB_MARCH_VARIANT
813 void
814 ip6_forward_next_trace (vlib_main_t * vm,
815                         vlib_node_runtime_t * node,
816                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
817 {
818   u32 *from, n_left;
819   ip6_main_t *im = &ip6_main;
820
821   n_left = frame->n_vectors;
822   from = vlib_frame_vector_args (frame);
823
824   while (n_left >= 4)
825     {
826       u32 bi0, bi1;
827       vlib_buffer_t *b0, *b1;
828       ip6_forward_next_trace_t *t0, *t1;
829
830       /* Prefetch next iteration. */
831       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
832       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
833
834       bi0 = from[0];
835       bi1 = from[1];
836
837       b0 = vlib_get_buffer (vm, bi0);
838       b1 = vlib_get_buffer (vm, bi1);
839
840       if (b0->flags & VLIB_BUFFER_IS_TRACED)
841         {
842           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
843           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
844           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
845           t0->fib_index =
846             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
847              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
848             vec_elt (im->fib_index_by_sw_if_index,
849                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
850
851           clib_memcpy_fast (t0->packet_data,
852                             vlib_buffer_get_current (b0),
853                             sizeof (t0->packet_data));
854         }
855       if (b1->flags & VLIB_BUFFER_IS_TRACED)
856         {
857           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
858           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
859           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
860           t1->fib_index =
861             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
862              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
863             vec_elt (im->fib_index_by_sw_if_index,
864                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
865
866           clib_memcpy_fast (t1->packet_data,
867                             vlib_buffer_get_current (b1),
868                             sizeof (t1->packet_data));
869         }
870       from += 2;
871       n_left -= 2;
872     }
873
874   while (n_left >= 1)
875     {
876       u32 bi0;
877       vlib_buffer_t *b0;
878       ip6_forward_next_trace_t *t0;
879
880       bi0 = from[0];
881
882       b0 = vlib_get_buffer (vm, bi0);
883
884       if (b0->flags & VLIB_BUFFER_IS_TRACED)
885         {
886           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
887           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
888           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
889           t0->fib_index =
890             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
891              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
892             vec_elt (im->fib_index_by_sw_if_index,
893                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
894
895           clib_memcpy_fast (t0->packet_data,
896                             vlib_buffer_get_current (b0),
897                             sizeof (t0->packet_data));
898         }
899       from += 1;
900       n_left -= 1;
901     }
902 }
903
904 /* Compute TCP/UDP/ICMP6 checksum in software. */
905 u16
906 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
907                                    ip6_header_t * ip0, int *bogus_lengthp)
908 {
909   ip_csum_t sum0;
910   u16 sum16, payload_length_host_byte_order;
911   u32 i, n_this_buffer, n_bytes_left;
912   u32 headers_size = sizeof (ip0[0]);
913   void *data_this_buffer;
914
915   ASSERT (bogus_lengthp);
916   *bogus_lengthp = 0;
917
918   /* Initialize checksum with ip header. */
919   sum0 = ip0->payload_length + clib_host_to_net_u16 (ip0->protocol);
920   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
921   data_this_buffer = (void *) (ip0 + 1);
922
923   for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++)
924     {
925       sum0 = ip_csum_with_carry (sum0,
926                                  clib_mem_unaligned (&ip0->
927                                                      src_address.as_uword[i],
928                                                      uword));
929       sum0 =
930         ip_csum_with_carry (sum0,
931                             clib_mem_unaligned (&ip0->dst_address.as_uword[i],
932                                                 uword));
933     }
934
935   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets)
936    * or UDP-Ping packets */
937   if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
938     {
939       u32 skip_bytes;
940       ip6_hop_by_hop_ext_t *ext_hdr =
941         (ip6_hop_by_hop_ext_t *) data_this_buffer;
942
943       /* validate really icmp6 next */
944       ASSERT ((ext_hdr->next_hdr == IP_PROTOCOL_ICMP6)
945               || (ext_hdr->next_hdr == IP_PROTOCOL_UDP));
946
947       skip_bytes = 8 * (1 + ext_hdr->n_data_u64s);
948       data_this_buffer = (void *) ((u8 *) data_this_buffer + skip_bytes);
949
950       payload_length_host_byte_order -= skip_bytes;
951       headers_size += skip_bytes;
952     }
953
954   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
955   if (p0 && n_this_buffer + headers_size > p0->current_length)
956     n_this_buffer =
957       p0->current_length >
958       headers_size ? p0->current_length - headers_size : 0;
959   while (1)
960     {
961       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
962       n_bytes_left -= n_this_buffer;
963       if (n_bytes_left == 0)
964         break;
965
966       if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
967         {
968           *bogus_lengthp = 1;
969           return 0xfefe;
970         }
971       p0 = vlib_get_buffer (vm, p0->next_buffer);
972       data_this_buffer = vlib_buffer_get_current (p0);
973       n_this_buffer = clib_min (p0->current_length, n_bytes_left);
974     }
975
976   sum16 = ~ip_csum_fold (sum0);
977
978   return sum16;
979 }
980
981 u32
982 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
983 {
984   ip6_header_t *ip0 = vlib_buffer_get_current (p0);
985   udp_header_t *udp0;
986   u16 sum16;
987   int bogus_length;
988
989   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
990   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
991           || ip0->protocol == IP_PROTOCOL_ICMP6
992           || ip0->protocol == IP_PROTOCOL_UDP
993           || ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS);
994
995   udp0 = (void *) (ip0 + 1);
996   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
997     {
998       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
999                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1000       return p0->flags;
1001     }
1002
1003   sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, &bogus_length);
1004
1005   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1006                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1007
1008   return p0->flags;
1009 }
1010 #endif
1011
1012 /**
1013  * @brief returns number of links on which src is reachable.
1014  */
1015 always_inline int
1016 ip6_urpf_loose_check (ip6_main_t * im, vlib_buffer_t * b, ip6_header_t * i)
1017 {
1018   const load_balance_t *lb0;
1019   index_t lbi;
1020   u32 fib_index;
1021
1022   fib_index = vec_elt (im->fib_index_by_sw_if_index,
1023                        vnet_buffer (b)->sw_if_index[VLIB_RX]);
1024   fib_index =
1025     (vnet_buffer (b)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
1026     fib_index : vnet_buffer (b)->sw_if_index[VLIB_TX];
1027
1028   lbi = ip6_fib_table_fwding_lookup (im, fib_index, &i->src_address);
1029   lb0 = load_balance_get (lbi);
1030
1031   return (fib_urpf_check_size (lb0->lb_urpf));
1032 }
1033
1034 always_inline u8
1035 ip6_next_proto_is_tcp_udp (vlib_buffer_t * p0, ip6_header_t * ip0,
1036                            u32 * udp_offset0)
1037 {
1038   u32 proto0;
1039   proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_UDP, udp_offset0);
1040   if (proto0 != IP_PROTOCOL_UDP)
1041     {
1042       proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_TCP, udp_offset0);
1043       proto0 = (proto0 == IP_PROTOCOL_TCP) ? proto0 : 0;
1044     }
1045   return proto0;
1046 }
1047
1048 /* *INDENT-OFF* */
/* Declare the "ip6-local" feature arc; its start node is the "ip6-local"
 * graph node. */
VNET_FEATURE_ARC_INIT (ip6_local) =
{
  .arc_name  = "ip6-local",
  .start_nodes = VNET_FEATURES ("ip6-local"),
};
1054 /* *INDENT-ON* */
1055
1056 always_inline uword
1057 ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
1058                   vlib_frame_t * frame, int head_of_feature_arc)
1059 {
1060   ip6_main_t *im = &ip6_main;
1061   ip_lookup_main_t *lm = &im->lookup_main;
1062   u32 *from, n_left_from;
1063   vlib_node_runtime_t *error_node =
1064     vlib_node_get_runtime (vm, ip6_input_node.index);
1065   u8 arc_index = vnet_feat_arc_ip6_local.feature_arc_index;
1066   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1067   u16 nexts[VLIB_FRAME_SIZE], *next;
1068
1069   from = vlib_frame_vector_args (frame);
1070   n_left_from = frame->n_vectors;
1071
1072   if (node->flags & VLIB_NODE_FLAG_TRACE)
1073     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1074
1075   vlib_get_buffers (vm, from, bufs, n_left_from);
1076   b = bufs;
1077   next = nexts;
1078
1079   while (n_left_from > 2)
1080     {
1081       /* Prefetch next iteration. */
1082       if (n_left_from >= 6)
1083         {
1084           vlib_prefetch_buffer_header (b[4], STORE);
1085           vlib_prefetch_buffer_header (b[5], STORE);
1086           vlib_prefetch_buffer_data (b[2], LOAD);
1087           vlib_prefetch_buffer_data (b[3], LOAD);
1088         }
1089
1090       u8 error[2];
1091       error[0] = IP6_ERROR_UNKNOWN_PROTOCOL;
1092       error[1] = IP6_ERROR_UNKNOWN_PROTOCOL;
1093
1094       ip6_header_t *ip[2];
1095       ip[0] = vlib_buffer_get_current (b[0]);
1096       ip[1] = vlib_buffer_get_current (b[1]);
1097
1098       if (head_of_feature_arc)
1099         {
1100           vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1101           vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1102
1103           u8 type[2];
1104           type[0] = lm->builtin_protocol_by_ip_protocol[ip[0]->protocol];
1105           type[1] = lm->builtin_protocol_by_ip_protocol[ip[1]->protocol];
1106
1107           u32 flags[2];
1108           flags[0] = b[0]->flags;
1109           flags[1] = b[1]->flags;
1110
1111           u32 good_l4_csum[2];
1112           good_l4_csum[0] =
1113             flags[0] & (VNET_BUFFER_F_L4_CHECKSUM_CORRECT |
1114                         VNET_BUFFER_F_OFFLOAD_TCP_CKSUM |
1115                         VNET_BUFFER_F_OFFLOAD_UDP_CKSUM);
1116           good_l4_csum[1] =
1117             flags[1] & (VNET_BUFFER_F_L4_CHECKSUM_CORRECT |
1118                         VNET_BUFFER_F_OFFLOAD_TCP_CKSUM |
1119                         VNET_BUFFER_F_OFFLOAD_UDP_CKSUM);
1120
1121           u32 udp_offset[2] = { };
1122           u8 is_tcp_udp[2];
1123           is_tcp_udp[0] =
1124             ip6_next_proto_is_tcp_udp (b[0], ip[0], &udp_offset[0]);
1125           is_tcp_udp[1] =
1126             ip6_next_proto_is_tcp_udp (b[1], ip[1], &udp_offset[1]);
1127           i16 len_diff[2] = { 0 };
1128           if (PREDICT_TRUE (is_tcp_udp[0]))
1129             {
1130               udp_header_t *udp =
1131                 (udp_header_t *) ((u8 *) ip[0] + udp_offset[0]);
1132               good_l4_csum[0] |= type[0] == IP_BUILTIN_PROTOCOL_UDP
1133                 && udp->checksum == 0;
1134               /* optimistically verify UDP length. */
1135               u16 ip_len, udp_len;
1136               ip_len = clib_net_to_host_u16 (ip[0]->payload_length);
1137               udp_len = clib_net_to_host_u16 (udp->length);
1138               len_diff[0] = ip_len - udp_len;
1139             }
1140           if (PREDICT_TRUE (is_tcp_udp[1]))
1141             {
1142               udp_header_t *udp =
1143                 (udp_header_t *) ((u8 *) ip[1] + udp_offset[1]);
1144               good_l4_csum[1] |= type[1] == IP_BUILTIN_PROTOCOL_UDP
1145                 && udp->checksum == 0;
1146               /* optimistically verify UDP length. */
1147               u16 ip_len, udp_len;
1148               ip_len = clib_net_to_host_u16 (ip[1]->payload_length);
1149               udp_len = clib_net_to_host_u16 (udp->length);
1150               len_diff[1] = ip_len - udp_len;
1151             }
1152
1153           good_l4_csum[0] |= type[0] == IP_BUILTIN_PROTOCOL_UNKNOWN;
1154           good_l4_csum[1] |= type[1] == IP_BUILTIN_PROTOCOL_UNKNOWN;
1155
1156           len_diff[0] = type[0] == IP_BUILTIN_PROTOCOL_UDP ? len_diff[0] : 0;
1157           len_diff[1] = type[1] == IP_BUILTIN_PROTOCOL_UDP ? len_diff[1] : 0;
1158
1159           u8 need_csum[2];
1160           need_csum[0] = type[0] != IP_BUILTIN_PROTOCOL_UNKNOWN
1161             && !good_l4_csum[0]
1162             && !(flags[0] & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
1163           need_csum[1] = type[1] != IP_BUILTIN_PROTOCOL_UNKNOWN
1164             && !good_l4_csum[1]
1165             && !(flags[1] & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
1166           if (PREDICT_FALSE (need_csum[0]))
1167             {
1168               flags[0] = ip6_tcp_udp_icmp_validate_checksum (vm, b[0]);
1169               good_l4_csum[0] = flags[0] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
1170             }
1171           if (PREDICT_FALSE (need_csum[1]))
1172             {
1173               flags[1] = ip6_tcp_udp_icmp_validate_checksum (vm, b[1]);
1174               good_l4_csum[1] = flags[1] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
1175             }
1176
1177           error[0] = IP6_ERROR_UNKNOWN_PROTOCOL;
1178           error[0] = len_diff[0] < 0 ? IP6_ERROR_UDP_LENGTH : error[0];
1179           error[1] = IP6_ERROR_UNKNOWN_PROTOCOL;
1180           error[1] = len_diff[1] < 0 ? IP6_ERROR_UDP_LENGTH : error[1];
1181
1182           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
1183                          IP6_ERROR_UDP_CHECKSUM,
1184                          "Wrong IP6 errors constants");
1185           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
1186                          IP6_ERROR_ICMP_CHECKSUM,
1187                          "Wrong IP6 errors constants");
1188
1189           error[0] =
1190             !good_l4_csum[0] ? IP6_ERROR_UDP_CHECKSUM + type[0] : error[0];
1191           error[1] =
1192             !good_l4_csum[1] ? IP6_ERROR_UDP_CHECKSUM + type[1] : error[1];
1193
1194           /* Drop packets from unroutable hosts. */
1195           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1196           u8 unroutable[2];
1197           unroutable[0] = error[0] == IP6_ERROR_UNKNOWN_PROTOCOL
1198             && type[0] != IP_BUILTIN_PROTOCOL_ICMP
1199             && !ip6_address_is_link_local_unicast (&ip[0]->src_address);
1200           unroutable[1] = error[1] == IP6_ERROR_UNKNOWN_PROTOCOL
1201             && type[1] != IP_BUILTIN_PROTOCOL_ICMP
1202             && !ip6_address_is_link_local_unicast (&ip[1]->src_address);
1203           if (PREDICT_FALSE (unroutable[0]))
1204             {
1205               error[0] =
1206                 !ip6_urpf_loose_check (im, b[0],
1207                                        ip[0]) ? IP6_ERROR_SRC_LOOKUP_MISS
1208                 : error[0];
1209             }
1210           if (PREDICT_FALSE (unroutable[1]))
1211             {
1212               error[1] =
1213                 !ip6_urpf_loose_check (im, b[1],
1214                                        ip[1]) ? IP6_ERROR_SRC_LOOKUP_MISS
1215                 : error[1];
1216             }
1217
1218           vnet_buffer (b[0])->ip.fib_index =
1219             vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1220             vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1221             vnet_buffer (b[0])->ip.fib_index;
1222           vnet_buffer (b[1])->ip.fib_index =
1223             vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1224             vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1225             vnet_buffer (b[1])->ip.fib_index;
1226         }                       /* head_of_feature_arc */
1227
1228       next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
1229       next[0] =
1230         error[0] != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[0];
1231       next[1] = lm->local_next_by_ip_protocol[ip[1]->protocol];
1232       next[1] =
1233         error[1] != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[1];
1234
1235       b[0]->error = error_node->errors[0];
1236       b[1]->error = error_node->errors[1];
1237
1238       if (head_of_feature_arc)
1239         {
1240           u8 ip6_unknown[2];
1241           ip6_unknown[0] = error[0] == (u8) IP6_ERROR_UNKNOWN_PROTOCOL;
1242           ip6_unknown[1] = error[1] == (u8) IP6_ERROR_UNKNOWN_PROTOCOL;
1243           if (PREDICT_TRUE (ip6_unknown[0]))
1244             {
1245               u32 next32 = next[0];
1246               vnet_feature_arc_start (arc_index,
1247                                       vnet_buffer (b[0])->sw_if_index
1248                                       [VLIB_RX], &next32, b[0]);
1249               next[0] = next32;
1250             }
1251           if (PREDICT_TRUE (ip6_unknown[1]))
1252             {
1253               u32 next32 = next[1];
1254               vnet_feature_arc_start (arc_index,
1255                                       vnet_buffer (b[1])->sw_if_index
1256                                       [VLIB_RX], &next32, b[1]);
1257               next[1] = next32;
1258             }
1259         }
1260
1261       /* next */
1262       b += 2;
1263       next += 2;
1264       n_left_from -= 2;
1265     }
1266
1267   while (n_left_from)
1268     {
1269       u8 error;
1270       error = IP6_ERROR_UNKNOWN_PROTOCOL;
1271
1272       ip6_header_t *ip;
1273       ip = vlib_buffer_get_current (b[0]);
1274
1275       if (head_of_feature_arc)
1276         {
1277           vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1278           u8 type = lm->builtin_protocol_by_ip_protocol[ip->protocol];
1279
1280           u32 flags = b[0]->flags;
1281           u32 good_l4_csum =
1282             flags & (VNET_BUFFER_F_L4_CHECKSUM_CORRECT |
1283                      VNET_BUFFER_F_OFFLOAD_TCP_CKSUM |
1284                      VNET_BUFFER_F_OFFLOAD_UDP_CKSUM);
1285
1286           u32 udp_offset;
1287           i16 len_diff = 0;
1288           u8 is_tcp_udp = ip6_next_proto_is_tcp_udp (b[0], ip, &udp_offset);
1289           if (PREDICT_TRUE (is_tcp_udp))
1290             {
1291               udp_header_t *udp = (udp_header_t *) ((u8 *) ip + udp_offset);
1292               /* Don't verify UDP checksum for packets with explicit zero checksum. */
1293               good_l4_csum |= type == IP_BUILTIN_PROTOCOL_UDP
1294                 && udp->checksum == 0;
1295               /* optimistically verify UDP length. */
1296               u16 ip_len, udp_len;
1297               ip_len = clib_net_to_host_u16 (ip->payload_length);
1298               udp_len = clib_net_to_host_u16 (udp->length);
1299               len_diff = ip_len - udp_len;
1300             }
1301
1302           good_l4_csum |= type == IP_BUILTIN_PROTOCOL_UNKNOWN;
1303           len_diff = type == IP_BUILTIN_PROTOCOL_UDP ? len_diff : 0;
1304
1305           u8 need_csum = type != IP_BUILTIN_PROTOCOL_UNKNOWN && !good_l4_csum
1306             && !(flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
1307           if (PREDICT_FALSE (need_csum))
1308             {
1309               flags = ip6_tcp_udp_icmp_validate_checksum (vm, b[0]);
1310               good_l4_csum = flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
1311             }
1312
1313           error = IP6_ERROR_UNKNOWN_PROTOCOL;
1314           error = len_diff < 0 ? IP6_ERROR_UDP_LENGTH : error;
1315
1316           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
1317                          IP6_ERROR_UDP_CHECKSUM,
1318                          "Wrong IP6 errors constants");
1319           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
1320                          IP6_ERROR_ICMP_CHECKSUM,
1321                          "Wrong IP6 errors constants");
1322
1323           error = !good_l4_csum ? IP6_ERROR_UDP_CHECKSUM + type : error;
1324
1325           /* Drop packets from unroutable hosts. */
1326           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1327           u8 unroutable = error == IP6_ERROR_UNKNOWN_PROTOCOL
1328             && type != IP_BUILTIN_PROTOCOL_ICMP
1329             && !ip6_address_is_link_local_unicast (&ip->src_address);
1330           if (PREDICT_FALSE (unroutable))
1331             {
1332               error =
1333                 !ip6_urpf_loose_check (im, b[0],
1334                                        ip) ? IP6_ERROR_SRC_LOOKUP_MISS :
1335                 error;
1336             }
1337
1338           vnet_buffer (b[0])->ip.fib_index =
1339             vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1340             vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1341             vnet_buffer (b[0])->ip.fib_index;
1342         }                       /* head_of_feature_arc */
1343
1344       next[0] = lm->local_next_by_ip_protocol[ip->protocol];
1345       next[0] =
1346         error != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[0];
1347
1348       b[0]->error = error_node->errors[0];
1349
1350       if (head_of_feature_arc)
1351         {
1352           if (PREDICT_TRUE (error == (u8) IP6_ERROR_UNKNOWN_PROTOCOL))
1353             {
1354               u32 next32 = next[0];
1355               vnet_feature_arc_start (arc_index,
1356                                       vnet_buffer (b[0])->sw_if_index
1357                                       [VLIB_RX], &next32, b[0]);
1358               next[0] = next32;
1359             }
1360         }
1361
1362       /* next */
1363       b += 1;
1364       next += 1;
1365       n_left_from -= 1;
1366     }
1367
1368   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1369   return frame->n_vectors;
1370 }
1371
/* Node function for "ip6-local": runs ip6_local_inline() as the head of
 * the feature arc. */
VLIB_NODE_FN (ip6_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
                               vlib_frame_t * frame)
{
  return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */ );
}
1377
1378 /* *INDENT-OFF* */
/* Graph node registration for "ip6-local", with one next node per
 * IP_LOCAL_NEXT_* disposition. */
VLIB_REGISTER_NODE (ip6_local_node) =
{
  .name = "ip6-local",
  .vector_size = sizeof (u32),
  .format_trace = format_ip6_forward_next_trace,
  .n_next_nodes = IP_LOCAL_N_NEXT,
  .next_nodes =
  {
    [IP_LOCAL_NEXT_DROP] = "ip6-drop",
    [IP_LOCAL_NEXT_PUNT] = "ip6-punt",
    [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
    [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input",
    [IP_LOCAL_NEXT_REASSEMBLY] = "ip6-reassembly",
  },
};
1394 /* *INDENT-ON* */
1395
/* Node function for "ip6-local-end-of-arc": runs ip6_local_inline() with
 * head_of_feature_arc == 0, so the validation and feature-arc start steps
 * are skipped. */
VLIB_NODE_FN (ip6_local_end_of_arc_node) (vlib_main_t * vm,
                                          vlib_node_runtime_t * node,
                                          vlib_frame_t * frame)
{
  return ip6_local_inline (vm, node, frame, 0 /* not head of feature arc */ );
}
1402
1403 /* *INDENT-OFF* */
/* Graph node registration for "ip6-local-end-of-arc"; declared as a
 * sibling of "ip6-local". */
VLIB_REGISTER_NODE (ip6_local_end_of_arc_node) = {
  .name = "ip6-local-end-of-arc",
  .vector_size = sizeof (u32),

  .format_trace = format_ip6_forward_next_trace,
  .sibling_of = "ip6-local",
};
1411
/* Register "ip6-local-end-of-arc" as a feature on the "ip6-local" arc. */
VNET_FEATURE_INIT (ip6_local_end_of_arc, static) = {
  .arc_name = "ip6-local",
  .node_name = "ip6-local-end-of-arc",
  .runs_before = 0, /* not before any other features */
};
1417 /* *INDENT-ON* */
1418
1419 #ifdef CLIB_MARCH_VARIANT
1420 extern vlib_node_registration_t ip6_local_node;
1421
1422 #else
1423
1424 void
1425 ip6_register_protocol (u32 protocol, u32 node_index)
1426 {
1427   vlib_main_t *vm = vlib_get_main ();
1428   ip6_main_t *im = &ip6_main;
1429   ip_lookup_main_t *lm = &im->lookup_main;
1430
1431   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1432   lm->local_next_by_ip_protocol[protocol] =
1433     vlib_node_add_next (vm, ip6_local_node.index, node_index);
1434 }
1435
1436 clib_error_t *
1437 ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index,
1438                     u8 refresh)
1439 {
1440   vnet_main_t *vnm = vnet_get_main ();
1441   ip6_main_t *im = &ip6_main;
1442   icmp6_neighbor_solicitation_header_t *h;
1443   ip6_address_t *src;
1444   ip_interface_address_t *ia;
1445   ip_adjacency_t *adj;
1446   vnet_hw_interface_t *hi;
1447   vnet_sw_interface_t *si;
1448   vlib_buffer_t *b;
1449   adj_index_t ai;
1450   u32 bi = 0;
1451   int bogus_length;
1452
1453   si = vnet_get_sw_interface (vnm, sw_if_index);
1454
1455   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
1456     {
1457       return clib_error_return (0, "%U: interface %U down",
1458                                 format_ip6_address, dst,
1459                                 format_vnet_sw_if_index_name, vnm,
1460                                 sw_if_index);
1461     }
1462
1463   src =
1464     ip6_interface_address_matching_destination (im, dst, sw_if_index, &ia);
1465   if (!src)
1466     {
1467       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
1468       return clib_error_return
1469         (0, "no matching interface address for destination %U (interface %U)",
1470          format_ip6_address, dst,
1471          format_vnet_sw_if_index_name, vnm, sw_if_index);
1472     }
1473
1474   h =
1475     vlib_packet_template_get_packet (vm,
1476                                      &im->discover_neighbor_packet_template,
1477                                      &bi);
1478   if (!h)
1479     return clib_error_return (0, "ICMP6 NS packet allocation failed");
1480
1481   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
1482
1483   /* Destination address is a solicited node multicast address.  We need to fill in
1484      the low 24 bits with low 24 bits of target's address. */
1485   h->ip.dst_address.as_u8[13] = dst->as_u8[13];
1486   h->ip.dst_address.as_u8[14] = dst->as_u8[14];
1487   h->ip.dst_address.as_u8[15] = dst->as_u8[15];
1488
1489   h->ip.src_address = src[0];
1490   h->neighbor.target_address = dst[0];
1491
1492   if (PREDICT_FALSE (!hi->hw_address))
1493     {
1494       return clib_error_return (0, "%U: interface %U do not support ip probe",
1495                                 format_ip6_address, dst,
1496                                 format_vnet_sw_if_index_name, vnm,
1497                                 sw_if_index);
1498     }
1499
1500   clib_memcpy_fast (h->link_layer_option.ethernet_address, hi->hw_address,
1501                     vec_len (hi->hw_address));
1502
1503   h->neighbor.icmp.checksum =
1504     ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length);
1505   ASSERT (bogus_length == 0);
1506
1507   b = vlib_get_buffer (vm, bi);
1508   vnet_buffer (b)->sw_if_index[VLIB_RX] =
1509     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
1510
1511   /* Add encapsulation string for software interface (e.g. ethernet header). */
1512   ip46_address_t nh = {
1513     .ip6 = *dst,
1514   };
1515
1516   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6,
1517                             VNET_LINK_IP6, &nh, sw_if_index);
1518   adj = adj_get (ai);
1519
1520   /* Peer has been previously resolved, retrieve glean adj instead */
1521   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE && refresh == 0)
1522     {
1523       adj_unlock (ai);
1524       ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP6,
1525                                   VNET_LINK_IP6, sw_if_index, &nh);
1526       adj = adj_get (ai);
1527     }
1528
1529   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
1530   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
1531
1532   {
1533     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
1534     u32 *to_next = vlib_frame_vector_args (f);
1535     to_next[0] = bi;
1536     f->n_vectors = 1;
1537     vlib_put_frame_to_node (vm, hi->output_node_index, f);
1538   }
1539
1540   adj_unlock (ai);
1541   return /* no error */ 0;
1542 }
1543 #endif
1544
/* Next-node dispositions used by the ip6 rewrite path. */
typedef enum
{
  IP6_REWRITE_NEXT_DROP,
  IP6_REWRITE_NEXT_ICMP_ERROR,  /* e.g. hop limit expired, packet too big */
  IP6_REWRITE_NEXT_FRAGMENT,    /* locally generated packet over the MTU */
  IP6_REWRITE_N_NEXT            /* Last */
} ip6_rewrite_next_t;
1552
/**
 * The bits of an IPv6 address to mask in order to construct a multicast
 * MAC address
 */
#define IP6_MCAST_ADDR_MASK 0xffffffff
1558
1559 always_inline void
1560 ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes,
1561                u16 adj_packet_bytes, bool is_locally_generated,
1562                u32 * next, u32 * error)
1563 {
1564   if (adj_packet_bytes >= 1280 && packet_bytes > adj_packet_bytes)
1565     {
1566       if (is_locally_generated)
1567         {
1568           /* IP fragmentation */
1569           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
1570                                    IP6_FRAG_NEXT_IP6_REWRITE, 0);
1571           *next = IP6_REWRITE_NEXT_FRAGMENT;
1572           *error = IP6_ERROR_MTU_EXCEEDED;
1573         }
1574       else
1575         {
1576           *error = IP6_ERROR_MTU_EXCEEDED;
1577           icmp6_error_set_vnet_buffer (b, ICMP6_packet_too_big, 0,
1578                                        adj_packet_bytes);
1579           *next = IP6_REWRITE_NEXT_ICMP_ERROR;
1580         }
1581     }
1582 }
1583
1584 always_inline uword
1585 ip6_rewrite_inline_with_gso (vlib_main_t * vm,
1586                              vlib_node_runtime_t * node,
1587                              vlib_frame_t * frame,
1588                              int do_counters, int is_midchain, int is_mcast,
1589                              int do_gso)
1590 {
1591   ip_lookup_main_t *lm = &ip6_main.lookup_main;
1592   u32 *from = vlib_frame_vector_args (frame);
1593   u32 n_left_from, n_left_to_next, *to_next, next_index;
1594   vlib_node_runtime_t *error_node =
1595     vlib_node_get_runtime (vm, ip6_input_node.index);
1596
1597   n_left_from = frame->n_vectors;
1598   next_index = node->cached_next_index;
1599   u32 thread_index = vm->thread_index;
1600
1601   while (n_left_from > 0)
1602     {
1603       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1604
1605       while (n_left_from >= 4 && n_left_to_next >= 2)
1606         {
1607           ip_adjacency_t *adj0, *adj1;
1608           vlib_buffer_t *p0, *p1;
1609           ip6_header_t *ip0, *ip1;
1610           u32 pi0, rw_len0, next0, error0, adj_index0;
1611           u32 pi1, rw_len1, next1, error1, adj_index1;
1612           u32 tx_sw_if_index0, tx_sw_if_index1;
1613           bool is_locally_originated0, is_locally_originated1;
1614
1615           /* Prefetch next iteration. */
1616           {
1617             vlib_buffer_t *p2, *p3;
1618
1619             p2 = vlib_get_buffer (vm, from[2]);
1620             p3 = vlib_get_buffer (vm, from[3]);
1621
1622             vlib_prefetch_buffer_header (p2, LOAD);
1623             vlib_prefetch_buffer_header (p3, LOAD);
1624
1625             CLIB_PREFETCH (p2->pre_data, 32, STORE);
1626             CLIB_PREFETCH (p3->pre_data, 32, STORE);
1627
1628             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
1629             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
1630           }
1631
1632           pi0 = to_next[0] = from[0];
1633           pi1 = to_next[1] = from[1];
1634
1635           from += 2;
1636           n_left_from -= 2;
1637           to_next += 2;
1638           n_left_to_next -= 2;
1639
1640           p0 = vlib_get_buffer (vm, pi0);
1641           p1 = vlib_get_buffer (vm, pi1);
1642
1643           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1644           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
1645
1646           ip0 = vlib_buffer_get_current (p0);
1647           ip1 = vlib_buffer_get_current (p1);
1648
1649           error0 = error1 = IP6_ERROR_NONE;
1650           next0 = next1 = IP6_REWRITE_NEXT_DROP;
1651
1652           is_locally_originated0 =
1653             p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
1654           if (PREDICT_TRUE (!is_locally_originated0))
1655             {
1656               i32 hop_limit0 = ip0->hop_limit;
1657
1658               /* Input node should have reject packets with hop limit 0. */
1659               ASSERT (ip0->hop_limit > 0);
1660
1661               hop_limit0 -= 1;
1662
1663               ip0->hop_limit = hop_limit0;
1664
1665               /*
1666                * If the hop count drops below 1 when forwarding, generate
1667                * an ICMP response.
1668                */
1669               if (PREDICT_FALSE (hop_limit0 <= 0))
1670                 {
1671                   error0 = IP6_ERROR_TIME_EXPIRED;
1672                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
1673                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1674                   icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
1675                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
1676                                                0);
1677                 }
1678             }
1679           else
1680             {
1681               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
1682             }
1683           is_locally_originated1 =
1684             p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
1685           if (PREDICT_TRUE (!is_locally_originated1))
1686             {
1687               i32 hop_limit1 = ip1->hop_limit;
1688
1689               /* Input node should have reject packets with hop limit 0. */
1690               ASSERT (ip1->hop_limit > 0);
1691
1692               hop_limit1 -= 1;
1693
1694               ip1->hop_limit = hop_limit1;
1695
1696               /*
1697                * If the hop count drops below 1 when forwarding, generate
1698                * an ICMP response.
1699                */
1700               if (PREDICT_FALSE (hop_limit1 <= 0))
1701                 {
1702                   error1 = IP6_ERROR_TIME_EXPIRED;
1703                   next1 = IP6_REWRITE_NEXT_ICMP_ERROR;
1704                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1705                   icmp6_error_set_vnet_buffer (p1, ICMP6_time_exceeded,
1706                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
1707                                                0);
1708                 }
1709             }
1710           else
1711             {
1712               p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
1713             }
1714           adj0 = adj_get (adj_index0);
1715           adj1 = adj_get (adj_index1);
1716
1717           rw_len0 = adj0[0].rewrite_header.data_bytes;
1718           rw_len1 = adj1[0].rewrite_header.data_bytes;
1719           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
1720           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
1721
1722           if (do_counters)
1723             {
1724               vlib_increment_combined_counter
1725                 (&adjacency_counters,
1726                  thread_index, adj_index0, 1,
1727                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
1728               vlib_increment_combined_counter
1729                 (&adjacency_counters,
1730                  thread_index, adj_index1, 1,
1731                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
1732             }
1733
1734           /* Check MTU of outgoing interface. */
1735           u16 ip0_len =
1736             clib_net_to_host_u16 (ip0->payload_length) +
1737             sizeof (ip6_header_t);
1738           u16 ip1_len =
1739             clib_net_to_host_u16 (ip1->payload_length) +
1740             sizeof (ip6_header_t);
1741           if (do_gso && (p0->flags & VNET_BUFFER_F_GSO))
1742             ip0_len = gso_mtu_sz (p0);
1743           if (do_gso && (p1->flags & VNET_BUFFER_F_GSO))
1744             ip1_len = gso_mtu_sz (p1);
1745
1746
1747
1748           ip6_mtu_check (p0, ip0_len,
1749                          adj0[0].rewrite_header.max_l3_packet_bytes,
1750                          is_locally_originated0, &next0, &error0);
1751           ip6_mtu_check (p1, ip1_len,
1752                          adj1[0].rewrite_header.max_l3_packet_bytes,
1753                          is_locally_originated1, &next1, &error1);
1754
1755           /* Don't adjust the buffer for hop count issue; icmp-error node
1756            * wants to see the IP header */
1757           if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
1758             {
1759               p0->current_data -= rw_len0;
1760               p0->current_length += rw_len0;
1761
1762               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
1763               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
1764               next0 = adj0[0].rewrite_header.next_index;
1765
1766               if (PREDICT_FALSE
1767                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
1768                 vnet_feature_arc_start (lm->output_feature_arc_index,
1769                                         tx_sw_if_index0, &next0, p0);
1770             }
1771           else
1772             {
1773               p0->error = error_node->errors[error0];
1774             }
1775           if (PREDICT_TRUE (error1 == IP6_ERROR_NONE))
1776             {
1777               p1->current_data -= rw_len1;
1778               p1->current_length += rw_len1;
1779
1780               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
1781               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
1782               next1 = adj1[0].rewrite_header.next_index;
1783
1784               if (PREDICT_FALSE
1785                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
1786                 vnet_feature_arc_start (lm->output_feature_arc_index,
1787                                         tx_sw_if_index1, &next1, p1);
1788             }
1789           else
1790             {
1791               p1->error = error_node->errors[error1];
1792             }
1793
1794           if (is_midchain)
1795             {
1796               /* before we paint on the next header, update the L4
1797                * checksums if required, since there's no offload on a tunnel */
1798               calc_checksums (vm, p0);
1799               calc_checksums (vm, p1);
1800             }
1801
1802           /* Guess we are only writing on simple Ethernet header. */
1803           vnet_rewrite_two_headers (adj0[0], adj1[0],
1804                                     ip0, ip1, sizeof (ethernet_header_t));
1805
1806           if (is_midchain)
1807             {
1808               if (adj0->sub_type.midchain.fixup_func)
1809                 adj0->sub_type.midchain.fixup_func
1810                   (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
1811               if (adj1->sub_type.midchain.fixup_func)
1812                 adj1->sub_type.midchain.fixup_func
1813                   (vm, adj1, p1, adj1->sub_type.midchain.fixup_data);
1814             }
1815           if (is_mcast)
1816             {
1817               /*
1818                * copy bytes from the IP address into the MAC rewrite
1819                */
1820               vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
1821                                           adj0->
1822                                           rewrite_header.dst_mcast_offset,
1823                                           &ip0->dst_address.as_u32[3],
1824                                           (u8 *) ip0);
1825               vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
1826                                           adj1->
1827                                           rewrite_header.dst_mcast_offset,
1828                                           &ip1->dst_address.as_u32[3],
1829                                           (u8 *) ip1);
1830             }
1831
1832           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1833                                            to_next, n_left_to_next,
1834                                            pi0, pi1, next0, next1);
1835         }
1836
1837       while (n_left_from > 0 && n_left_to_next > 0)
1838         {
1839           ip_adjacency_t *adj0;
1840           vlib_buffer_t *p0;
1841           ip6_header_t *ip0;
1842           u32 pi0, rw_len0;
1843           u32 adj_index0, next0, error0;
1844           u32 tx_sw_if_index0;
1845           bool is_locally_originated0;
1846
1847           pi0 = to_next[0] = from[0];
1848
1849           p0 = vlib_get_buffer (vm, pi0);
1850
1851           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1852
1853           adj0 = adj_get (adj_index0);
1854
1855           ip0 = vlib_buffer_get_current (p0);
1856
1857           error0 = IP6_ERROR_NONE;
1858           next0 = IP6_REWRITE_NEXT_DROP;
1859
1860           /* Check hop limit */
1861           is_locally_originated0 =
1862             p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
1863           if (PREDICT_TRUE (!is_locally_originated0))
1864             {
1865               i32 hop_limit0 = ip0->hop_limit;
1866
1867               ASSERT (ip0->hop_limit > 0);
1868
1869               hop_limit0 -= 1;
1870
1871               ip0->hop_limit = hop_limit0;
1872
1873               if (PREDICT_FALSE (hop_limit0 <= 0))
1874                 {
1875                   /*
1876                    * If the hop count drops below 1 when forwarding, generate
1877                    * an ICMP response.
1878                    */
1879                   error0 = IP6_ERROR_TIME_EXPIRED;
1880                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
1881                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1882                   icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
1883                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
1884                                                0);
1885                 }
1886             }
1887           else
1888             {
1889               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
1890             }
1891
1892           if (is_midchain)
1893             {
1894               calc_checksums (vm, p0);
1895             }
1896
1897           /* Guess we are only writing on simple Ethernet header. */
1898           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
1899
1900           /* Update packet buffer attributes/set output interface. */
1901           rw_len0 = adj0[0].rewrite_header.data_bytes;
1902           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
1903
1904           if (do_counters)
1905             {
1906               vlib_increment_combined_counter
1907                 (&adjacency_counters,
1908                  thread_index, adj_index0, 1,
1909                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
1910             }
1911
1912           /* Check MTU of outgoing interface. */
1913           u16 ip0_len =
1914             clib_net_to_host_u16 (ip0->payload_length) +
1915             sizeof (ip6_header_t);
1916           if (do_gso && (p0->flags & VNET_BUFFER_F_GSO))
1917             ip0_len = gso_mtu_sz (p0);
1918
1919           ip6_mtu_check (p0, ip0_len,
1920                          adj0[0].rewrite_header.max_l3_packet_bytes,
1921                          is_locally_originated0, &next0, &error0);
1922
1923           /* Don't adjust the buffer for hop count issue; icmp-error node
1924            * wants to see the IP header */
1925           if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
1926             {
1927               p0->current_data -= rw_len0;
1928               p0->current_length += rw_len0;
1929
1930               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
1931
1932               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
1933               next0 = adj0[0].rewrite_header.next_index;
1934
1935               if (PREDICT_FALSE
1936                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
1937                 vnet_feature_arc_start (lm->output_feature_arc_index,
1938                                         tx_sw_if_index0, &next0, p0);
1939             }
1940           else
1941             {
1942               p0->error = error_node->errors[error0];
1943             }
1944
1945           if (is_midchain)
1946             {
1947               if (adj0->sub_type.midchain.fixup_func)
1948                 adj0->sub_type.midchain.fixup_func
1949                   (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
1950             }
1951           if (is_mcast)
1952             {
1953               vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
1954                                           adj0->
1955                                           rewrite_header.dst_mcast_offset,
1956                                           &ip0->dst_address.as_u32[3],
1957                                           (u8 *) ip0);
1958             }
1959
1960           from += 1;
1961           n_left_from -= 1;
1962           to_next += 1;
1963           n_left_to_next -= 1;
1964
1965           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1966                                            to_next, n_left_to_next,
1967                                            pi0, next0);
1968         }
1969
1970       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1971     }
1972
1973   /* Need to do trace after rewrites to pick up new packet data. */
1974   if (node->flags & VLIB_NODE_FLAG_TRACE)
1975     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1976
1977   return frame->n_vectors;
1978 }
1979
1980 always_inline uword
1981 ip6_rewrite_inline (vlib_main_t * vm,
1982                     vlib_node_runtime_t * node,
1983                     vlib_frame_t * frame,
1984                     int do_counters, int is_midchain, int is_mcast)
1985 {
1986   vnet_main_t *vnm = vnet_get_main ();
1987   if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
1988     return ip6_rewrite_inline_with_gso (vm, node, frame, do_counters,
1989                                         is_midchain, is_mcast,
1990                                         1 /* do_gso */ );
1991   else
1992     return ip6_rewrite_inline_with_gso (vm, node, frame, do_counters,
1993                                         is_midchain, is_mcast,
1994                                         0 /* no do_gso */ );
1995 }
1996
1997 VLIB_NODE_FN (ip6_rewrite_node) (vlib_main_t * vm,
1998                                  vlib_node_runtime_t * node,
1999                                  vlib_frame_t * frame)
2000 {
2001   if (adj_are_counters_enabled ())
2002     return ip6_rewrite_inline (vm, node, frame, 1, 0, 0);
2003   else
2004     return ip6_rewrite_inline (vm, node, frame, 0, 0, 0);
2005 }
2006
2007 VLIB_NODE_FN (ip6_rewrite_bcast_node) (vlib_main_t * vm,
2008                                        vlib_node_runtime_t * node,
2009                                        vlib_frame_t * frame)
2010 {
2011   if (adj_are_counters_enabled ())
2012     return ip6_rewrite_inline (vm, node, frame, 1, 0, 0);
2013   else
2014     return ip6_rewrite_inline (vm, node, frame, 0, 0, 0);
2015 }
2016
2017 VLIB_NODE_FN (ip6_rewrite_mcast_node) (vlib_main_t * vm,
2018                                        vlib_node_runtime_t * node,
2019                                        vlib_frame_t * frame)
2020 {
2021   if (adj_are_counters_enabled ())
2022     return ip6_rewrite_inline (vm, node, frame, 1, 0, 1);
2023   else
2024     return ip6_rewrite_inline (vm, node, frame, 0, 0, 1);
2025 }
2026
2027 VLIB_NODE_FN (ip6_midchain_node) (vlib_main_t * vm,
2028                                   vlib_node_runtime_t * node,
2029                                   vlib_frame_t * frame)
2030 {
2031   if (adj_are_counters_enabled ())
2032     return ip6_rewrite_inline (vm, node, frame, 1, 1, 0);
2033   else
2034     return ip6_rewrite_inline (vm, node, frame, 0, 1, 0);
2035 }
2036
2037 VLIB_NODE_FN (ip6_mcast_midchain_node) (vlib_main_t * vm,
2038                                         vlib_node_runtime_t * node,
2039                                         vlib_frame_t * frame)
2040 {
2041   if (adj_are_counters_enabled ())
2042     return ip6_rewrite_inline (vm, node, frame, 1, 1, 1);
2043   else
2044     return ip6_rewrite_inline (vm, node, frame, 0, 1, 1);
2045 }
2046
2047 /* *INDENT-OFF* */
2048 VLIB_REGISTER_NODE (ip6_midchain_node) =
2049 {
2050   .name = "ip6-midchain",
2051   .vector_size = sizeof (u32),
2052   .format_trace = format_ip6_forward_next_trace,
2053   .sibling_of = "ip6-rewrite",
2054   };
2055
2056 VLIB_REGISTER_NODE (ip6_rewrite_node) =
2057 {
2058   .name = "ip6-rewrite",
2059   .vector_size = sizeof (u32),
2060   .format_trace = format_ip6_rewrite_trace,
2061   .n_next_nodes = IP6_REWRITE_N_NEXT,
2062   .next_nodes =
2063   {
2064     [IP6_REWRITE_NEXT_DROP] = "ip6-drop",
2065     [IP6_REWRITE_NEXT_ICMP_ERROR] = "ip6-icmp-error",
2066     [IP6_REWRITE_NEXT_FRAGMENT] = "ip6-frag",
2067   },
2068 };
2069
2070 VLIB_REGISTER_NODE (ip6_rewrite_bcast_node) = {
2071   .name = "ip6-rewrite-bcast",
2072   .vector_size = sizeof (u32),
2073
2074   .format_trace = format_ip6_rewrite_trace,
2075   .sibling_of = "ip6-rewrite",
2076 };
2077
2078 VLIB_REGISTER_NODE (ip6_rewrite_mcast_node) =
2079 {
2080   .name = "ip6-rewrite-mcast",
2081   .vector_size = sizeof (u32),
2082   .format_trace = format_ip6_rewrite_trace,
2083   .sibling_of = "ip6-rewrite",
2084 };
2085
2086
2087 VLIB_REGISTER_NODE (ip6_mcast_midchain_node) =
2088 {
2089   .name = "ip6-mcast-midchain",
2090   .vector_size = sizeof (u32),
2091   .format_trace = format_ip6_rewrite_trace,
2092   .sibling_of = "ip6-rewrite",
2093 };
2094
2095 /* *INDENT-ON* */
2096
2097 /*
2098  * Hop-by-Hop handling
2099  */
2100 #ifndef CLIB_MARCH_VARIANT
2101 ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
2102 #endif /* CLIB_MARCH_VARIANT */
2103
2104 #define foreach_ip6_hop_by_hop_error \
2105 _(PROCESSED, "pkts with ip6 hop-by-hop options") \
2106 _(FORMAT, "incorrectly formatted hop-by-hop options") \
2107 _(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options")
2108
2109 /* *INDENT-OFF* */
2110 typedef enum
2111 {
2112 #define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
2113   foreach_ip6_hop_by_hop_error
2114 #undef _
2115   IP6_HOP_BY_HOP_N_ERROR,
2116 } ip6_hop_by_hop_error_t;
2117 /* *INDENT-ON* */
2118
2119 /*
2120  * Primary h-b-h handler trace support
2121  * We work pretty hard on the problem for obvious reasons
2122  */
2123 typedef struct
2124 {
2125   u32 next_index;
2126   u32 trace_len;
2127   u8 option_data[256];
2128 } ip6_hop_by_hop_trace_t;
2129
2130 extern vlib_node_registration_t ip6_hop_by_hop_node;
2131
2132 static char *ip6_hop_by_hop_error_strings[] = {
2133 #define _(sym,string) string,
2134   foreach_ip6_hop_by_hop_error
2135 #undef _
2136 };
2137
2138 #ifndef CLIB_MARCH_VARIANT
2139 u8 *
2140 format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args)
2141 {
2142   ip6_hop_by_hop_header_t *hbh0 = va_arg (*args, ip6_hop_by_hop_header_t *);
2143   int total_len = va_arg (*args, int);
2144   ip6_hop_by_hop_option_t *opt0, *limit0;
2145   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2146   u8 type0;
2147
2148   s = format (s, "IP6_HOP_BY_HOP: next protocol %d len %d total %d",
2149               hbh0->protocol, (hbh0->length + 1) << 3, total_len);
2150
2151   opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2152   limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 + total_len);
2153
2154   while (opt0 < limit0)
2155     {
2156       type0 = opt0->type;
2157       switch (type0)
2158         {
2159         case 0:         /* Pad, just stop */
2160           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1);
2161           break;
2162
2163         default:
2164           if (hm->trace[type0])
2165             {
2166               s = (*hm->trace[type0]) (s, opt0);
2167             }
2168           else
2169             {
2170               s =
2171                 format (s, "\n    unrecognized option %d length %d", type0,
2172                         opt0->length);
2173             }
2174           opt0 =
2175             (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2176                                          sizeof (ip6_hop_by_hop_option_t));
2177           break;
2178         }
2179     }
2180   return s;
2181 }
2182 #endif
2183
2184 static u8 *
2185 format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
2186 {
2187   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
2188   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
2189   ip6_hop_by_hop_trace_t *t = va_arg (*args, ip6_hop_by_hop_trace_t *);
2190   ip6_hop_by_hop_header_t *hbh0;
2191   ip6_hop_by_hop_option_t *opt0, *limit0;
2192   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2193
2194   u8 type0;
2195
2196   hbh0 = (ip6_hop_by_hop_header_t *) t->option_data;
2197
2198   s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d",
2199               t->next_index, (hbh0->length + 1) << 3, t->trace_len);
2200
2201   opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2202   limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0) + t->trace_len;
2203
2204   while (opt0 < limit0)
2205     {
2206       type0 = opt0->type;
2207       switch (type0)
2208         {
2209         case 0:         /* Pad, just stop */
2210           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1;
2211           break;
2212
2213         default:
2214           if (hm->trace[type0])
2215             {
2216               s = (*hm->trace[type0]) (s, opt0);
2217             }
2218           else
2219             {
2220               s =
2221                 format (s, "\n    unrecognized option %d length %d", type0,
2222                         opt0->length);
2223             }
2224           opt0 =
2225             (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2226                                          sizeof (ip6_hop_by_hop_option_t));
2227           break;
2228         }
2229     }
2230   return s;
2231 }
2232
2233 always_inline u8
2234 ip6_scan_hbh_options (vlib_buffer_t * b0,
2235                       ip6_header_t * ip0,
2236                       ip6_hop_by_hop_header_t * hbh0,
2237                       ip6_hop_by_hop_option_t * opt0,
2238                       ip6_hop_by_hop_option_t * limit0, u32 * next0)
2239 {
2240   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2241   u8 type0;
2242   u8 error0 = 0;
2243
2244   while (opt0 < limit0)
2245     {
2246       type0 = opt0->type;
2247       switch (type0)
2248         {
2249         case 0:         /* Pad1 */
2250           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1;
2251           continue;
2252         case 1:         /* PadN */
2253           break;
2254         default:
2255           if (hm->options[type0])
2256             {
2257               if ((*hm->options[type0]) (b0, ip0, opt0) < 0)
2258                 {
2259                   error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2260                   return (error0);
2261                 }
2262             }
2263           else
2264             {
2265               /* Unrecognized mandatory option, check the two high order bits */
2266               switch (opt0->type & HBH_OPTION_TYPE_HIGH_ORDER_BITS)
2267                 {
2268                 case HBH_OPTION_TYPE_SKIP_UNKNOWN:
2269                   break;
2270                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN:
2271                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2272                   *next0 = IP_LOOKUP_NEXT_DROP;
2273                   break;
2274                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP:
2275                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2276                   *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
2277                   icmp6_error_set_vnet_buffer (b0, ICMP6_parameter_problem,
2278                                                ICMP6_parameter_problem_unrecognized_option,
2279                                                (u8 *) opt0 - (u8 *) ip0);
2280                   break;
2281                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST:
2282                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2283                   if (!ip6_address_is_multicast (&ip0->dst_address))
2284                     {
2285                       *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
2286                       icmp6_error_set_vnet_buffer (b0,
2287                                                    ICMP6_parameter_problem,
2288                                                    ICMP6_parameter_problem_unrecognized_option,
2289                                                    (u8 *) opt0 - (u8 *) ip0);
2290                     }
2291                   else
2292                     {
2293                       *next0 = IP_LOOKUP_NEXT_DROP;
2294                     }
2295                   break;
2296                 }
2297               return (error0);
2298             }
2299         }
2300       opt0 =
2301         (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2302                                      sizeof (ip6_hop_by_hop_option_t));
2303     }
2304   return (error0);
2305 }
2306
2307 /*
2308  * Process the Hop-by-Hop Options header
2309  */
2310 VLIB_NODE_FN (ip6_hop_by_hop_node) (vlib_main_t * vm,
2311                                     vlib_node_runtime_t * node,
2312                                     vlib_frame_t * frame)
2313 {
2314   vlib_node_runtime_t *error_node =
2315     vlib_node_get_runtime (vm, ip6_hop_by_hop_node.index);
2316   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2317   u32 n_left_from, *from, *to_next;
2318   ip_lookup_next_t next_index;
2319
2320   from = vlib_frame_vector_args (frame);
2321   n_left_from = frame->n_vectors;
2322   next_index = node->cached_next_index;
2323
2324   while (n_left_from > 0)
2325     {
2326       u32 n_left_to_next;
2327
2328       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2329
2330       while (n_left_from >= 4 && n_left_to_next >= 2)
2331         {
2332           u32 bi0, bi1;
2333           vlib_buffer_t *b0, *b1;
2334           u32 next0, next1;
2335           ip6_header_t *ip0, *ip1;
2336           ip6_hop_by_hop_header_t *hbh0, *hbh1;
2337           ip6_hop_by_hop_option_t *opt0, *limit0, *opt1, *limit1;
2338           u8 error0 = 0, error1 = 0;
2339
2340           /* Prefetch next iteration. */
2341           {
2342             vlib_buffer_t *p2, *p3;
2343
2344             p2 = vlib_get_buffer (vm, from[2]);
2345             p3 = vlib_get_buffer (vm, from[3]);
2346
2347             vlib_prefetch_buffer_header (p2, LOAD);
2348             vlib_prefetch_buffer_header (p3, LOAD);
2349
2350             CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
2351             CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
2352           }
2353
2354           /* Speculatively enqueue b0, b1 to the current next frame */
2355           to_next[0] = bi0 = from[0];
2356           to_next[1] = bi1 = from[1];
2357           from += 2;
2358           to_next += 2;
2359           n_left_from -= 2;
2360           n_left_to_next -= 2;
2361
2362           b0 = vlib_get_buffer (vm, bi0);
2363           b1 = vlib_get_buffer (vm, bi1);
2364
2365           /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2366           u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
2367           ip_adjacency_t *adj0 = adj_get (adj_index0);
2368           u32 adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
2369           ip_adjacency_t *adj1 = adj_get (adj_index1);
2370
2371           /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2372           next0 = adj0->lookup_next_index;
2373           next1 = adj1->lookup_next_index;
2374
2375           ip0 = vlib_buffer_get_current (b0);
2376           ip1 = vlib_buffer_get_current (b1);
2377           hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
2378           hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
2379           opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2380           opt1 = (ip6_hop_by_hop_option_t *) (hbh1 + 1);
2381           limit0 =
2382             (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
2383                                          ((hbh0->length + 1) << 3));
2384           limit1 =
2385             (ip6_hop_by_hop_option_t *) ((u8 *) hbh1 +
2386                                          ((hbh1->length + 1) << 3));
2387
2388           /*
2389            * Basic validity checks
2390            */
2391           if ((hbh0->length + 1) << 3 >
2392               clib_net_to_host_u16 (ip0->payload_length))
2393             {
2394               error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2395               next0 = IP_LOOKUP_NEXT_DROP;
2396               goto outdual;
2397             }
2398           /* Scan the set of h-b-h options, process ones that we understand */
2399           error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
2400
2401           if ((hbh1->length + 1) << 3 >
2402               clib_net_to_host_u16 (ip1->payload_length))
2403             {
2404               error1 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2405               next1 = IP_LOOKUP_NEXT_DROP;
2406               goto outdual;
2407             }
2408           /* Scan the set of h-b-h options, process ones that we understand */
2409           error1 = ip6_scan_hbh_options (b1, ip1, hbh1, opt1, limit1, &next1);
2410
2411         outdual:
2412           /* Has the classifier flagged this buffer for special treatment? */
2413           if (PREDICT_FALSE
2414               ((error0 == 0)
2415                && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
2416             next0 = hm->next_override;
2417
2418           /* Has the classifier flagged this buffer for special treatment? */
2419           if (PREDICT_FALSE
2420               ((error1 == 0)
2421                && (vnet_buffer (b1)->l2_classify.opaque_index & OI_DECAP)))
2422             next1 = hm->next_override;
2423
2424           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
2425             {
2426               if (b0->flags & VLIB_BUFFER_IS_TRACED)
2427                 {
2428                   ip6_hop_by_hop_trace_t *t =
2429                     vlib_add_trace (vm, node, b0, sizeof (*t));
2430                   u32 trace_len = (hbh0->length + 1) << 3;
2431                   t->next_index = next0;
2432                   /* Capture the h-b-h option verbatim */
2433                   trace_len =
2434                     trace_len <
2435                     ARRAY_LEN (t->option_data) ? trace_len :
2436                     ARRAY_LEN (t->option_data);
2437                   t->trace_len = trace_len;
2438                   clib_memcpy_fast (t->option_data, hbh0, trace_len);
2439                 }
2440               if (b1->flags & VLIB_BUFFER_IS_TRACED)
2441                 {
2442                   ip6_hop_by_hop_trace_t *t =
2443                     vlib_add_trace (vm, node, b1, sizeof (*t));
2444                   u32 trace_len = (hbh1->length + 1) << 3;
2445                   t->next_index = next1;
2446                   /* Capture the h-b-h option verbatim */
2447                   trace_len =
2448                     trace_len <
2449                     ARRAY_LEN (t->option_data) ? trace_len :
2450                     ARRAY_LEN (t->option_data);
2451                   t->trace_len = trace_len;
2452                   clib_memcpy_fast (t->option_data, hbh1, trace_len);
2453                 }
2454
2455             }
2456
2457           b0->error = error_node->errors[error0];
2458           b1->error = error_node->errors[error1];
2459
2460           /* verify speculative enqueue, maybe switch current next frame */
2461           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
2462                                            n_left_to_next, bi0, bi1, next0,
2463                                            next1);
2464         }
2465
2466       while (n_left_from > 0 && n_left_to_next > 0)
2467         {
2468           u32 bi0;
2469           vlib_buffer_t *b0;
2470           u32 next0;
2471           ip6_header_t *ip0;
2472           ip6_hop_by_hop_header_t *hbh0;
2473           ip6_hop_by_hop_option_t *opt0, *limit0;
2474           u8 error0 = 0;
2475
2476           /* Speculatively enqueue b0 to the current next frame */
2477           bi0 = from[0];
2478           to_next[0] = bi0;
2479           from += 1;
2480           to_next += 1;
2481           n_left_from -= 1;
2482           n_left_to_next -= 1;
2483
2484           b0 = vlib_get_buffer (vm, bi0);
2485           /*
2486            * Default use the next_index from the adjacency.
2487            * A HBH option rarely redirects to a different node
2488            */
2489           u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
2490           ip_adjacency_t *adj0 = adj_get (adj_index0);
2491           next0 = adj0->lookup_next_index;
2492
2493           ip0 = vlib_buffer_get_current (b0);
2494           hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
2495           opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2496           limit0 =
2497             (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
2498                                          ((hbh0->length + 1) << 3));
2499
2500           /*
2501            * Basic validity checks
2502            */
2503           if ((hbh0->length + 1) << 3 >
2504               clib_net_to_host_u16 (ip0->payload_length))
2505             {
2506               error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2507               next0 = IP_LOOKUP_NEXT_DROP;
2508               goto out0;
2509             }
2510
2511           /* Scan the set of h-b-h options, process ones that we understand */
2512           error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
2513
2514         out0:
2515           /* Has the classifier flagged this buffer for special treatment? */
2516           if (PREDICT_FALSE
2517               ((error0 == 0)
2518                && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
2519             next0 = hm->next_override;
2520
2521           if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
2522             {
2523               ip6_hop_by_hop_trace_t *t =
2524                 vlib_add_trace (vm, node, b0, sizeof (*t));
2525               u32 trace_len = (hbh0->length + 1) << 3;
2526               t->next_index = next0;
2527               /* Capture the h-b-h option verbatim */
2528               trace_len =
2529                 trace_len <
2530                 ARRAY_LEN (t->option_data) ? trace_len :
2531                 ARRAY_LEN (t->option_data);
2532               t->trace_len = trace_len;
2533               clib_memcpy_fast (t->option_data, hbh0, trace_len);
2534             }
2535
2536           b0->error = error_node->errors[error0];
2537
2538           /* verify speculative enqueue, maybe switch current next frame */
2539           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
2540                                            n_left_to_next, bi0, next0);
2541         }
2542       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2543     }
2544   return frame->n_vectors;
2545 }
2546
2547 /* *INDENT-OFF* */
2548 VLIB_REGISTER_NODE (ip6_hop_by_hop_node) =
2549 {
2550   .name = "ip6-hop-by-hop",
2551   .sibling_of = "ip6-lookup",
2552   .vector_size = sizeof (u32),
2553   .format_trace = format_ip6_hop_by_hop_trace,
2554   .type = VLIB_NODE_TYPE_INTERNAL,
2555   .n_errors = ARRAY_LEN (ip6_hop_by_hop_error_strings),
2556   .error_strings = ip6_hop_by_hop_error_strings,
2557   .n_next_nodes = 0,
2558 };
2559 /* *INDENT-ON* */
2560
2561 static clib_error_t *
2562 ip6_hop_by_hop_init (vlib_main_t * vm)
2563 {
2564   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2565   clib_memset (hm->options, 0, sizeof (hm->options));
2566   clib_memset (hm->trace, 0, sizeof (hm->trace));
2567   hm->next_override = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP;
2568   return (0);
2569 }
2570
2571 VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
2572
2573 #ifndef CLIB_MARCH_VARIANT
2574 void
2575 ip6_hbh_set_next_override (uword next)
2576 {
2577   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2578
2579   hm->next_override = next;
2580 }
2581
2582 int
2583 ip6_hbh_register_option (u8 option,
2584                          int options (vlib_buffer_t * b, ip6_header_t * ip,
2585                                       ip6_hop_by_hop_option_t * opt),
2586                          u8 * trace (u8 * s, ip6_hop_by_hop_option_t * opt))
2587 {
2588   ip6_main_t *im = &ip6_main;
2589   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2590
2591   ASSERT ((u32) option < ARRAY_LEN (hm->options));
2592
2593   /* Already registered */
2594   if (hm->options[option])
2595     return (-1);
2596
2597   hm->options[option] = options;
2598   hm->trace[option] = trace;
2599
2600   /* Set global variable */
2601   im->hbh_enabled = 1;
2602
2603   return (0);
2604 }
2605
2606 int
2607 ip6_hbh_unregister_option (u8 option)
2608 {
2609   ip6_main_t *im = &ip6_main;
2610   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2611
2612   ASSERT ((u32) option < ARRAY_LEN (hm->options));
2613
2614   /* Not registered */
2615   if (!hm->options[option])
2616     return (-1);
2617
2618   hm->options[option] = NULL;
2619   hm->trace[option] = NULL;
2620
2621   /* Disable global knob if this was the last option configured */
2622   int i;
2623   bool found = false;
2624   for (i = 0; i < 256; i++)
2625     {
2626       if (hm->options[option])
2627         {
2628           found = true;
2629           break;
2630         }
2631     }
2632   if (!found)
2633     im->hbh_enabled = 0;
2634
2635   return (0);
2636 }
2637
2638 /* Global IP6 main. */
2639 ip6_main_t ip6_main;
2640 #endif
2641
2642 static clib_error_t *
2643 ip6_lookup_init (vlib_main_t * vm)
2644 {
2645   ip6_main_t *im = &ip6_main;
2646   clib_error_t *error;
2647   uword i;
2648
2649   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
2650     return error;
2651
2652   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
2653     {
2654       u32 j, i0, i1;
2655
2656       i0 = i / 32;
2657       i1 = i % 32;
2658
2659       for (j = 0; j < i0; j++)
2660         im->fib_masks[i].as_u32[j] = ~0;
2661
2662       if (i1)
2663         im->fib_masks[i].as_u32[i0] =
2664           clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
2665     }
2666
2667   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 1);
2668
2669   if (im->lookup_table_nbuckets == 0)
2670     im->lookup_table_nbuckets = IP6_FIB_DEFAULT_HASH_NUM_BUCKETS;
2671
2672   im->lookup_table_nbuckets = 1 << max_log2 (im->lookup_table_nbuckets);
2673
2674   if (im->lookup_table_size == 0)
2675     im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE;
2676
2677   clib_bihash_init_24_8 (&(im->ip6_table[IP6_FIB_TABLE_FWDING].ip6_hash),
2678                          "ip6 FIB fwding table",
2679                          im->lookup_table_nbuckets, im->lookup_table_size);
2680   clib_bihash_init_24_8 (&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash,
2681                          "ip6 FIB non-fwding table",
2682                          im->lookup_table_nbuckets, im->lookup_table_size);
2683   clib_bihash_init_40_8 (&im->ip6_mtable.ip6_mhash,
2684                          "ip6 mFIB table",
2685                          im->lookup_table_nbuckets, im->lookup_table_size);
2686
2687   /* Create FIB with index 0 and table id of 0. */
2688   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
2689                                      FIB_SOURCE_DEFAULT_ROUTE);
2690   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
2691                                       MFIB_SOURCE_DEFAULT_ROUTE);
2692
2693   {
2694     pg_node_t *pn;
2695     pn = pg_get_node (ip6_lookup_node.index);
2696     pn->unformat_edit = unformat_pg_ip6_header;
2697   }
2698
2699   /* Unless explicitly configured, don't process HBH options */
2700   im->hbh_enabled = 0;
2701
2702   {
2703     icmp6_neighbor_solicitation_header_t p;
2704
2705     clib_memset (&p, 0, sizeof (p));
2706
2707     p.ip.ip_version_traffic_class_and_flow_label =
2708       clib_host_to_net_u32 (0x6 << 28);
2709     p.ip.payload_length =
2710       clib_host_to_net_u16 (sizeof (p) -
2711                             STRUCT_OFFSET_OF
2712                             (icmp6_neighbor_solicitation_header_t, neighbor));
2713     p.ip.protocol = IP_PROTOCOL_ICMP6;
2714     p.ip.hop_limit = 255;
2715     ip6_set_solicited_node_multicast_address (&p.ip.dst_address, 0);
2716
2717     p.neighbor.icmp.type = ICMP6_neighbor_solicitation;
2718
2719     p.link_layer_option.header.type =
2720       ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
2721     p.link_layer_option.header.n_data_u64s =
2722       sizeof (p.link_layer_option) / sizeof (u64);
2723
2724     vlib_packet_template_init (vm,
2725                                &im->discover_neighbor_packet_template,
2726                                &p, sizeof (p),
2727                                /* alloc chunk size */ 8,
2728                                "ip6 neighbor discovery");
2729   }
2730
2731   return error;
2732 }
2733
2734 VLIB_INIT_FUNCTION (ip6_lookup_init);
2735
2736 static clib_error_t *
2737 test_ip6_link_command_fn (vlib_main_t * vm,
2738                           unformat_input_t * input, vlib_cli_command_t * cmd)
2739 {
2740   u8 mac[6];
2741   ip6_address_t _a, *a = &_a;
2742
2743   if (unformat (input, "%U", unformat_ethernet_address, mac))
2744     {
2745       ip6_link_local_address_from_ethernet_mac_address (a, mac);
2746       vlib_cli_output (vm, "Link local address: %U", format_ip6_address, a);
2747       ip6_ethernet_mac_address_from_link_local_address (mac, a);
2748       vlib_cli_output (vm, "Original MAC address: %U",
2749                        format_ethernet_address, mac);
2750     }
2751
2752   return 0;
2753 }
2754
2755 /*?
2756  * This command converts the given MAC Address into an IPv6 link-local
2757  * address.
2758  *
2759  * @cliexpar
2760  * Example of how to create an IPv6 link-local address:
2761  * @cliexstart{test ip6 link 16:d9:e0:91:79:86}
2762  * Link local address: fe80::14d9:e0ff:fe91:7986
2763  * Original MAC address: 16:d9:e0:91:79:86
2764  * @cliexend
2765 ?*/
2766 /* *INDENT-OFF* */
2767 VLIB_CLI_COMMAND (test_link_command, static) =
2768 {
2769   .path = "test ip6 link",
2770   .function = test_ip6_link_command_fn,
2771   .short_help = "test ip6 link <mac-address>",
2772 };
2773 /* *INDENT-ON* */
2774
2775 #ifndef CLIB_MARCH_VARIANT
2776 int
2777 vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config)
2778 {
2779   u32 fib_index;
2780
2781   fib_index = fib_table_find (FIB_PROTOCOL_IP6, table_id);
2782
2783   if (~0 == fib_index)
2784     return VNET_API_ERROR_NO_SUCH_FIB;
2785
2786   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP6,
2787                                   flow_hash_config);
2788
2789   return 0;
2790 }
2791 #endif
2792
2793 static clib_error_t *
2794 set_ip6_flow_hash_command_fn (vlib_main_t * vm,
2795                               unformat_input_t * input,
2796                               vlib_cli_command_t * cmd)
2797 {
2798   int matched = 0;
2799   u32 table_id = 0;
2800   u32 flow_hash_config = 0;
2801   int rv;
2802
2803   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2804     {
2805       if (unformat (input, "table %d", &table_id))
2806         matched = 1;
2807 #define _(a,v) \
2808     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2809       foreach_flow_hash_bit
2810 #undef _
2811         else
2812         break;
2813     }
2814
2815   if (matched == 0)
2816     return clib_error_return (0, "unknown input `%U'",
2817                               format_unformat_error, input);
2818
2819   rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config);
2820   switch (rv)
2821     {
2822     case 0:
2823       break;
2824
2825     case -1:
2826       return clib_error_return (0, "no such FIB table %d", table_id);
2827
2828     default:
2829       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2830       break;
2831     }
2832
2833   return 0;
2834 }
2835
2836 /*?
2837  * Configure the set of IPv6 fields used by the flow hash.
2838  *
2839  * @cliexpar
2840  * @parblock
2841  * Example of how to set the flow hash on a given table:
2842  * @cliexcmd{set ip6 flow-hash table 8 dst sport dport proto}
2843  *
2844  * Example of how to display the configured flow hash:
2845  * @cliexstart{show ip6 fib}
2846  * ipv6-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2847  * @::/0
2848  *   unicast-ip6-chain
2849  *   [@0]: dpo-load-balance: [index:5 buckets:1 uRPF:5 to:[0:0]]
2850  *     [0] [@0]: dpo-drop ip6
2851  * fe80::/10
2852  *   unicast-ip6-chain
2853  *   [@0]: dpo-load-balance: [index:10 buckets:1 uRPF:10 to:[0:0]]
2854  *     [0] [@2]: dpo-receive
2855  * ff02::1/128
2856  *   unicast-ip6-chain
2857  *   [@0]: dpo-load-balance: [index:8 buckets:1 uRPF:8 to:[0:0]]
2858  *     [0] [@2]: dpo-receive
2859  * ff02::2/128
2860  *   unicast-ip6-chain
2861  *   [@0]: dpo-load-balance: [index:7 buckets:1 uRPF:7 to:[0:0]]
2862  *     [0] [@2]: dpo-receive
2863  * ff02::16/128
2864  *   unicast-ip6-chain
2865  *   [@0]: dpo-load-balance: [index:9 buckets:1 uRPF:9 to:[0:0]]
2866  *     [0] [@2]: dpo-receive
2867  * ff02::1:ff00:0/104
2868  *   unicast-ip6-chain
2869  *   [@0]: dpo-load-balance: [index:6 buckets:1 uRPF:6 to:[0:0]]
2870  *     [0] [@2]: dpo-receive
2871  * ipv6-VRF:8, fib_index 1, flow hash: dst sport dport proto
2872  * @::/0
2873  *   unicast-ip6-chain
2874  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2875  *     [0] [@0]: dpo-drop ip6
2876  * @::a:1:1:0:4/126
2877  *   unicast-ip6-chain
2878  *   [@0]: dpo-load-balance: [index:27 buckets:1 uRPF:26 to:[0:0]]
2879  *     [0] [@4]: ipv6-glean: af_packet0
2880  * @::a:1:1:0:7/128
2881  *   unicast-ip6-chain
2882  *   [@0]: dpo-load-balance: [index:28 buckets:1 uRPF:27 to:[0:0]]
2883  *     [0] [@2]: dpo-receive: @::a:1:1:0:7 on af_packet0
2884  * fe80::/10
2885  *   unicast-ip6-chain
2886  *   [@0]: dpo-load-balance: [index:26 buckets:1 uRPF:25 to:[0:0]]
2887  *     [0] [@2]: dpo-receive
2888  * fe80::fe:3eff:fe3e:9222/128
2889  *   unicast-ip6-chain
2890  *   [@0]: dpo-load-balance: [index:29 buckets:1 uRPF:28 to:[0:0]]
2891  *     [0] [@2]: dpo-receive: fe80::fe:3eff:fe3e:9222 on af_packet0
2892  * ff02::1/128
2893  *   unicast-ip6-chain
2894  *   [@0]: dpo-load-balance: [index:24 buckets:1 uRPF:23 to:[0:0]]
2895  *     [0] [@2]: dpo-receive
2896  * ff02::2/128
2897  *   unicast-ip6-chain
2898  *   [@0]: dpo-load-balance: [index:23 buckets:1 uRPF:22 to:[0:0]]
2899  *     [0] [@2]: dpo-receive
2900  * ff02::16/128
2901  *   unicast-ip6-chain
2902  *   [@0]: dpo-load-balance: [index:25 buckets:1 uRPF:24 to:[0:0]]
2903  *     [0] [@2]: dpo-receive
2904  * ff02::1:ff00:0/104
2905  *   unicast-ip6-chain
2906  *   [@0]: dpo-load-balance: [index:22 buckets:1 uRPF:21 to:[0:0]]
2907  *     [0] [@2]: dpo-receive
2908  * @cliexend
2909  * @endparblock
2910 ?*/
2911 /* *INDENT-OFF* */
2912 VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) =
2913 {
2914   .path = "set ip6 flow-hash",
2915   .short_help =
2916   "set ip6 flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2917   .function = set_ip6_flow_hash_command_fn,
2918 };
2919 /* *INDENT-ON* */
2920
2921 static clib_error_t *
2922 show_ip6_local_command_fn (vlib_main_t * vm,
2923                            unformat_input_t * input, vlib_cli_command_t * cmd)
2924 {
2925   ip6_main_t *im = &ip6_main;
2926   ip_lookup_main_t *lm = &im->lookup_main;
2927   int i;
2928
2929   vlib_cli_output (vm, "Protocols handled by ip6_local");
2930   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
2931     {
2932       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2933         {
2934
2935           u32 node_index = vlib_get_node (vm,
2936                                           ip6_local_node.index)->
2937             next_nodes[lm->local_next_by_ip_protocol[i]];
2938           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
2939                            node_index);
2940         }
2941     }
2942   return 0;
2943 }
2944
2945
2946
2947 /*?
2948  * Display the set of protocols handled by the local IPv6 stack.
2949  *
2950  * @cliexpar
2951  * Example of how to display local protocol table:
2952  * @cliexstart{show ip6 local}
2953  * Protocols handled by ip6_local
2954  * 17
2955  * 43
2956  * 58
2957  * 115
2958  * @cliexend
2959 ?*/
2960 /* *INDENT-OFF* */
2961 VLIB_CLI_COMMAND (show_ip6_local, static) =
2962 {
2963   .path = "show ip6 local",
2964   .function = show_ip6_local_command_fn,
2965   .short_help = "show ip6 local",
2966 };
2967 /* *INDENT-ON* */
2968
2969 #ifndef CLIB_MARCH_VARIANT
2970 int
2971 vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2972                              u32 table_index)
2973 {
2974   vnet_main_t *vnm = vnet_get_main ();
2975   vnet_interface_main_t *im = &vnm->interface_main;
2976   ip6_main_t *ipm = &ip6_main;
2977   ip_lookup_main_t *lm = &ipm->lookup_main;
2978   vnet_classify_main_t *cm = &vnet_classify_main;
2979   ip6_address_t *if_addr;
2980
2981   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2982     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2983
2984   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2985     return VNET_API_ERROR_NO_SUCH_ENTRY;
2986
2987   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2988   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2989
2990   if_addr = ip6_interface_first_address (ipm, sw_if_index);
2991
2992   if (NULL != if_addr)
2993     {
2994       fib_prefix_t pfx = {
2995         .fp_len = 128,
2996         .fp_proto = FIB_PROTOCOL_IP6,
2997         .fp_addr.ip6 = *if_addr,
2998       };
2999       u32 fib_index;
3000
3001       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3002                                                        sw_if_index);
3003
3004
3005       if (table_index != (u32) ~ 0)
3006         {
3007           dpo_id_t dpo = DPO_INVALID;
3008
3009           dpo_set (&dpo,
3010                    DPO_CLASSIFY,
3011                    DPO_PROTO_IP6,
3012                    classify_dpo_create (DPO_PROTO_IP6, table_index));
3013
3014           fib_table_entry_special_dpo_add (fib_index,
3015                                            &pfx,
3016                                            FIB_SOURCE_CLASSIFY,
3017                                            FIB_ENTRY_FLAG_NONE, &dpo);
3018           dpo_reset (&dpo);
3019         }
3020       else
3021         {
3022           fib_table_entry_special_remove (fib_index,
3023                                           &pfx, FIB_SOURCE_CLASSIFY);
3024         }
3025     }
3026
3027   return 0;
3028 }
3029 #endif
3030
3031 static clib_error_t *
3032 set_ip6_classify_command_fn (vlib_main_t * vm,
3033                              unformat_input_t * input,
3034                              vlib_cli_command_t * cmd)
3035 {
3036   u32 table_index = ~0;
3037   int table_index_set = 0;
3038   u32 sw_if_index = ~0;
3039   int rv;
3040
3041   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3042     {
3043       if (unformat (input, "table-index %d", &table_index))
3044         table_index_set = 1;
3045       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3046                          vnet_get_main (), &sw_if_index))
3047         ;
3048       else
3049         break;
3050     }
3051
3052   if (table_index_set == 0)
3053     return clib_error_return (0, "classify table-index must be specified");
3054
3055   if (sw_if_index == ~0)
3056     return clib_error_return (0, "interface / subif must be specified");
3057
3058   rv = vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index);
3059
3060   switch (rv)
3061     {
3062     case 0:
3063       break;
3064
3065     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3066       return clib_error_return (0, "No such interface");
3067
3068     case VNET_API_ERROR_NO_SUCH_ENTRY:
3069       return clib_error_return (0, "No such classifier table");
3070     }
3071   return 0;
3072 }
3073
3074 /*?
3075  * Assign a classification table to an interface. The classification
3076  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3077  * commands. Once the table is created, use this command to filter packets
3078  * on an interface.
3079  *
3080  * @cliexpar
3081  * Example of how to assign a classification table to an interface:
3082  * @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1}
3083 ?*/
3084 /* *INDENT-OFF* */
3085 VLIB_CLI_COMMAND (set_ip6_classify_command, static) =
3086 {
3087   .path = "set ip6 classify",
3088   .short_help =
3089   "set ip6 classify intfc <interface> table-index <classify-idx>",
3090   .function = set_ip6_classify_command_fn,
3091 };
3092 /* *INDENT-ON* */
3093
3094 static clib_error_t *
3095 ip6_config (vlib_main_t * vm, unformat_input_t * input)
3096 {
3097   ip6_main_t *im = &ip6_main;
3098   uword heapsize = 0;
3099   u32 tmp;
3100   u32 nbuckets = 0;
3101
3102   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3103     {
3104       if (unformat (input, "hash-buckets %d", &tmp))
3105         nbuckets = tmp;
3106       else if (unformat (input, "heap-size %U",
3107                          unformat_memory_size, &heapsize))
3108         ;
3109       else
3110         return clib_error_return (0, "unknown input '%U'",
3111                                   format_unformat_error, input);
3112     }
3113
3114   im->lookup_table_nbuckets = nbuckets;
3115   im->lookup_table_size = heapsize;
3116
3117   return 0;
3118 }
3119
3120 VLIB_EARLY_CONFIG_FUNCTION (ip6_config, "ip6");
3121
3122 /*
3123  * fd.io coding-style-patch-verification: ON
3124  *
3125  * Local Variables:
3126  * eval: (c-set-style "gnu")
3127  * End:
3128  */