1f7fc3f66faf7276660edf1f7d4568ec8ab90bf7
[vpp.git] / src / vnet / ip / ip6_forward.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip6_forward.c: IP v6 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ip/ip6_link.h>
44 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vppinfra/cache.h>
47 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
48 #include <vnet/fib/ip6_fib.h>
49 #include <vnet/mfib/ip6_mfib.h>
50 #include <vnet/dpo/load_balance_map.h>
51 #include <vnet/dpo/receive_dpo.h>
52 #include <vnet/dpo/classify_dpo.h>
53 #include <vnet/classify/vnet_classify.h>
54 #include <vnet/pg/pg.h>
55
56 #ifndef CLIB_MARCH_VARIANT
57 #include <vppinfra/bihash_template.c>
58 #endif
59 #include <vnet/ip/ip6_forward.h>
60 #include <vnet/interface_output.h>
61
62 /* Flag used by IOAM code: the classifier sets it, pop-hop-by-hop checks it. */
63 #define OI_DECAP   0x80000000
64
65 static void
66 ip6_add_interface_prefix_routes (ip6_main_t * im,
67                                  u32 sw_if_index,
68                                  u32 fib_index,
69                                  ip6_address_t * address, u32 address_length)
70 {
71   ip_lookup_main_t *lm = &im->lookup_main;
72   ip_interface_prefix_t *if_prefix;
73
74   ip_interface_prefix_key_t key = {
75     .prefix = {
76       .fp_len = address_length,
77       .fp_proto = FIB_PROTOCOL_IP6,
78       .fp_addr.ip6 = {
79         .as_u64 = {
80           address->as_u64[0] & im->fib_masks[address_length].as_u64[0],
81           address->as_u64[1] & im->fib_masks[address_length].as_u64[1],
82         },
83       },
84     },
85     .sw_if_index = sw_if_index,
86   };
87
88   /* If prefix already set on interface, just increment ref count & return */
89   if_prefix = ip_get_interface_prefix (lm, &key);
90   if (if_prefix)
91     {
92       if_prefix->ref_count += 1;
93       return;
94     }
95
96   /* New prefix - allocate a pool entry, initialize it, add to the hash */
97   pool_get (lm->if_prefix_pool, if_prefix);
98   if_prefix->ref_count = 1;
99   clib_memcpy (&if_prefix->key, &key, sizeof (key));
100   mhash_set (&lm->prefix_to_if_prefix_index, &key,
101              if_prefix - lm->if_prefix_pool, 0 /* old value */ );
102
103   /* length < 128 - add glean */
104   if (address_length < 128)
105     {
106       /* set the glean route for the prefix */
107       fib_table_entry_update_one_path (fib_index, &key.prefix,
108                                        FIB_SOURCE_INTERFACE,
109                                        (FIB_ENTRY_FLAG_CONNECTED |
110                                         FIB_ENTRY_FLAG_ATTACHED),
111                                        DPO_PROTO_IP6,
112                                        /* No next-hop address */
113                                        NULL, sw_if_index,
114                                        /* invalid FIB index */
115                                        ~0, 1,
116                                        /* no out-label stack */
117                                        NULL, FIB_ROUTE_PATH_FLAG_NONE);
118     }
119 }
120
121 static void
122 ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
123                           ip6_main_t * im, u32 fib_index,
124                           ip_interface_address_t * a)
125 {
126   ip_lookup_main_t *lm = &im->lookup_main;
127   ip6_address_t *address = ip_interface_address_get_address (lm, a);
128   fib_prefix_t pfx = {
129     .fp_len = a->address_length,
130     .fp_proto = FIB_PROTOCOL_IP6,
131     .fp_addr.ip6 = *address,
132   };
133
134   /* set special routes for the prefix if needed */
135   ip6_add_interface_prefix_routes (im, sw_if_index, fib_index,
136                                    address, a->address_length);
137
138   pfx.fp_len = 128;
139   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
140     {
141       u32 classify_table_index =
142         lm->classify_table_index_by_sw_if_index[sw_if_index];
143       if (classify_table_index != (u32) ~ 0)
144         {
145           dpo_id_t dpo = DPO_INVALID;
146
147           dpo_set (&dpo,
148                    DPO_CLASSIFY,
149                    DPO_PROTO_IP6,
150                    classify_dpo_create (DPO_PROTO_IP6, classify_table_index));
151
152           fib_table_entry_special_dpo_add (fib_index,
153                                            &pfx,
154                                            FIB_SOURCE_CLASSIFY,
155                                            FIB_ENTRY_FLAG_NONE, &dpo);
156           dpo_reset (&dpo);
157         }
158     }
159
160   fib_table_entry_update_one_path (fib_index, &pfx,
161                                    FIB_SOURCE_INTERFACE,
162                                    (FIB_ENTRY_FLAG_CONNECTED |
163                                     FIB_ENTRY_FLAG_LOCAL),
164                                    DPO_PROTO_IP6,
165                                    &pfx.fp_addr,
166                                    sw_if_index, ~0,
167                                    1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
168 }
169
170 static void
171 ip6_del_interface_prefix_routes (ip6_main_t * im,
172                                  u32 sw_if_index,
173                                  u32 fib_index,
174                                  ip6_address_t * address, u32 address_length)
175 {
176   ip_lookup_main_t *lm = &im->lookup_main;
177   ip_interface_prefix_t *if_prefix;
178
179   ip_interface_prefix_key_t key = {
180     .prefix = {
181       .fp_len = address_length,
182       .fp_proto = FIB_PROTOCOL_IP6,
183       .fp_addr.ip6 = {
184         .as_u64 = {
185           address->as_u64[0] & im->fib_masks[address_length].as_u64[0],
186           address->as_u64[1] & im->fib_masks[address_length].as_u64[1],
187         },
188       },
189     },
190     .sw_if_index = sw_if_index,
191   };
192
193   if_prefix = ip_get_interface_prefix (lm, &key);
194   if (!if_prefix)
195     {
196       clib_warning ("Prefix not found while deleting %U",
197                     format_ip6_address_and_length, address, address_length);
198       return;
199     }
200
201   /* If not deleting last intf addr in prefix, decrement ref count & return */
202   if_prefix->ref_count -= 1;
203   if (if_prefix->ref_count > 0)
204     return;
205
206   /* length <= 128, delete glean route */
207   if (address_length <= 128)
208     {
209       /* remove glean route for prefix */
210       fib_table_entry_delete (fib_index, &key.prefix, FIB_SOURCE_INTERFACE);
211     }
212
213   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */ );
214   pool_put (lm->if_prefix_pool, if_prefix);
215 }
216
217 static void
218 ip6_del_interface_routes (u32 sw_if_index, ip6_main_t * im,
219                           u32 fib_index,
220                           ip6_address_t * address, u32 address_length)
221 {
222   fib_prefix_t pfx = {
223     .fp_len = 128,
224     .fp_proto = FIB_PROTOCOL_IP6,
225     .fp_addr.ip6 = *address,
226   };
227
228   /* delete special routes for the prefix if needed */
229   ip6_del_interface_prefix_routes (im, sw_if_index, fib_index,
230                                    address, address_length);
231
232   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
233 }
234
235 #ifndef CLIB_MARCH_VARIANT
236 void
237 ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
238 {
239   ip6_main_t *im = &ip6_main;
240   vnet_main_t *vnm = vnet_get_main ();
241   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
242
243   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
244
245   /*
246    * enable/disable only on the 1<->0 transition
247    */
248   if (is_enable)
249     {
250       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
251         return;
252     }
253   else
254     {
255       /* The ref count is 0 when an address is removed from an interface that has
256        * no address - this is not a ciritical error */
257       if (0 == im->ip_enabled_by_sw_if_index[sw_if_index] ||
258           0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
259         return;
260     }
261
262   vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index,
263                                !is_enable, 0, 0);
264
265   vnet_feature_enable_disable ("ip6-multicast", "ip6-not-enabled",
266                                sw_if_index, !is_enable, 0, 0);
267
268   if (is_enable)
269     hi->l3_if_count++;
270   else if (hi->l3_if_count)
271     hi->l3_if_count--;
272 }
273
274 /* get first interface address */
275 ip6_address_t *
276 ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index)
277 {
278   ip_lookup_main_t *lm = &im->lookup_main;
279   ip_interface_address_t *ia = 0;
280   ip6_address_t *result = 0;
281
282   foreach_ip_interface_address (lm, ia, sw_if_index,
283                                 1 /* honor unnumbered */,
284   ({
285     ip6_address_t * a = ip_interface_address_get_address (lm, ia);
286     result = a;
287     break;
288   }));
289   return result;
290 }
291
292 clib_error_t *
293 ip6_add_del_interface_address (vlib_main_t * vm,
294                                u32 sw_if_index,
295                                ip6_address_t * address,
296                                u32 address_length, u32 is_del)
297 {
298   vnet_main_t *vnm = vnet_get_main ();
299   ip6_main_t *im = &ip6_main;
300   ip_lookup_main_t *lm = &im->lookup_main;
301   clib_error_t *error = NULL;
302   u32 if_address_index;
303   ip6_address_fib_t ip6_af, *addr_fib = 0;
304   const ip6_address_t *ll_addr;
305
306   error = vnet_sw_interface_supports_addressing (vnm, sw_if_index);
307   if (error)
308     {
309       vnm->api_errno = VNET_API_ERROR_UNSUPPORTED;
310       return error;
311     }
312
313   if (ip6_address_is_link_local_unicast (address))
314     {
315       if (address_length != 128)
316         {
317           vnm->api_errno = VNET_API_ERROR_ADDRESS_LENGTH_MISMATCH;
318           return
319             clib_error_create
320             ("prefix length of link-local address must be 128");
321         }
322       if (!is_del)
323         {
324           int rv;
325
326           rv = ip6_link_set_local_address (sw_if_index, address);
327
328           if (rv)
329             {
330               vnm->api_errno = rv;
331               return clib_error_create ("address not assignable");
332             }
333         }
334       else
335         {
336           ll_addr = ip6_get_link_local_address (sw_if_index);
337           if (ip6_address_is_equal (ll_addr, address))
338             {
339               vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_DELETABLE;
340               return clib_error_create ("address not deletable");
341             }
342           else
343             {
344               vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
345               return clib_error_create ("address not found");
346             }
347         }
348
349       return (NULL);
350     }
351
352   ip6_addr_fib_init (&ip6_af, address,
353                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
354   vec_add1 (addr_fib, ip6_af);
355
356   if (!is_del)
357     {
358       /* When adding an address check that it does not conflict
359          with an existing address on any interface in this table. */
360       ip_interface_address_t *ia;
361       vnet_sw_interface_t *sif;
362
363       pool_foreach (sif, vnm->interface_main.sw_interfaces)
364        {
365           if (im->fib_index_by_sw_if_index[sw_if_index] ==
366               im->fib_index_by_sw_if_index[sif->sw_if_index])
367             {
368               foreach_ip_interface_address
369                 (&im->lookup_main, ia, sif->sw_if_index,
370                  0 /* honor unnumbered */ ,
371                  ({
372                    ip6_address_t * x =
373                      ip_interface_address_get_address
374                      (&im->lookup_main, ia);
375
376                    if (ip6_destination_matches_route
377                        (im, address, x, ia->address_length) ||
378                        ip6_destination_matches_route (im,
379                                                       x,
380                                                       address,
381                                                       address_length))
382                      {
383                        /* an intf may have >1 addr from the same prefix */
384                        if ((sw_if_index == sif->sw_if_index) &&
385                            (ia->address_length == address_length) &&
386                            !ip6_address_is_equal (x, address))
387                          continue;
388
389                        if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
390                          /* if the address we're comparing against is stale
391                           * then the CP has not added this one back yet, maybe
392                           * it never will, so we have to assume it won't and
393                           * ignore it. if it does add it back, then it will fail
394                           * because this one is now present */
395                          continue;
396
397                        /* error if the length or intf was different */
398                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
399                        error =  clib_error_create
400                          ("failed to add %U which conflicts with %U for interface %U",
401                           format_ip6_address_and_length, address,
402                           address_length,
403                           format_ip6_address_and_length, x,
404                           ia->address_length,
405                           format_vnet_sw_if_index_name, vnm,
406                           sif->sw_if_index);
407                        goto done;
408                      }
409                  }));
410             }
411       }
412     }
413
414   if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
415
416   if (is_del)
417     {
418       if (~0 == if_address_index)
419         {
420           vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
421           error = clib_error_create ("%U not found for interface %U",
422                                      lm->format_address_and_length,
423                                      addr_fib, address_length,
424                                      format_vnet_sw_if_index_name, vnm,
425                                      sw_if_index);
426           goto done;
427         }
428
429       error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib,
430                                         address_length, sw_if_index);
431       if (error)
432         goto done;
433     }
434   else
435     {
436       if (~0 != if_address_index)
437         {
438           ip_interface_address_t *ia;
439
440           ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
441
442           if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
443             {
444               if (ia->sw_if_index == sw_if_index)
445                 {
446                   /* re-adding an address during the replace action.
447                    * consdier this the update. clear the flag and
448                    * we're done */
449                   ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
450                   goto done;
451                 }
452               else
453                 {
454                   /* The prefix is moving from one interface to another.
455                    * delete the stale and add the new */
456                   ip6_add_del_interface_address (vm,
457                                                  ia->sw_if_index,
458                                                  address, address_length, 1);
459                   ia = NULL;
460                   error = ip_interface_address_add (lm, sw_if_index,
461                                                     addr_fib, address_length,
462                                                     &if_address_index);
463                 }
464             }
465           else
466             {
467               vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
468               error = clib_error_create
469                 ("Prefix %U already found on interface %U",
470                  lm->format_address_and_length, addr_fib, address_length,
471                  format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
472             }
473         }
474       else
475         error = ip_interface_address_add (lm, sw_if_index,
476                                           addr_fib, address_length,
477                                           &if_address_index);
478     }
479
480   if (error)
481     goto done;
482
483   ip6_sw_interface_enable_disable (sw_if_index, !is_del);
484   if (!is_del)
485     ip6_link_enable (sw_if_index, NULL);
486
487   /* intf addr routes are added/deleted on admin up/down */
488   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
489     {
490       if (is_del)
491         ip6_del_interface_routes (sw_if_index,
492                                   im, ip6_af.fib_index, address,
493                                   address_length);
494       else
495         ip6_add_interface_routes (vnm, sw_if_index,
496                                   im, ip6_af.fib_index,
497                                   pool_elt_at_index (lm->if_address_pool,
498                                                      if_address_index));
499     }
500
501   ip6_add_del_interface_address_callback_t *cb;
502   vec_foreach (cb, im->add_del_interface_address_callbacks)
503     cb->function (im, cb->function_opaque, sw_if_index,
504                   address, address_length, if_address_index, is_del);
505
506   if (is_del)
507     ip6_link_disable (sw_if_index);
508
509 done:
510   vec_free (addr_fib);
511   return error;
512 }
513
514 #endif
515
516 static clib_error_t *
517 ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
518 {
519   ip6_main_t *im = &ip6_main;
520   ip_interface_address_t *ia;
521   ip6_address_t *a;
522   u32 is_admin_up, fib_index;
523
524   vec_validate_init_empty (im->
525                            lookup_main.if_address_pool_index_by_sw_if_index,
526                            sw_if_index, ~0);
527
528   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
529
530   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
531
532   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
533                                 0 /* honor unnumbered */,
534   ({
535     a = ip_interface_address_get_address (&im->lookup_main, ia);
536     if (is_admin_up)
537       ip6_add_interface_routes (vnm, sw_if_index,
538                                 im, fib_index,
539                                 ia);
540     else
541       ip6_del_interface_routes (sw_if_index, im, fib_index,
542                                 a, ia->address_length);
543   }));
544
545   return 0;
546 }
547
548 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
549
550 /* Built-in ip6 unicast rx feature path definition */
551 VNET_FEATURE_ARC_INIT (ip6_unicast, static) =
552 {
553   .arc_name  = "ip6-unicast",
554   .start_nodes = VNET_FEATURES ("ip6-input"),
555   .last_in_arc = "ip6-lookup",
556   .arc_index_ptr = &ip6_main.lookup_main.ucast_feature_arc_index,
557 };
558
559 VNET_FEATURE_INIT (ip6_flow_classify, static) =
560 {
561   .arc_name = "ip6-unicast",
562   .node_name = "ip6-flow-classify",
563   .runs_before = VNET_FEATURES ("ip6-inacl"),
564 };
565
566 VNET_FEATURE_INIT (ip6_inacl, static) =
567 {
568   .arc_name = "ip6-unicast",
569   .node_name = "ip6-inacl",
570   .runs_before = VNET_FEATURES ("ip6-policer-classify"),
571 };
572
573 VNET_FEATURE_INIT (ip6_policer_classify, static) =
574 {
575   .arc_name = "ip6-unicast",
576   .node_name = "ip6-policer-classify",
577   .runs_before = VNET_FEATURES ("ipsec6-input-feature"),
578 };
579
580 VNET_FEATURE_INIT (ip6_ipsec, static) =
581 {
582   .arc_name = "ip6-unicast",
583   .node_name = "ipsec6-input-feature",
584   .runs_before = VNET_FEATURES ("l2tp-decap"),
585 };
586
587 VNET_FEATURE_INIT (ip6_l2tp, static) =
588 {
589   .arc_name = "ip6-unicast",
590   .node_name = "l2tp-decap",
591   .runs_before = VNET_FEATURES ("vpath-input-ip6"),
592 };
593
594 VNET_FEATURE_INIT (ip6_vpath, static) =
595 {
596   .arc_name = "ip6-unicast",
597   .node_name = "vpath-input-ip6",
598   .runs_before = VNET_FEATURES ("ip6-vxlan-bypass"),
599 };
600
601 VNET_FEATURE_INIT (ip6_vxlan_bypass, static) =
602 {
603   .arc_name = "ip6-unicast",
604   .node_name = "ip6-vxlan-bypass",
605   .runs_before = VNET_FEATURES ("ip6-lookup"),
606 };
607
608 VNET_FEATURE_INIT (ip6_not_enabled, static) =
609 {
610   .arc_name = "ip6-unicast",
611   .node_name = "ip6-not-enabled",
612   .runs_before = VNET_FEATURES ("ip6-lookup"),
613 };
614
615 VNET_FEATURE_INIT (ip6_lookup, static) =
616 {
617   .arc_name = "ip6-unicast",
618   .node_name = "ip6-lookup",
619   .runs_before = 0,  /*last feature*/
620 };
621
622 /* Built-in ip6 multicast rx feature path definition (none now) */
623 VNET_FEATURE_ARC_INIT (ip6_multicast, static) =
624 {
625   .arc_name  = "ip6-multicast",
626   .start_nodes = VNET_FEATURES ("ip6-input"),
627   .last_in_arc = "ip6-mfib-forward-lookup",
628   .arc_index_ptr = &ip6_main.lookup_main.mcast_feature_arc_index,
629 };
630
631 VNET_FEATURE_INIT (ip6_vpath_mc, static) = {
632   .arc_name = "ip6-multicast",
633   .node_name = "vpath-input-ip6",
634   .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
635 };
636
637 VNET_FEATURE_INIT (ip6_not_enabled_mc, static) = {
638   .arc_name = "ip6-multicast",
639   .node_name = "ip6-not-enabled",
640   .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
641 };
642
643 VNET_FEATURE_INIT (ip6_mc_lookup, static) = {
644   .arc_name = "ip6-multicast",
645   .node_name = "ip6-mfib-forward-lookup",
646   .runs_before = 0, /* last feature */
647 };
648
649 /* Built-in ip4 tx feature path definition */
650 VNET_FEATURE_ARC_INIT (ip6_output, static) =
651 {
652   .arc_name  = "ip6-output",
653   .start_nodes = VNET_FEATURES ("ip6-rewrite", "ip6-midchain", "ip6-dvr-dpo"),
654   .last_in_arc = "interface-output",
655   .arc_index_ptr = &ip6_main.lookup_main.output_feature_arc_index,
656 };
657
658 VNET_FEATURE_INIT (ip6_outacl, static) = {
659   .arc_name = "ip6-output",
660   .node_name = "ip6-outacl",
661   .runs_before = VNET_FEATURES ("ipsec6-output-feature"),
662 };
663
664 VNET_FEATURE_INIT (ip6_ipsec_output, static) = {
665   .arc_name = "ip6-output",
666   .node_name = "ipsec6-output-feature",
667   .runs_before = VNET_FEATURES ("interface-output"),
668 };
669
670 VNET_FEATURE_INIT (ip6_interface_output, static) = {
671   .arc_name = "ip6-output",
672   .node_name = "interface-output",
673   .runs_before = 0, /* not before any other features */
674 };
675
676 static clib_error_t *
677 ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
678 {
679   ip6_main_t *im = &ip6_main;
680
681   vec_validate_init_empty (im->fib_index_by_sw_if_index, sw_if_index, ~0);
682   vec_validate_init_empty (im->mfib_index_by_sw_if_index, sw_if_index, ~0);
683
684   if (is_add)
685     {
686       /* Fill in lookup tables with default table (0). */
687       im->fib_index_by_sw_if_index[sw_if_index] = 0;
688       im->mfib_index_by_sw_if_index[sw_if_index] = 0;
689     }
690   else
691     {
692       /* Ensure that IPv6 is disabled */
693       ip6_main_t *im6 = &ip6_main;
694       ip_lookup_main_t *lm6 = &im6->lookup_main;
695       ip_interface_address_t *ia = 0;
696       ip6_address_t *address;
697       vlib_main_t *vm = vlib_get_main ();
698
699       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
700       foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
701       ({
702         address = ip_interface_address_get_address (lm6, ia);
703         ip6_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
704       }));
705       ip6_mfib_interface_enable_disable (sw_if_index, 0);
706
707       if (0 != im6->fib_index_by_sw_if_index[sw_if_index])
708         fib_table_bind (FIB_PROTOCOL_IP6, sw_if_index, 0);
709       if (0 != im6->mfib_index_by_sw_if_index[sw_if_index])
710         mfib_table_bind (FIB_PROTOCOL_IP6, sw_if_index, 0);
711
712       /* Erase the lookup tables just in case */
713       im6->fib_index_by_sw_if_index[sw_if_index] = ~0;
714       im6->mfib_index_by_sw_if_index[sw_if_index] = ~0;
715     }
716
717   vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index,
718                                is_add, 0, 0);
719
720   vnet_feature_enable_disable ("ip6-multicast", "ip6-not-enabled",
721                                sw_if_index, is_add, 0, 0);
722
723   return /* no error */ 0;
724 }
725
726 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del);
727
728 VLIB_NODE_FN (ip6_lookup_node) (vlib_main_t * vm,
729                                 vlib_node_runtime_t * node,
730                                 vlib_frame_t * frame)
731 {
732   return ip6_lookup_inline (vm, node, frame);
733 }
734
735 static u8 *format_ip6_lookup_trace (u8 * s, va_list * args);
736
737 VLIB_REGISTER_NODE (ip6_lookup_node) =
738 {
739   .name = "ip6-lookup",
740   .vector_size = sizeof (u32),
741   .format_trace = format_ip6_lookup_trace,
742   .n_next_nodes = IP6_LOOKUP_N_NEXT,
743   .next_nodes = IP6_LOOKUP_NEXT_NODES,
744 };
745
746 VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm,
747                                       vlib_node_runtime_t * node,
748                                       vlib_frame_t * frame)
749 {
750   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
751   u32 n_left, *from;
752   u32 thread_index = vm->thread_index;
753   ip6_main_t *im = &ip6_main;
754   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
755   u16 nexts[VLIB_FRAME_SIZE], *next;
756
757   from = vlib_frame_vector_args (frame);
758   n_left = frame->n_vectors;
759   next = nexts;
760
761   vlib_get_buffers (vm, from, bufs, n_left);
762
763   while (n_left >= 4)
764     {
765       const load_balance_t *lb0, *lb1;
766       const ip6_header_t *ip0, *ip1;
767       u32 lbi0, hc0, lbi1, hc1;
768       const dpo_id_t *dpo0, *dpo1;
769
770       /* Prefetch next iteration. */
771       {
772         vlib_prefetch_buffer_header (b[2], STORE);
773         vlib_prefetch_buffer_header (b[3], STORE);
774
775         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), STORE);
776         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), STORE);
777       }
778
779       ip0 = vlib_buffer_get_current (b[0]);
780       ip1 = vlib_buffer_get_current (b[1]);
781       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
782       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
783
784       lb0 = load_balance_get (lbi0);
785       lb1 = load_balance_get (lbi1);
786
787       /*
788        * this node is for via FIBs we can re-use the hash value from the
789        * to node if present.
790        * We don't want to use the same hash value at each level in the recursion
791        * graph as that would lead to polarisation
792        */
793       hc0 = hc1 = 0;
794
795       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
796         {
797           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
798             {
799               hc0 = vnet_buffer (b[0])->ip.flow_hash =
800                 vnet_buffer (b[0])->ip.flow_hash >> 1;
801             }
802           else
803             {
804               hc0 = vnet_buffer (b[0])->ip.flow_hash =
805                 ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
806             }
807           dpo0 = load_balance_get_fwd_bucket
808             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
809         }
810       else
811         {
812           dpo0 = load_balance_get_bucket_i (lb0, 0);
813         }
814       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
815         {
816           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
817             {
818               hc1 = vnet_buffer (b[1])->ip.flow_hash =
819                 vnet_buffer (b[1])->ip.flow_hash >> 1;
820             }
821           else
822             {
823               hc1 = vnet_buffer (b[1])->ip.flow_hash =
824                 ip6_compute_flow_hash (ip1, lb1->lb_hash_config);
825             }
826           dpo1 = load_balance_get_fwd_bucket
827             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
828         }
829       else
830         {
831           dpo1 = load_balance_get_bucket_i (lb1, 0);
832         }
833
834       next[0] = dpo0->dpoi_next_node;
835       next[1] = dpo1->dpoi_next_node;
836
837       /* Only process the HBH Option Header if explicitly configured to do so */
838       if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
839         {
840           next[0] = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
841             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[0];
842         }
843       /* Only process the HBH Option Header if explicitly configured to do so */
844       if (PREDICT_FALSE (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
845         {
846           next[1] = (dpo_is_adj (dpo1) && im->hbh_enabled) ?
847             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[1];
848         }
849
850       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
851       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
852
853       vlib_increment_combined_counter
854         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
855       vlib_increment_combined_counter
856         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
857
858       b += 2;
859       next += 2;
860       n_left -= 2;
861     }
862
863   while (n_left > 0)
864     {
865       const load_balance_t *lb0;
866       const ip6_header_t *ip0;
867       const dpo_id_t *dpo0;
868       u32 lbi0, hc0;
869
870       ip0 = vlib_buffer_get_current (b[0]);
871       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
872
873       lb0 = load_balance_get (lbi0);
874
875       hc0 = 0;
876       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
877         {
878           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
879             {
880               hc0 = vnet_buffer (b[0])->ip.flow_hash =
881                 vnet_buffer (b[0])->ip.flow_hash >> 1;
882             }
883           else
884             {
885               hc0 = vnet_buffer (b[0])->ip.flow_hash =
886                 ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
887             }
888           dpo0 = load_balance_get_fwd_bucket
889             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
890         }
891       else
892         {
893           dpo0 = load_balance_get_bucket_i (lb0, 0);
894         }
895
896       next[0] = dpo0->dpoi_next_node;
897       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
898
899       /* Only process the HBH Option Header if explicitly configured to do so */
900       if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
901         {
902           next[0] = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
903             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[0];
904         }
905
906       vlib_increment_combined_counter
907         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
908
909       b += 1;
910       next += 1;
911       n_left -= 1;
912     }
913
914   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
915
916   if (node->flags & VLIB_NODE_FLAG_TRACE)
917     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
918
919   return frame->n_vectors;
920 }
921
922 VLIB_REGISTER_NODE (ip6_load_balance_node) =
923 {
924   .name = "ip6-load-balance",
925   .vector_size = sizeof (u32),
926   .sibling_of = "ip6-lookup",
927   .format_trace = format_ip6_lookup_trace,
928 };
929
/*
 * Per-packet trace record.  It is filled in by ip6_forward_next_trace ()
 * and rendered by the format_ip6_*_trace () functions in this file.
 */
930 typedef struct
931 {
932   /* Adjacency taken: copied from ip.adj_index[] of the traced buffer
         (RX or TX slot, as selected by the tracer's caller). */
933   u32 adj_index;
      /* Copy of ip.flow_hash of the traced buffer. */
934   u32 flow_hash;
      /* sw_if_index[VLIB_TX] of the buffer when that is not ~0, otherwise
         the FIB index bound to the buffer's RX interface. */
935   u32 fib_index;
936
937   /* Packet data, possibly *after* rewrite.
         NOTE(review): the array is sized as 128 minus ONE u32 although three
         u32 fields precede it, so the record is larger than 128 bytes -
         confirm whether that is intended. */
938   u8 packet_data[128 - 1 * sizeof (u32)];
939 }
940 ip6_forward_next_trace_t;
941
942 #ifndef CLIB_MARCH_VARIANT
943 u8 *
944 format_ip6_forward_next_trace (u8 * s, va_list * args)
945 {
946   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
947   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
948   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
949   u32 indent = format_get_indent (s);
950
951   s = format (s, "%Ufib:%d adj:%d flow:%d",
952               format_white_space, indent,
953               t->fib_index, t->adj_index, t->flow_hash);
954   s = format (s, "\n%U%U",
955               format_white_space, indent,
956               format_ip6_header, t->packet_data, sizeof (t->packet_data));
957   return s;
958 }
959 #endif
960
961 static u8 *
962 format_ip6_lookup_trace (u8 * s, va_list * args)
963 {
964   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
965   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
966   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
967   u32 indent = format_get_indent (s);
968
969   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
970               t->fib_index, t->adj_index, t->flow_hash);
971   s = format (s, "\n%U%U",
972               format_white_space, indent,
973               format_ip6_header, t->packet_data, sizeof (t->packet_data));
974   return s;
975 }
976
977
978 static u8 *
979 format_ip6_rewrite_trace (u8 * s, va_list * args)
980 {
981   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
982   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
983   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
984   u32 indent = format_get_indent (s);
985
986   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
987               t->fib_index, t->adj_index, format_ip_adjacency,
988               t->adj_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
989   s = format (s, "\n%U%U",
990               format_white_space, indent,
991               format_ip_adjacency_packet_data,
992               t->packet_data, sizeof (t->packet_data));
993   return s;
994 }
995
996 /* Common trace function for all ip6-forward next nodes. */
997 #ifndef CLIB_MARCH_VARIANT
998 void
999 ip6_forward_next_trace (vlib_main_t * vm,
1000                         vlib_node_runtime_t * node,
1001                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1002 {
1003   u32 *from, n_left;
1004   ip6_main_t *im = &ip6_main;
1005
1006   n_left = frame->n_vectors;
1007   from = vlib_frame_vector_args (frame);
1008
1009   while (n_left >= 4)
1010     {
1011       u32 bi0, bi1;
1012       vlib_buffer_t *b0, *b1;
1013       ip6_forward_next_trace_t *t0, *t1;
1014
1015       /* Prefetch next iteration. */
1016       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1017       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1018
1019       bi0 = from[0];
1020       bi1 = from[1];
1021
1022       b0 = vlib_get_buffer (vm, bi0);
1023       b1 = vlib_get_buffer (vm, bi1);
1024
1025       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1026         {
1027           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1028           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1029           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1030           t0->fib_index =
1031             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1032              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1033             vec_elt (im->fib_index_by_sw_if_index,
1034                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1035
1036           clib_memcpy_fast (t0->packet_data,
1037                             vlib_buffer_get_current (b0),
1038                             sizeof (t0->packet_data));
1039         }
1040       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1041         {
1042           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1043           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1044           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1045           t1->fib_index =
1046             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1047              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1048             vec_elt (im->fib_index_by_sw_if_index,
1049                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1050
1051           clib_memcpy_fast (t1->packet_data,
1052                             vlib_buffer_get_current (b1),
1053                             sizeof (t1->packet_data));
1054         }
1055       from += 2;
1056       n_left -= 2;
1057     }
1058
1059   while (n_left >= 1)
1060     {
1061       u32 bi0;
1062       vlib_buffer_t *b0;
1063       ip6_forward_next_trace_t *t0;
1064
1065       bi0 = from[0];
1066
1067       b0 = vlib_get_buffer (vm, bi0);
1068
1069       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1070         {
1071           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1072           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1073           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1074           t0->fib_index =
1075             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1076              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1077             vec_elt (im->fib_index_by_sw_if_index,
1078                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1079
1080           clib_memcpy_fast (t0->packet_data,
1081                             vlib_buffer_get_current (b0),
1082                             sizeof (t0->packet_data));
1083         }
1084       from += 1;
1085       n_left -= 1;
1086     }
1087 }
1088
1089 /* Compute TCP/UDP/ICMP6 checksum in software. */
1090 u16
1091 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1092                                    ip6_header_t * ip0, int *bogus_lengthp)
1093 {
1094   ip_csum_t sum0 = 0;
1095   u16 payload_length, payload_length_host_byte_order;
1096   u32 i;
1097   u32 headers_size = sizeof (ip0[0]);
1098   u8 *data_this_buffer;
1099   u8 next_hdr = ip0->protocol;
1100
1101   ASSERT (bogus_lengthp);
1102   *bogus_lengthp = 0;
1103
1104   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
1105   data_this_buffer = (u8 *) (ip0 + 1);
1106   payload_length = ip0->payload_length;
1107
1108   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets)
1109    * or UDP-Ping packets */
1110   if (PREDICT_FALSE (next_hdr == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
1111     {
1112       u32 skip_bytes;
1113       ip6_hop_by_hop_ext_t *ext_hdr =
1114         (ip6_hop_by_hop_ext_t *) data_this_buffer;
1115
1116       /* validate really icmp6 next */
1117       ASSERT ((ext_hdr->next_hdr == IP_PROTOCOL_ICMP6)
1118               || (ext_hdr->next_hdr == IP_PROTOCOL_UDP));
1119
1120       skip_bytes = 8 * (1 + ext_hdr->n_data_u64s);
1121       data_this_buffer = (void *) ((u8 *) data_this_buffer + skip_bytes);
1122
1123       payload_length_host_byte_order -= skip_bytes;
1124       headers_size += skip_bytes;
1125
1126       /* pseudo-header adjustments:
1127        *   exclude ext header bytes from payload length
1128        *   use payload IP proto rather than ext header IP proto
1129        */
1130       payload_length = clib_host_to_net_u16 (payload_length_host_byte_order);
1131       next_hdr = ext_hdr->next_hdr;
1132     }
1133
1134   /* Initialize checksum with ip pseudo-header. */
1135   sum0 = payload_length + clib_host_to_net_u16 (next_hdr);
1136
1137   for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++)
1138     {
1139       sum0 = ip_csum_with_carry
1140         (sum0, clib_mem_unaligned (&ip0->src_address.as_uword[i], uword));
1141       sum0 = ip_csum_with_carry
1142         (sum0, clib_mem_unaligned (&ip0->dst_address.as_uword[i], uword));
1143     }
1144
1145   if (p0)
1146     return ip_calculate_l4_checksum (vm, p0, sum0,
1147                                      payload_length_host_byte_order,
1148                                      (u8 *) ip0, headers_size, NULL);
1149   else
1150     return ip_calculate_l4_checksum (vm, 0, sum0,
1151                                      payload_length_host_byte_order, NULL, 0,
1152                                      data_this_buffer);
1153 }
1154
1155 u32
1156 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1157 {
1158   ip6_header_t *ip0 = vlib_buffer_get_current (p0);
1159   udp_header_t *udp0;
1160   u16 sum16;
1161   int bogus_length;
1162
1163   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
1164   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1165           || ip0->protocol == IP_PROTOCOL_ICMP6
1166           || ip0->protocol == IP_PROTOCOL_UDP
1167           || ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS);
1168
1169   udp0 = (void *) (ip0 + 1);
1170   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1171     {
1172       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1173                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1174       return p0->flags;
1175     }
1176
1177   sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, &bogus_length);
1178
1179   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1180                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1181
1182   return p0->flags;
1183 }
1184 #endif
1185
1186 /**
1187  * @brief returns number of links on which src is reachable.
1188  */
1189 always_inline int
1190 ip6_urpf_loose_check (ip6_main_t * im, vlib_buffer_t * b, ip6_header_t * i)
1191 {
1192   const load_balance_t *lb0;
1193   index_t lbi;
1194   u32 fib_index;
1195
1196   fib_index = vec_elt (im->fib_index_by_sw_if_index,
1197                        vnet_buffer (b)->sw_if_index[VLIB_RX]);
1198   fib_index =
1199     (vnet_buffer (b)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
1200     fib_index : vnet_buffer (b)->sw_if_index[VLIB_TX];
1201
1202   lbi = ip6_fib_table_fwding_lookup (fib_index, &i->src_address);
1203   lb0 = load_balance_get (lbi);
1204
1205   return (fib_urpf_check_size (lb0->lb_urpf));
1206 }
1207
1208 always_inline u8
1209 ip6_next_proto_is_tcp_udp (vlib_buffer_t * p0, ip6_header_t * ip0,
1210                            u32 * udp_offset0)
1211 {
1212   int nh = ip6_locate_header (p0, ip0, -1, udp_offset0);
1213   if (nh > 0)
1214     if (nh == IP_PROTOCOL_UDP || nh == IP_PROTOCOL_TCP)
1215       return nh;
1216   return 0;
1217 }
1218
1219 VNET_FEATURE_ARC_INIT (ip6_local) = {
1220   .arc_name = "ip6-local",
1221   .start_nodes = VNET_FEATURES ("ip6-local", "ip6-receive"),
1222 };
1223
1224 static_always_inline u8
1225 ip6_tcp_udp_icmp_bad_length (vlib_main_t * vm, vlib_buffer_t * p0)
1226 {
1227
1228   u16 payload_length_host_byte_order;
1229   u32 n_this_buffer, n_bytes_left;
1230   ip6_header_t *ip0 = vlib_buffer_get_current (p0);
1231   u32 headers_size = sizeof (ip0[0]);
1232   u8 *data_this_buffer;
1233
1234
1235   data_this_buffer = (u8 *) (ip0 + 1);
1236
1237   ip6_hop_by_hop_ext_t *ext_hdr = (ip6_hop_by_hop_ext_t *) data_this_buffer;
1238
1239   /* validate really icmp6 next */
1240
1241   if (!(ext_hdr->next_hdr == IP_PROTOCOL_ICMP6)
1242       || (ext_hdr->next_hdr == IP_PROTOCOL_UDP))
1243     return 0;
1244
1245
1246   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
1247   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1248
1249
1250   u32 n_ip_bytes_this_buffer =
1251     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1252   if (n_this_buffer + headers_size > n_ip_bytes_this_buffer)
1253     {
1254       n_this_buffer = p0->current_length > headers_size ?
1255         n_ip_bytes_this_buffer - headers_size : 0;
1256     }
1257
1258   n_bytes_left -= n_this_buffer;
1259   n_bytes_left -= vlib_buffer_length_in_chain (vm, p0) - p0->current_length;
1260
1261   if (n_bytes_left == 0)
1262     return 0;
1263   else
1264     return 1;
1265 }
1266
/*
 * Common body of the ip6-local, ip6-receive and ip6-local-end-of-arc node
 * functions.
 *
 * For every buffer in the frame the next node is looked up in
 * lm->local_next_by_ip_protocol[] using the protocol field of the IPv6
 * header.  IP6_ERROR_UNKNOWN_PROTOCOL doubles as the "no error detected"
 * value: any other error code redirects the buffer to IP_LOCAL_NEXT_DROP.
 *
 * head_of_feature_arc - when non-zero, the L4 checksum / length checks, the
 *   loose uRPF source check and the ip.fib_index / ip.rx_sw_if_index
 *   bookkeeping are performed, and buffers without an error are handed to
 *   vnet_feature_arc_start () on the ip6-local arc.
 * is_receive_dpo - when non-zero, ip.adj_index[VLIB_TX] is used as a
 *   receive DPO index and an rd_sw_if_index other than ~0 replaces
 *   ip.rx_sw_if_index.
 *
 * Returns frame->n_vectors.
 */
1267 always_inline uword
1268 ip6_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
1269                   vlib_frame_t *frame, int head_of_feature_arc,
1270                   int is_receive_dpo)
1271 {
1272   ip6_main_t *im = &ip6_main;
1273   ip_lookup_main_t *lm = &im->lookup_main;
1274   u32 *from, n_left_from;
       /* Errors are attributed to the ip6-input node's error table. */
1275   vlib_node_runtime_t *error_node =
1276     vlib_node_get_runtime (vm, ip6_input_node.index);
1277   u8 arc_index = vnet_feat_arc_ip6_local.feature_arc_index;
1278   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1279   u16 nexts[VLIB_FRAME_SIZE], *next;
1280
1281   from = vlib_frame_vector_args (frame);
1282   n_left_from = frame->n_vectors;
1283
       /* Tracing happens up front, before any buffer metadata is changed. */
1284   if (node->flags & VLIB_NODE_FLAG_TRACE)
1285     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1286
1287   vlib_get_buffers (vm, from, bufs, n_left_from);
1288   b = bufs;
1289   next = nexts;
1290
       /* Dual loop: two buffers per iteration.  It runs only while MORE than
          two buffers remain, so the last one or two buffers are handled by
          the single loop further down. */
1291   while (n_left_from > 2)
1292     {
1293       /* Prefetch next iteration. */
1294       if (n_left_from >= 6)
1295         {
1296           vlib_prefetch_buffer_header (b[4], STORE);
1297           vlib_prefetch_buffer_header (b[5], STORE);
1298           vlib_prefetch_buffer_data (b[2], LOAD);
1299           vlib_prefetch_buffer_data (b[3], LOAD);
1300         }
1301
           /* UNKNOWN_PROTOCOL is the initial "no error" state. */
1302       vl_counter_ip6_enum_t error[2];
1303       error[0] = IP6_ERROR_UNKNOWN_PROTOCOL;
1304       error[1] = IP6_ERROR_UNKNOWN_PROTOCOL;
1305
1306       ip6_header_t *ip[2];
1307       ip[0] = vlib_buffer_get_current (b[0]);
1308       ip[1] = vlib_buffer_get_current (b[1]);
1309
1310       if (head_of_feature_arc)
1311         {
1312           vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1313           vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1314
               /* Built-in protocol class, keyed by the IPv6 header's own
                  protocol field (not by any header located further in). */
1315           u8 type[2];
1316           type[0] = lm->builtin_protocol_by_ip_protocol[ip[0]->protocol];
1317           type[1] = lm->builtin_protocol_by_ip_protocol[ip[1]->protocol];
1318
1319           u32 flags[2];
1320           flags[0] = b[0]->flags;
1321           flags[1] = b[1]->flags;
1322
1323           vnet_buffer_oflags_t oflags[2];
1324           oflags[0] = vnet_buffer (b[0])->oflags;
1325           oflags[1] = vnet_buffer (b[1])->oflags;
1326
               /* Non-zero when the buffer carries a TCP or UDP checksum
                  offload request. */
1327           u32 l4_offload[2];
1328           l4_offload[0] = (flags[0] & VNET_BUFFER_F_OFFLOAD) &&
1329                           (oflags[0] & (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM |
1330                                         VNET_BUFFER_OFFLOAD_F_UDP_CKSUM));
1331           l4_offload[1] = (flags[1] & VNET_BUFFER_F_OFFLOAD) &&
1332                           (oflags[1] & (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM |
1333                                         VNET_BUFFER_OFFLOAD_F_UDP_CKSUM));
1334
               /* Checksum counts as good if already flagged correct or if
                  it is offloaded. */
1335           u32 good_l4_csum[2];
1336           good_l4_csum[0] =
1337             (flags[0] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) | l4_offload[0];
1338           good_l4_csum[1] =
1339             (flags[1] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) | l4_offload[1];
1340
1341           u32 udp_offset[2] = { };
1342           u8 is_tcp_udp[2];
1343           is_tcp_udp[0] =
1344             ip6_next_proto_is_tcp_udp (b[0], ip[0], &udp_offset[0]);
1345           is_tcp_udp[1] =
1346             ip6_next_proto_is_tcp_udp (b[1], ip[1], &udp_offset[1]);
1347           i16 len_diff[2] = { 0 };
1348           if (PREDICT_TRUE (is_tcp_udp[0]))
1349             {
1350               udp_header_t *udp =
1351                 (udp_header_t *) ((u8 *) ip[0] + udp_offset[0]);
                   /* Don't verify UDP checksum for packets with explicit zero checksum. */
1352               good_l4_csum[0] |= type[0] == IP_BUILTIN_PROTOCOL_UDP
1353                 && udp->checksum == 0;
1354               /* optimistically verify UDP length. */
1355               u16 ip_len, udp_len;
1356               ip_len = clib_net_to_host_u16 (ip[0]->payload_length);
1357               udp_len = clib_net_to_host_u16 (udp->length);
1358               len_diff[0] = ip_len - udp_len;
1359             }
1360           if (PREDICT_TRUE (is_tcp_udp[1]))
1361             {
1362               udp_header_t *udp =
1363                 (udp_header_t *) ((u8 *) ip[1] + udp_offset[1]);
                   /* Don't verify UDP checksum for packets with explicit zero checksum. */
1364               good_l4_csum[1] |= type[1] == IP_BUILTIN_PROTOCOL_UDP
1365                 && udp->checksum == 0;
1366               /* optimistically verify UDP length. */
1367               u16 ip_len, udp_len;
1368               ip_len = clib_net_to_host_u16 (ip[1]->payload_length);
1369               udp_len = clib_net_to_host_u16 (udp->length);
1370               len_diff[1] = ip_len - udp_len;
1371             }
1372
               /* Nothing to verify for protocols without a built-in class. */
1373           good_l4_csum[0] |= type[0] == IP_BUILTIN_PROTOCOL_UNKNOWN;
1374           good_l4_csum[1] |= type[1] == IP_BUILTIN_PROTOCOL_UNKNOWN;
1375
               /* The length difference is only meaningful for UDP; it was
                  computed above for TCP as well, so discard it there. */
1376           len_diff[0] = type[0] == IP_BUILTIN_PROTOCOL_UDP ? len_diff[0] : 0;
1377           len_diff[1] = type[1] == IP_BUILTIN_PROTOCOL_UDP ? len_diff[1] : 0;
1378
               /* Software verification is needed only if nothing so far
                  vouches for the checksum and it was not computed before. */
1379           u8 need_csum[2];
1380           need_csum[0] = type[0] != IP_BUILTIN_PROTOCOL_UNKNOWN
1381             && !good_l4_csum[0]
1382             && !(flags[0] & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
1383           need_csum[1] = type[1] != IP_BUILTIN_PROTOCOL_UNKNOWN
1384             && !good_l4_csum[1]
1385             && !(flags[1] & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
1386           if (PREDICT_FALSE (need_csum[0]))
1387             {
1388               flags[0] = ip6_tcp_udp_icmp_validate_checksum (vm, b[0]);
1389               good_l4_csum[0] = flags[0] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
                   /* NOTE(review): redundant - error[0] still holds this
                      value at this point. */
1390               error[0] = IP6_ERROR_UNKNOWN_PROTOCOL;
1391             }
1392           else
1393             {
1394               if (ip6_tcp_udp_icmp_bad_length (vm, b[0]))
1395                 error[0] = IP6_ERROR_BAD_LENGTH;
1396             }
1397           if (PREDICT_FALSE (need_csum[1]))
1398             {
1399               flags[1] = ip6_tcp_udp_icmp_validate_checksum (vm, b[1]);
1400               good_l4_csum[1] = flags[1] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
                   /* NOTE(review): redundant - error[1] still holds this
                      value at this point. */
1401               error[1] = IP6_ERROR_UNKNOWN_PROTOCOL;
1402             }
1403           else
1404             {
1405               if (ip6_tcp_udp_icmp_bad_length (vm, b[1]))
1406                 error[1] = IP6_ERROR_BAD_LENGTH;
1407             }
1408
1409
               /* UDP length larger than the IP payload length. */
1410           error[0] = len_diff[0] < 0 ? IP6_ERROR_UDP_LENGTH : error[0];
1411
1412           error[1] = len_diff[1] < 0 ? IP6_ERROR_UDP_LENGTH : error[1];
1413
               /* The checksum error code is derived arithmetically from the
                  built-in protocol class; the asserts pin the enum layout
                  that this relies on. */
1414           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
1415                          IP6_ERROR_UDP_CHECKSUM,
1416                          "Wrong IP6 errors constants");
1417           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
1418                          IP6_ERROR_ICMP_CHECKSUM,
1419                          "Wrong IP6 errors constants");
1420
1421           error[0] =
1422             !good_l4_csum[0] ? IP6_ERROR_UDP_CHECKSUM + type[0] : error[0];
1423           error[1] =
1424             !good_l4_csum[1] ? IP6_ERROR_UDP_CHECKSUM + type[1] : error[1];
1425
1426           /* Drop packets from unroutable hosts. */
1427           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
               /* The flag marks packets that still need the source lookup:
                  no error so far, not ICMP, source not link-local. */
1428           u8 unroutable[2];
1429           unroutable[0] = error[0] == IP6_ERROR_UNKNOWN_PROTOCOL
1430             && type[0] != IP_BUILTIN_PROTOCOL_ICMP
1431             && !ip6_address_is_link_local_unicast (&ip[0]->src_address);
1432           unroutable[1] = error[1] == IP6_ERROR_UNKNOWN_PROTOCOL
1433             && type[1] != IP_BUILTIN_PROTOCOL_ICMP
1434             && !ip6_address_is_link_local_unicast (&ip[1]->src_address);
1435           if (PREDICT_FALSE (unroutable[0]))
1436             {
1437               error[0] =
1438                 !ip6_urpf_loose_check (im, b[0],
1439                                        ip[0]) ? IP6_ERROR_SRC_LOOKUP_MISS
1440                 : error[0];
1441             }
1442           if (PREDICT_FALSE (unroutable[1]))
1443             {
1444               error[1] =
1445                 !ip6_urpf_loose_check (im, b[1],
1446                                        ip[1]) ? IP6_ERROR_SRC_LOOKUP_MISS
1447                 : error[1];
1448             }
1449
               /* A sw_if_index[VLIB_TX] other than ~0 overrides the buffer's
                  fib_index. */
1450           vnet_buffer (b[0])->ip.fib_index =
1451             vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1452             vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1453             vnet_buffer (b[0])->ip.fib_index;
1454           vnet_buffer (b[1])->ip.fib_index =
1455             vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1456             vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1457             vnet_buffer (b[1])->ip.fib_index;
1458
1459           vnet_buffer (b[0])->ip.rx_sw_if_index =
1460             vnet_buffer (b[0])->sw_if_index[VLIB_RX];
1461           vnet_buffer (b[1])->ip.rx_sw_if_index =
1462             vnet_buffer (b[1])->sw_if_index[VLIB_RX];
1463           if (is_receive_dpo)
1464             {
                   /* The receive DPO may name a specific interface; if so
                      it replaces the RX interface recorded above. */
1465               const receive_dpo_t *rd0, *rd1;
1466               rd0 =
1467                 receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
1468               rd1 =
1469                 receive_dpo_get (vnet_buffer (b[1])->ip.adj_index[VLIB_TX]);
1470               if (rd0->rd_sw_if_index != ~0)
1471                 vnet_buffer (b[0])->ip.rx_sw_if_index = rd0->rd_sw_if_index;
1472               if (rd1->rd_sw_if_index != ~0)
1473                 vnet_buffer (b[1])->ip.rx_sw_if_index = rd1->rd_sw_if_index;
1474             }
1475         }                       /* head_of_feature_arc */
1476
           /* Per-protocol next node, overridden by drop on any real error. */
1477       next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
1478       next[0] =
1479         error[0] != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[0];
1480       next[1] = lm->local_next_by_ip_protocol[ip[1]->protocol];
1481       next[1] =
1482         error[1] != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[1];
1483
1484       b[0]->error = error_node->errors[error[0]];
1485       b[1]->error = error_node->errors[error[1]];
1486
1487       if (head_of_feature_arc)
1488         {
               /* Only error-free buffers enter the ip6-local feature arc. */
1489           u8 ip6_unknown[2];
1490           ip6_unknown[0] = error[0] == (u8) IP6_ERROR_UNKNOWN_PROTOCOL;
1491           ip6_unknown[1] = error[1] == (u8) IP6_ERROR_UNKNOWN_PROTOCOL;
1492           if (PREDICT_TRUE (ip6_unknown[0]))
1493             {
                   /* vnet_feature_arc_start () takes a u32 next index while
                      nexts[] holds u16, hence the temporary. */
1494               u32 next32 = next[0];
1495               vnet_feature_arc_start (arc_index,
1496                                       vnet_buffer (b[0])->ip.rx_sw_if_index,
1497                                       &next32, b[0]);
1498               next[0] = next32;
1499             }
1500           if (PREDICT_TRUE (ip6_unknown[1]))
1501             {
1502               u32 next32 = next[1];
1503               vnet_feature_arc_start (arc_index,
1504                                       vnet_buffer (b[1])->ip.rx_sw_if_index,
1505                                       &next32, b[1]);
1506               next[1] = next32;
1507             }
1508         }
1509
1510       /* next */
1511       b += 2;
1512       next += 2;
1513       n_left_from -= 2;
1514     }
1515
       /* Single loop: same processing as above, one buffer at a time. */
1516   while (n_left_from)
1517     {
           /* UNKNOWN_PROTOCOL is the initial "no error" state. */
1518       u8 error;
1519       error = IP6_ERROR_UNKNOWN_PROTOCOL;
1520
1521       ip6_header_t *ip;
1522       ip = vlib_buffer_get_current (b[0]);
1523
1524       if (head_of_feature_arc)
1525         {
1526           vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1527           u8 type = lm->builtin_protocol_by_ip_protocol[ip->protocol];
1528
1529           u32 flags = b[0]->flags;
1530
1531           vnet_buffer_oflags_t oflags = vnet_buffer (b[0])->oflags;
1532
1533           u32 l4_offload = (flags & VNET_BUFFER_F_OFFLOAD) &&
1534                            (oflags & (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM |
1535                                       VNET_BUFFER_OFFLOAD_F_UDP_CKSUM));
1536
1537           u32 good_l4_csum =
1538             (flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) | l4_offload;
               /* Not initialised here (unlike the dual loop); it is only
                  read when is_tcp_udp is non-zero - presumably always
                  written by ip6_locate_header () in that case, TODO confirm. */
1539           u32 udp_offset;
1540           i16 len_diff = 0;
1541           u8 is_tcp_udp = ip6_next_proto_is_tcp_udp (b[0], ip, &udp_offset);
1542           if (PREDICT_TRUE (is_tcp_udp))
1543             {
1544               udp_header_t *udp = (udp_header_t *) ((u8 *) ip + udp_offset);
1545               /* Don't verify UDP checksum for packets with explicit zero checksum. */
1546               good_l4_csum |= type == IP_BUILTIN_PROTOCOL_UDP
1547                 && udp->checksum == 0;
1548               /* optimistically verify UDP length. */
1549               u16 ip_len, udp_len;
1550               ip_len = clib_net_to_host_u16 (ip->payload_length);
1551               udp_len = clib_net_to_host_u16 (udp->length);
1552               len_diff = ip_len - udp_len;
1553             }
1554
1555           good_l4_csum |= type == IP_BUILTIN_PROTOCOL_UNKNOWN;
1556           len_diff = type == IP_BUILTIN_PROTOCOL_UDP ? len_diff : 0;
1557
1558           u8 need_csum = type != IP_BUILTIN_PROTOCOL_UNKNOWN &&
1559                          !good_l4_csum &&
1560                          !(flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
1561           if (PREDICT_FALSE (need_csum))
1562             {
1563               flags = ip6_tcp_udp_icmp_validate_checksum (vm, b[0]);
1564               good_l4_csum = flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
                   /* NOTE(review): redundant - error still holds this value
                      at this point. */
1565               error = IP6_ERROR_UNKNOWN_PROTOCOL;
1566             }
1567           else
1568             {
1569               if (ip6_tcp_udp_icmp_bad_length (vm, b[0]))
1570                 error = IP6_ERROR_BAD_LENGTH;
1571             }
1572
1573
1574
1575           error = len_diff < 0 ? IP6_ERROR_UDP_LENGTH : error;
1576           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
1577                          IP6_ERROR_UDP_CHECKSUM,
1578                          "Wrong IP6 errors constants");
1579           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
1580                          IP6_ERROR_ICMP_CHECKSUM,
1581                          "Wrong IP6 errors constants");
1582
1583           error = !good_l4_csum ? IP6_ERROR_UDP_CHECKSUM + type : error;
1584
1585           /* Drop packets from unroutable hosts. */
1586           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1587           u8 unroutable = error == IP6_ERROR_UNKNOWN_PROTOCOL
1588             && type != IP_BUILTIN_PROTOCOL_ICMP
1589             && !ip6_address_is_link_local_unicast (&ip->src_address);
1590           if (PREDICT_FALSE (unroutable))
1591             {
1592               error =
1593                 !ip6_urpf_loose_check (im, b[0],
1594                                        ip) ? IP6_ERROR_SRC_LOOKUP_MISS :
1595                 error;
1596             }
1597
1598           vnet_buffer (b[0])->ip.fib_index =
1599             vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1600             vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1601             vnet_buffer (b[0])->ip.fib_index;
1602
1603           vnet_buffer (b[0])->ip.rx_sw_if_index =
1604             vnet_buffer (b[0])->sw_if_index[VLIB_RX];
1605           if (is_receive_dpo)
1606             {
1607               receive_dpo_t *rd;
1608               rd = receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
1609               if (rd->rd_sw_if_index != ~0)
1610                 vnet_buffer (b[0])->ip.rx_sw_if_index = rd->rd_sw_if_index;
1611             }
1612         }                       /* head_of_feature_arc */
1613
1614       next[0] = lm->local_next_by_ip_protocol[ip->protocol];
1615       next[0] =
1616         error != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[0];
1617
1618       b[0]->error = error_node->errors[error];
1619
1620       if (head_of_feature_arc)
1621         {
1622           if (PREDICT_TRUE (error == (u8) IP6_ERROR_UNKNOWN_PROTOCOL))
1623             {
1624               u32 next32 = next[0];
1625               vnet_feature_arc_start (arc_index,
1626                                       vnet_buffer (b[0])->ip.rx_sw_if_index,
1627                                       &next32, b[0]);
1628               next[0] = next32;
1629             }
1630         }
1631
1632       /* next */
1633       b += 1;
1634       next += 1;
1635       n_left_from -= 1;
1636     }
1637
       /* Hand every buffer of the frame to its selected next node. */
1638   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1639   return frame->n_vectors;
1640 }
1641
1642 VLIB_NODE_FN (ip6_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1643                                vlib_frame_t * frame)
1644 {
1645   return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */,
1646                            0 /* ip6_local_inline */);
1647 }
1648
1649 VLIB_REGISTER_NODE (ip6_local_node) =
1650 {
1651   .name = "ip6-local",
1652   .vector_size = sizeof (u32),
1653   .format_trace = format_ip6_forward_next_trace,
1654   .n_errors = IP6_N_ERROR,
1655   .error_counters = ip6_error_counters,
1656   .n_next_nodes = IP_LOCAL_N_NEXT,
1657   .next_nodes =
1658   {
1659     [IP_LOCAL_NEXT_DROP] = "ip6-drop",
1660     [IP_LOCAL_NEXT_PUNT] = "ip6-punt",
1661     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
1662     [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input",
1663     [IP_LOCAL_NEXT_REASSEMBLY] = "ip6-local-full-reassembly",
1664   },
1665 };
1666
1667 VLIB_NODE_FN (ip6_receive_local_node)
1668 (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
1669 {
1670   return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */,
1671                            1 /* is_receive_dpo */);
1672 }
1673
1674 VLIB_REGISTER_NODE (ip6_receive_local_node) = {
1675   .name = "ip6-receive",
1676   .vector_size = sizeof (u32),
1677   .format_trace = format_ip6_forward_next_trace,
1678   .sibling_of = "ip6-local"
1679 };
1680
1681 VLIB_NODE_FN (ip6_local_end_of_arc_node) (vlib_main_t * vm,
1682                                           vlib_node_runtime_t * node,
1683                                           vlib_frame_t * frame)
1684 {
1685   return ip6_local_inline (vm, node, frame, 0 /* head of feature arc */,
1686                            0 /* ip6_local_inline */);
1687 }
1688
1689 VLIB_REGISTER_NODE (ip6_local_end_of_arc_node) = {
1690   .name = "ip6-local-end-of-arc",
1691   .vector_size = sizeof (u32),
1692
1693   .format_trace = format_ip6_forward_next_trace,
1694   .sibling_of = "ip6-local",
1695 };
1696
1697 VNET_FEATURE_INIT (ip6_local_end_of_arc, static) = {
1698   .arc_name = "ip6-local",
1699   .node_name = "ip6-local-end-of-arc",
1700   .runs_before = 0, /* not before any other features */
1701 };
1702
1703 #ifdef CLIB_MARCH_VARIANT
1704 extern vlib_node_registration_t ip6_local_node;
1705 #else
1706 void
1707 ip6_register_protocol (u32 protocol, u32 node_index)
1708 {
1709   vlib_main_t *vm = vlib_get_main ();
1710   ip6_main_t *im = &ip6_main;
1711   ip_lookup_main_t *lm = &im->lookup_main;
1712
1713   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1714   lm->local_next_by_ip_protocol[protocol] =
1715     vlib_node_add_next (vm, ip6_local_node.index, node_index);
1716 }
1717
1718 void
1719 ip6_unregister_protocol (u32 protocol)
1720 {
1721   ip6_main_t *im = &ip6_main;
1722   ip_lookup_main_t *lm = &im->lookup_main;
1723
1724   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1725   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1726 }
1727 #endif
1728
/* Next-node slots owned by "ip6-rewrite" (and the nodes registered as its siblings). */
typedef enum
{
  IP6_REWRITE_NEXT_DROP = 0,    /* "ip6-drop" */
  IP6_REWRITE_NEXT_ICMP_ERROR,  /* "ip6-icmp-error" */
  IP6_REWRITE_NEXT_FRAGMENT,    /* "ip6-frag" */
  IP6_REWRITE_N_NEXT,           /* number of slots; keep last */
} ip6_rewrite_next_t;
1736
/**
 * The bits of an IPv6 address to mask to construct a multicast
 * MAC address.
 *
 * The rewrite path passes this mask together with the last 32-bit word of
 * the destination address (dst_address.as_u32[3]) to
 * vnet_ip_mcast_fixup_header(); all 32 bits of that word are selected.
 */
#define IP6_MCAST_ADDR_MASK 0xffffffff
1742
1743 always_inline void
1744 ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes,
1745                u16 adj_packet_bytes, bool is_locally_generated,
1746                u32 * next, u8 is_midchain, u32 * error)
1747 {
1748   if (adj_packet_bytes >= 1280 && packet_bytes > adj_packet_bytes)
1749     {
1750       if (is_locally_generated)
1751         {
1752           /* IP fragmentation */
1753           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
1754                                    (is_midchain ?
1755                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
1756                                     IP_FRAG_NEXT_IP_REWRITE), 0);
1757           *next = IP6_REWRITE_NEXT_FRAGMENT;
1758           *error = IP6_ERROR_MTU_EXCEEDED;
1759         }
1760       else
1761         {
1762           *error = IP6_ERROR_MTU_EXCEEDED;
1763           icmp6_error_set_vnet_buffer (b, ICMP6_packet_too_big, 0,
1764                                        adj_packet_bytes);
1765           *next = IP6_REWRITE_NEXT_ICMP_ERROR;
1766         }
1767     }
1768 }
1769
1770 always_inline uword
1771 ip6_rewrite_inline_with_gso (vlib_main_t * vm,
1772                              vlib_node_runtime_t * node,
1773                              vlib_frame_t * frame,
1774                              int do_counters, int is_midchain, int is_mcast)
1775 {
1776   ip_lookup_main_t *lm = &ip6_main.lookup_main;
1777   u32 *from = vlib_frame_vector_args (frame);
1778   u32 n_left_from, n_left_to_next, *to_next, next_index;
1779   vlib_node_runtime_t *error_node =
1780     vlib_node_get_runtime (vm, ip6_input_node.index);
1781
1782   n_left_from = frame->n_vectors;
1783   next_index = node->cached_next_index;
1784   u32 thread_index = vm->thread_index;
1785
1786   while (n_left_from > 0)
1787     {
1788       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1789
1790       while (n_left_from >= 4 && n_left_to_next >= 2)
1791         {
1792           const ip_adjacency_t *adj0, *adj1;
1793           vlib_buffer_t *p0, *p1;
1794           ip6_header_t *ip0, *ip1;
1795           u32 pi0, rw_len0, next0, error0, adj_index0;
1796           u32 pi1, rw_len1, next1, error1, adj_index1;
1797           u32 tx_sw_if_index0, tx_sw_if_index1;
1798           bool is_locally_originated0, is_locally_originated1;
1799
1800           /* Prefetch next iteration. */
1801           {
1802             vlib_buffer_t *p2, *p3;
1803
1804             p2 = vlib_get_buffer (vm, from[2]);
1805             p3 = vlib_get_buffer (vm, from[3]);
1806
1807             vlib_prefetch_buffer_header (p2, LOAD);
1808             vlib_prefetch_buffer_header (p3, LOAD);
1809
1810             clib_prefetch_store (p2->pre_data);
1811             clib_prefetch_store (p3->pre_data);
1812
1813             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
1814             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
1815           }
1816
1817           pi0 = to_next[0] = from[0];
1818           pi1 = to_next[1] = from[1];
1819
1820           from += 2;
1821           n_left_from -= 2;
1822           to_next += 2;
1823           n_left_to_next -= 2;
1824
1825           p0 = vlib_get_buffer (vm, pi0);
1826           p1 = vlib_get_buffer (vm, pi1);
1827
1828           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1829           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
1830
1831           ip0 = vlib_buffer_get_current (p0);
1832           ip1 = vlib_buffer_get_current (p1);
1833
1834           error0 = error1 = IP6_ERROR_NONE;
1835           next0 = next1 = IP6_REWRITE_NEXT_DROP;
1836
1837           is_locally_originated0 =
1838             p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
1839           if (PREDICT_TRUE (!is_locally_originated0))
1840             {
1841               i32 hop_limit0 = ip0->hop_limit;
1842
1843               /* Input node should have reject packets with hop limit 0. */
1844               ASSERT (ip0->hop_limit > 0);
1845
1846               hop_limit0 -= 1;
1847
1848               ip0->hop_limit = hop_limit0;
1849
1850               /*
1851                * If the hop count drops below 1 when forwarding, generate
1852                * an ICMP response.
1853                */
1854               if (PREDICT_FALSE (hop_limit0 <= 0))
1855                 {
1856                   error0 = IP6_ERROR_TIME_EXPIRED;
1857                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
1858                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1859                   icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
1860                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
1861                                                0);
1862                 }
1863             }
1864
1865           is_locally_originated1 =
1866             p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
1867           if (PREDICT_TRUE (!is_locally_originated1))
1868             {
1869               i32 hop_limit1 = ip1->hop_limit;
1870
1871               /* Input node should have reject packets with hop limit 0. */
1872               ASSERT (ip1->hop_limit > 0);
1873
1874               hop_limit1 -= 1;
1875
1876               ip1->hop_limit = hop_limit1;
1877
1878               /*
1879                * If the hop count drops below 1 when forwarding, generate
1880                * an ICMP response.
1881                */
1882               if (PREDICT_FALSE (hop_limit1 <= 0))
1883                 {
1884                   error1 = IP6_ERROR_TIME_EXPIRED;
1885                   next1 = IP6_REWRITE_NEXT_ICMP_ERROR;
1886                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1887                   icmp6_error_set_vnet_buffer (p1, ICMP6_time_exceeded,
1888                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
1889                                                0);
1890                 }
1891             }
1892
1893           adj0 = adj_get (adj_index0);
1894           adj1 = adj_get (adj_index1);
1895
1896           rw_len0 = adj0[0].rewrite_header.data_bytes;
1897           rw_len1 = adj1[0].rewrite_header.data_bytes;
1898           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
1899           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
1900
1901           if (do_counters)
1902             {
1903               vlib_increment_combined_counter
1904                 (&adjacency_counters,
1905                  thread_index, adj_index0, 1,
1906                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
1907               vlib_increment_combined_counter
1908                 (&adjacency_counters,
1909                  thread_index, adj_index1, 1,
1910                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
1911             }
1912
1913           /* Check MTU of outgoing interface. */
1914           u16 ip0_len =
1915             clib_net_to_host_u16 (ip0->payload_length) +
1916             sizeof (ip6_header_t);
1917           u16 ip1_len =
1918             clib_net_to_host_u16 (ip1->payload_length) +
1919             sizeof (ip6_header_t);
1920           if (p0->flags & VNET_BUFFER_F_GSO)
1921             ip0_len = gso_mtu_sz (p0);
1922           if (p1->flags & VNET_BUFFER_F_GSO)
1923             ip1_len = gso_mtu_sz (p1);
1924
1925           ip6_mtu_check (p0, ip0_len,
1926                          adj0[0].rewrite_header.max_l3_packet_bytes,
1927                          is_locally_originated0, &next0, is_midchain,
1928                          &error0);
1929           ip6_mtu_check (p1, ip1_len,
1930                          adj1[0].rewrite_header.max_l3_packet_bytes,
1931                          is_locally_originated1, &next1, is_midchain,
1932                          &error1);
1933           /* Don't adjust the buffer for hop count issue; icmp-error node
1934            * wants to see the IP header */
1935           if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
1936             {
1937               p0->current_data -= rw_len0;
1938               p0->current_length += rw_len0;
1939               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
1940               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
1941               next0 = adj0[0].rewrite_header.next_index;
1942               if (PREDICT_FALSE
1943                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
1944                 vnet_feature_arc_start_w_cfg_index
1945                   (lm->output_feature_arc_index, tx_sw_if_index0, &next0, p0,
1946                    adj0->ia_cfg_index);
1947             }
1948           else
1949             {
1950               p0->error = error_node->errors[error0];
1951             }
1952           if (PREDICT_TRUE (error1 == IP6_ERROR_NONE))
1953             {
1954               p1->current_data -= rw_len1;
1955               p1->current_length += rw_len1;
1956
1957               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
1958               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
1959               next1 = adj1[0].rewrite_header.next_index;
1960
1961               if (PREDICT_FALSE
1962                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
1963                 vnet_feature_arc_start_w_cfg_index
1964                   (lm->output_feature_arc_index, tx_sw_if_index1, &next1, p1,
1965                    adj1->ia_cfg_index);
1966             }
1967           else
1968             {
1969               p1->error = error_node->errors[error1];
1970             }
1971
1972           if (is_midchain)
1973             {
1974               /* before we paint on the next header, update the L4
1975                * checksums if required, since there's no offload on a tunnel */
1976               vnet_calc_checksums_inline (vm, p0, 0 /* is_ip4 */ ,
1977                                           1 /* is_ip6 */ );
1978               vnet_calc_checksums_inline (vm, p1, 0 /* is_ip4 */ ,
1979                                           1 /* is_ip6 */ );
1980
1981               /* Guess we are only writing on ipv6 header. */
1982               vnet_rewrite_two_headers (adj0[0], adj1[0],
1983                                         ip0, ip1, sizeof (ip6_header_t));
1984             }
1985           else
1986             /* Guess we are only writing on simple Ethernet header. */
1987             vnet_rewrite_two_headers (adj0[0], adj1[0],
1988                                       ip0, ip1, sizeof (ethernet_header_t));
1989
1990           if (is_midchain)
1991             {
1992               if (adj0->sub_type.midchain.fixup_func)
1993                 adj0->sub_type.midchain.fixup_func
1994                   (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
1995               if (adj1->sub_type.midchain.fixup_func)
1996                 adj1->sub_type.midchain.fixup_func
1997                   (vm, adj1, p1, adj1->sub_type.midchain.fixup_data);
1998             }
1999           if (is_mcast)
2000             {
2001               /*
2002                * copy bytes from the IP address into the MAC rewrite
2003                */
2004               vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
2005                                           adj0->
2006                                           rewrite_header.dst_mcast_offset,
2007                                           &ip0->dst_address.as_u32[3],
2008                                           (u8 *) ip0);
2009               vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
2010                                           adj1->
2011                                           rewrite_header.dst_mcast_offset,
2012                                           &ip1->dst_address.as_u32[3],
2013                                           (u8 *) ip1);
2014             }
2015
2016           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2017                                            to_next, n_left_to_next,
2018                                            pi0, pi1, next0, next1);
2019         }
2020
2021       while (n_left_from > 0 && n_left_to_next > 0)
2022         {
2023           ip_adjacency_t *adj0;
2024           vlib_buffer_t *p0;
2025           ip6_header_t *ip0;
2026           u32 pi0, rw_len0;
2027           u32 adj_index0, next0, error0;
2028           u32 tx_sw_if_index0;
2029           bool is_locally_originated0;
2030
2031           pi0 = to_next[0] = from[0];
2032
2033           p0 = vlib_get_buffer (vm, pi0);
2034
2035           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2036
2037           adj0 = adj_get (adj_index0);
2038
2039           ip0 = vlib_buffer_get_current (p0);
2040
2041           error0 = IP6_ERROR_NONE;
2042           next0 = IP6_REWRITE_NEXT_DROP;
2043
2044           /* Check hop limit */
2045           is_locally_originated0 =
2046             p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
2047           if (PREDICT_TRUE (!is_locally_originated0))
2048             {
2049               i32 hop_limit0 = ip0->hop_limit;
2050
2051               ASSERT (ip0->hop_limit > 0);
2052
2053               hop_limit0 -= 1;
2054
2055               ip0->hop_limit = hop_limit0;
2056
2057               if (PREDICT_FALSE (hop_limit0 <= 0))
2058                 {
2059                   /*
2060                    * If the hop count drops below 1 when forwarding, generate
2061                    * an ICMP response.
2062                    */
2063                   error0 = IP6_ERROR_TIME_EXPIRED;
2064                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
2065                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2066                   icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
2067                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
2068                                                0);
2069                 }
2070             }
2071
2072           if (is_midchain)
2073             {
2074               vnet_calc_checksums_inline (vm, p0, 0 /* is_ip4 */ ,
2075                                           1 /* is_ip6 */ );
2076
2077               /* Guess we are only writing on ip6 header. */
2078               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip6_header_t));
2079             }
2080           else
2081             /* Guess we are only writing on simple Ethernet header. */
2082             vnet_rewrite_one_header (adj0[0], ip0,
2083                                      sizeof (ethernet_header_t));
2084
2085           /* Update packet buffer attributes/set output interface. */
2086           rw_len0 = adj0[0].rewrite_header.data_bytes;
2087           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2088
2089           if (do_counters)
2090             {
2091               vlib_increment_combined_counter
2092                 (&adjacency_counters,
2093                  thread_index, adj_index0, 1,
2094                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2095             }
2096
2097           /* Check MTU of outgoing interface. */
2098           u16 ip0_len =
2099             clib_net_to_host_u16 (ip0->payload_length) +
2100             sizeof (ip6_header_t);
2101           if (p0->flags & VNET_BUFFER_F_GSO)
2102             ip0_len = gso_mtu_sz (p0);
2103
2104           ip6_mtu_check (p0, ip0_len,
2105                          adj0[0].rewrite_header.max_l3_packet_bytes,
2106                          is_locally_originated0, &next0, is_midchain,
2107                          &error0);
2108           /* Don't adjust the buffer for hop count issue; icmp-error node
2109            * wants to see the IP header */
2110           if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
2111             {
2112               p0->current_data -= rw_len0;
2113               p0->current_length += rw_len0;
2114
2115               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2116
2117               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2118               next0 = adj0[0].rewrite_header.next_index;
2119
2120               if (PREDICT_FALSE
2121                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2122                 vnet_feature_arc_start_w_cfg_index
2123                   (lm->output_feature_arc_index, tx_sw_if_index0, &next0, p0,
2124                    adj0->ia_cfg_index);
2125             }
2126           else
2127             {
2128               p0->error = error_node->errors[error0];
2129             }
2130
2131           if (is_midchain)
2132             {
2133               if (adj0->sub_type.midchain.fixup_func)
2134                 adj0->sub_type.midchain.fixup_func
2135                   (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2136             }
2137           if (is_mcast)
2138             {
2139               vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
2140                                           adj0->
2141                                           rewrite_header.dst_mcast_offset,
2142                                           &ip0->dst_address.as_u32[3],
2143                                           (u8 *) ip0);
2144             }
2145
2146           from += 1;
2147           n_left_from -= 1;
2148           to_next += 1;
2149           n_left_to_next -= 1;
2150
2151           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2152                                            to_next, n_left_to_next,
2153                                            pi0, next0);
2154         }
2155
2156       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2157     }
2158
2159   /* Need to do trace after rewrites to pick up new packet data. */
2160   if (node->flags & VLIB_NODE_FLAG_TRACE)
2161     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
2162
2163   return frame->n_vectors;
2164 }
2165
2166 always_inline uword
2167 ip6_rewrite_inline (vlib_main_t * vm,
2168                     vlib_node_runtime_t * node,
2169                     vlib_frame_t * frame,
2170                     int do_counters, int is_midchain, int is_mcast)
2171 {
2172   return ip6_rewrite_inline_with_gso (vm, node, frame, do_counters,
2173                                       is_midchain, is_mcast);
2174 }
2175
2176 VLIB_NODE_FN (ip6_rewrite_node) (vlib_main_t * vm,
2177                                  vlib_node_runtime_t * node,
2178                                  vlib_frame_t * frame)
2179 {
2180   if (adj_are_counters_enabled ())
2181     return ip6_rewrite_inline (vm, node, frame, 1, 0, 0);
2182   else
2183     return ip6_rewrite_inline (vm, node, frame, 0, 0, 0);
2184 }
2185
2186 VLIB_NODE_FN (ip6_rewrite_bcast_node) (vlib_main_t * vm,
2187                                        vlib_node_runtime_t * node,
2188                                        vlib_frame_t * frame)
2189 {
2190   if (adj_are_counters_enabled ())
2191     return ip6_rewrite_inline (vm, node, frame, 1, 0, 0);
2192   else
2193     return ip6_rewrite_inline (vm, node, frame, 0, 0, 0);
2194 }
2195
2196 VLIB_NODE_FN (ip6_rewrite_mcast_node) (vlib_main_t * vm,
2197                                        vlib_node_runtime_t * node,
2198                                        vlib_frame_t * frame)
2199 {
2200   if (adj_are_counters_enabled ())
2201     return ip6_rewrite_inline (vm, node, frame, 1, 0, 1);
2202   else
2203     return ip6_rewrite_inline (vm, node, frame, 0, 0, 1);
2204 }
2205
2206 VLIB_NODE_FN (ip6_midchain_node) (vlib_main_t * vm,
2207                                   vlib_node_runtime_t * node,
2208                                   vlib_frame_t * frame)
2209 {
2210   if (adj_are_counters_enabled ())
2211     return ip6_rewrite_inline (vm, node, frame, 1, 1, 0);
2212   else
2213     return ip6_rewrite_inline (vm, node, frame, 0, 1, 0);
2214 }
2215
2216 VLIB_NODE_FN (ip6_mcast_midchain_node) (vlib_main_t * vm,
2217                                         vlib_node_runtime_t * node,
2218                                         vlib_frame_t * frame)
2219 {
2220   if (adj_are_counters_enabled ())
2221     return ip6_rewrite_inline (vm, node, frame, 1, 1, 1);
2222   else
2223     return ip6_rewrite_inline (vm, node, frame, 0, 1, 1);
2224 }
2225
2226 VLIB_REGISTER_NODE (ip6_midchain_node) = {
2227   .name = "ip6-midchain",
2228   .vector_size = sizeof (u32),
2229   .format_trace = format_ip6_forward_next_trace,
2230   .sibling_of = "ip6-rewrite",
2231 };
2232
2233 VLIB_REGISTER_NODE (ip6_rewrite_node) =
2234 {
2235   .name = "ip6-rewrite",
2236   .vector_size = sizeof (u32),
2237   .format_trace = format_ip6_rewrite_trace,
2238   .n_next_nodes = IP6_REWRITE_N_NEXT,
2239   .next_nodes =
2240   {
2241     [IP6_REWRITE_NEXT_DROP] = "ip6-drop",
2242     [IP6_REWRITE_NEXT_ICMP_ERROR] = "ip6-icmp-error",
2243     [IP6_REWRITE_NEXT_FRAGMENT] = "ip6-frag",
2244   },
2245 };
2246
2247 VLIB_REGISTER_NODE (ip6_rewrite_bcast_node) = {
2248   .name = "ip6-rewrite-bcast",
2249   .vector_size = sizeof (u32),
2250
2251   .format_trace = format_ip6_rewrite_trace,
2252   .sibling_of = "ip6-rewrite",
2253 };
2254
2255 VLIB_REGISTER_NODE (ip6_rewrite_mcast_node) =
2256 {
2257   .name = "ip6-rewrite-mcast",
2258   .vector_size = sizeof (u32),
2259   .format_trace = format_ip6_rewrite_trace,
2260   .sibling_of = "ip6-rewrite",
2261 };
2262
2263
2264 VLIB_REGISTER_NODE (ip6_mcast_midchain_node) =
2265 {
2266   .name = "ip6-mcast-midchain",
2267   .vector_size = sizeof (u32),
2268   .format_trace = format_ip6_rewrite_trace,
2269   .sibling_of = "ip6-rewrite",
2270 };
2271
2272
/*
 * Hop-by-Hop handling
 *
 * ip6_hop_by_hop_main is the global hop-by-hop state.  The code in this
 * file indexes its `options` table (per-option-type packet handlers) and
 * its `trace` table (per-option-type trace formatters) by option type.
 * The object is defined only when CLIB_MARCH_VARIANT is not set.
 */
#ifndef CLIB_MARCH_VARIANT
ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
#endif /* CLIB_MARCH_VARIANT */
2279
/*
 * X-macro list of hop-by-hop node errors: _(symbol, description).
 * Expanded with different definitions of `_` to build the error enum and
 * the matching description strings, so the two stay in the same order.
 */
#define foreach_ip6_hop_by_hop_error                                    \
  _ (PROCESSED, "pkts with ip6 hop-by-hop options")                     \
  _ (FORMAT, "incorrectly formatted hop-by-hop options")                \
  _ (UNKNOWN_OPTION, "unknown ip6 hop-by-hop options")
2284
2285 typedef enum
2286 {
2287 #define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
2288   foreach_ip6_hop_by_hop_error
2289 #undef _
2290   IP6_HOP_BY_HOP_N_ERROR,
2291 } ip6_hop_by_hop_error_t;
2292
/*
 * Primary h-b-h handler trace support
 * We work pretty hard on the problem for obvious reasons
 *
 * One trace record of the hop-by-hop node, rendered by
 * format_ip6_hop_by_hop_trace().
 */
typedef struct
{
  /* Next index chosen for the packet (printed as "next index"). */
  u32 next_index;
  /* Number of bytes of option_data that were traced (printed as "traced"). */
  u32 trace_len;
  /* Copy of the packet's hop-by-hop header; the formatter reads it as an
   * ip6_hop_by_hop_header_t followed by its options. */
  u8 option_data[256];
} ip6_hop_by_hop_trace_t;
2303
2304 extern vlib_node_registration_t ip6_hop_by_hop_node;
2305
2306 static char *ip6_hop_by_hop_error_strings[] = {
2307 #define _(sym,string) string,
2308   foreach_ip6_hop_by_hop_error
2309 #undef _
2310 };
2311
2312 #ifndef CLIB_MARCH_VARIANT
2313 u8 *
2314 format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args)
2315 {
2316   ip6_hop_by_hop_header_t *hbh0 = va_arg (*args, ip6_hop_by_hop_header_t *);
2317   int total_len = va_arg (*args, int);
2318   ip6_hop_by_hop_option_t *opt0, *limit0;
2319   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2320   u8 type0;
2321   s = format (s, "IP6_HOP_BY_HOP: next protocol %d len %d total %d",
2322               hbh0->protocol, (hbh0->length + 1) << 3, total_len);
2323
2324   opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2325   limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 + total_len);
2326
2327   while (opt0 < limit0)
2328     {
2329       type0 = opt0->type;
2330       switch (type0)
2331         {
2332         case 0:         /* Pad, just stop */
2333           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1);
2334           break;
2335
2336         default:
2337           if (hm->trace[type0])
2338             {
2339               s = (*hm->trace[type0]) (s, opt0);
2340             }
2341           else
2342             {
2343               s = format (s, "\n    unrecognized option %d length %d", type0,
2344                           opt0->length);
2345             }
2346           opt0 =
2347             (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2348                                          sizeof (ip6_hop_by_hop_option_t));
2349           break;
2350         }
2351     }
2352   return s;
2353 }
2354 #endif
2355
2356 static u8 *
2357 format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
2358 {
2359   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
2360   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
2361   ip6_hop_by_hop_trace_t *t = va_arg (*args, ip6_hop_by_hop_trace_t *);
2362   ip6_hop_by_hop_header_t *hbh0;
2363   ip6_hop_by_hop_option_t *opt0, *limit0;
2364   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2365
2366   u8 type0;
2367
2368   hbh0 = (ip6_hop_by_hop_header_t *) t->option_data;
2369
2370   s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d",
2371               t->next_index, (hbh0->length + 1) << 3, t->trace_len);
2372
2373   opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2374   limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0) + t->trace_len;
2375
2376   while (opt0 < limit0)
2377     {
2378       type0 = opt0->type;
2379       switch (type0)
2380         {
2381         case 0:         /* Pad, just stop */
2382           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1;
2383           break;
2384
2385         default:
2386           if (hm->trace[type0])
2387             {
2388               s = (*hm->trace[type0]) (s, opt0);
2389             }
2390           else
2391             {
2392               s = format (s, "\n    unrecognized option %d length %d", type0,
2393                           opt0->length);
2394             }
2395           opt0 =
2396             (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2397                                          sizeof (ip6_hop_by_hop_option_t));
2398           break;
2399         }
2400     }
2401   return s;
2402 }
2403
2404 always_inline u8
2405 ip6_scan_hbh_options (vlib_buffer_t * b0,
2406                       ip6_header_t * ip0,
2407                       ip6_hop_by_hop_header_t * hbh0,
2408                       ip6_hop_by_hop_option_t * opt0,
2409                       ip6_hop_by_hop_option_t * limit0, u32 * next0)
2410 {
2411   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2412   u8 type0;
2413   u8 error0 = 0;
2414
2415   while (opt0 < limit0)
2416     {
2417       type0 = opt0->type;
2418       switch (type0)
2419         {
2420         case 0:         /* Pad1 */
2421           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1;
2422           continue;
2423         case 1:         /* PadN */
2424           break;
2425         default:
2426           if (hm->options[type0])
2427             {
2428               if ((*hm->options[type0]) (b0, ip0, opt0) < 0)
2429                 {
2430                   error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2431                   return (error0);
2432                 }
2433             }
2434           else
2435             {
2436               /* Unrecognized mandatory option, check the two high order bits */
2437               switch (opt0->type & HBH_OPTION_TYPE_HIGH_ORDER_BITS)
2438                 {
2439                 case HBH_OPTION_TYPE_SKIP_UNKNOWN:
2440                   break;
2441                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN:
2442                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2443                   *next0 = IP_LOOKUP_NEXT_DROP;
2444                   break;
2445                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP:
2446                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2447                   *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
2448                   icmp6_error_set_vnet_buffer (b0, ICMP6_parameter_problem,
2449                                                ICMP6_parameter_problem_unrecognized_option,
2450                                                (u8 *) opt0 - (u8 *) ip0);
2451                   break;
2452                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST:
2453                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2454                   if (!ip6_address_is_multicast (&ip0->dst_address))
2455                     {
2456                       *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
2457                       icmp6_error_set_vnet_buffer (b0,
2458                                                    ICMP6_parameter_problem,
2459                                                    ICMP6_parameter_problem_unrecognized_option,
2460                                                    (u8 *) opt0 - (u8 *) ip0);
2461                     }
2462                   else
2463                     {
2464                       *next0 = IP_LOOKUP_NEXT_DROP;
2465                     }
2466                   break;
2467                 }
2468               return (error0);
2469             }
2470         }
2471       opt0 =
2472         (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2473                                      sizeof (ip6_hop_by_hop_option_t));
2474     }
2475   return (error0);
2476 }
2477
2478 /*
2479  * Process the Hop-by-Hop Options header
2480  */
2481 VLIB_NODE_FN (ip6_hop_by_hop_node) (vlib_main_t * vm,
2482                                     vlib_node_runtime_t * node,
2483                                     vlib_frame_t * frame)
2484 {
2485   vlib_node_runtime_t *error_node =
2486     vlib_node_get_runtime (vm, ip6_hop_by_hop_node.index);
2487   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2488   u32 n_left_from, *from, *to_next;
2489   ip_lookup_next_t next_index;
2490
2491   from = vlib_frame_vector_args (frame);
2492   n_left_from = frame->n_vectors;
2493   next_index = node->cached_next_index;
2494
2495   while (n_left_from > 0)
2496     {
2497       u32 n_left_to_next;
2498
2499       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2500
2501       while (n_left_from >= 4 && n_left_to_next >= 2)
2502         {
2503           u32 bi0, bi1;
2504           vlib_buffer_t *b0, *b1;
2505           u32 next0, next1;
2506           ip6_header_t *ip0, *ip1;
2507           ip6_hop_by_hop_header_t *hbh0, *hbh1;
2508           ip6_hop_by_hop_option_t *opt0, *limit0, *opt1, *limit1;
2509           u8 error0 = 0, error1 = 0;
2510
2511           /* Prefetch next iteration. */
2512           {
2513             vlib_buffer_t *p2, *p3;
2514
2515             p2 = vlib_get_buffer (vm, from[2]);
2516             p3 = vlib_get_buffer (vm, from[3]);
2517
2518             vlib_prefetch_buffer_header (p2, LOAD);
2519             vlib_prefetch_buffer_header (p3, LOAD);
2520
2521             CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
2522             CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
2523           }
2524
2525           /* Speculatively enqueue b0, b1 to the current next frame */
2526           to_next[0] = bi0 = from[0];
2527           to_next[1] = bi1 = from[1];
2528           from += 2;
2529           to_next += 2;
2530           n_left_from -= 2;
2531           n_left_to_next -= 2;
2532
2533           b0 = vlib_get_buffer (vm, bi0);
2534           b1 = vlib_get_buffer (vm, bi1);
2535
2536           /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2537           u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
2538           ip_adjacency_t *adj0 = adj_get (adj_index0);
2539           u32 adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
2540           ip_adjacency_t *adj1 = adj_get (adj_index1);
2541
2542           /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2543           next0 = adj0->lookup_next_index;
2544           next1 = adj1->lookup_next_index;
2545
2546           ip0 = vlib_buffer_get_current (b0);
2547           ip1 = vlib_buffer_get_current (b1);
2548           hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
2549           hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
2550           opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2551           opt1 = (ip6_hop_by_hop_option_t *) (hbh1 + 1);
2552           limit0 =
2553             (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
2554                                          ((hbh0->length + 1) << 3));
2555           limit1 =
2556             (ip6_hop_by_hop_option_t *) ((u8 *) hbh1 +
2557                                          ((hbh1->length + 1) << 3));
2558
2559           /*
2560            * Basic validity checks
2561            */
2562           if ((hbh0->length + 1) << 3 >
2563               clib_net_to_host_u16 (ip0->payload_length))
2564             {
2565               error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2566               next0 = IP_LOOKUP_NEXT_DROP;
2567               goto outdual;
2568             }
2569           /* Scan the set of h-b-h options, process ones that we understand */
2570           error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
2571
2572           if ((hbh1->length + 1) << 3 >
2573               clib_net_to_host_u16 (ip1->payload_length))
2574             {
2575               error1 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2576               next1 = IP_LOOKUP_NEXT_DROP;
2577               goto outdual;
2578             }
2579           /* Scan the set of h-b-h options, process ones that we understand */
2580           error1 = ip6_scan_hbh_options (b1, ip1, hbh1, opt1, limit1, &next1);
2581
2582         outdual:
2583           /* Has the classifier flagged this buffer for special treatment? */
2584           if (PREDICT_FALSE
2585               ((error0 == 0)
2586                && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
2587             next0 = hm->next_override;
2588
2589           /* Has the classifier flagged this buffer for special treatment? */
2590           if (PREDICT_FALSE
2591               ((error1 == 0)
2592                && (vnet_buffer (b1)->l2_classify.opaque_index & OI_DECAP)))
2593             next1 = hm->next_override;
2594
2595           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
2596             {
2597               if (b0->flags & VLIB_BUFFER_IS_TRACED)
2598                 {
2599                   ip6_hop_by_hop_trace_t *t =
2600                     vlib_add_trace (vm, node, b0, sizeof (*t));
2601                   u32 trace_len = (hbh0->length + 1) << 3;
2602                   t->next_index = next0;
2603                   /* Capture the h-b-h option verbatim */
2604                   trace_len =
2605                     trace_len <
2606                     ARRAY_LEN (t->option_data) ? trace_len :
2607                     ARRAY_LEN (t->option_data);
2608                   t->trace_len = trace_len;
2609                   clib_memcpy_fast (t->option_data, hbh0, trace_len);
2610                 }
2611               if (b1->flags & VLIB_BUFFER_IS_TRACED)
2612                 {
2613                   ip6_hop_by_hop_trace_t *t =
2614                     vlib_add_trace (vm, node, b1, sizeof (*t));
2615                   u32 trace_len = (hbh1->length + 1) << 3;
2616                   t->next_index = next1;
2617                   /* Capture the h-b-h option verbatim */
2618                   trace_len =
2619                     trace_len <
2620                     ARRAY_LEN (t->option_data) ? trace_len :
2621                     ARRAY_LEN (t->option_data);
2622                   t->trace_len = trace_len;
2623                   clib_memcpy_fast (t->option_data, hbh1, trace_len);
2624                 }
2625
2626             }
2627
2628           b0->error = error_node->errors[error0];
2629           b1->error = error_node->errors[error1];
2630
2631           /* verify speculative enqueue, maybe switch current next frame */
2632           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
2633                                            n_left_to_next, bi0, bi1, next0,
2634                                            next1);
2635         }
2636
2637       while (n_left_from > 0 && n_left_to_next > 0)
2638         {
2639           u32 bi0;
2640           vlib_buffer_t *b0;
2641           u32 next0;
2642           ip6_header_t *ip0;
2643           ip6_hop_by_hop_header_t *hbh0;
2644           ip6_hop_by_hop_option_t *opt0, *limit0;
2645           u8 error0 = 0;
2646
2647           /* Speculatively enqueue b0 to the current next frame */
2648           bi0 = from[0];
2649           to_next[0] = bi0;
2650           from += 1;
2651           to_next += 1;
2652           n_left_from -= 1;
2653           n_left_to_next -= 1;
2654
2655           b0 = vlib_get_buffer (vm, bi0);
2656           /*
2657            * Default use the next_index from the adjacency.
2658            * A HBH option rarely redirects to a different node
2659            */
2660           u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
2661           ip_adjacency_t *adj0 = adj_get (adj_index0);
2662           next0 = adj0->lookup_next_index;
2663
2664           ip0 = vlib_buffer_get_current (b0);
2665           hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
2666           opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2667           limit0 =
2668             (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
2669                                          ((hbh0->length + 1) << 3));
2670
2671           /*
2672            * Basic validity checks
2673            */
2674           if ((hbh0->length + 1) << 3 >
2675               clib_net_to_host_u16 (ip0->payload_length))
2676             {
2677               error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2678               next0 = IP_LOOKUP_NEXT_DROP;
2679               goto out0;
2680             }
2681
2682           /* Scan the set of h-b-h options, process ones that we understand */
2683           error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
2684
2685         out0:
2686           /* Has the classifier flagged this buffer for special treatment? */
2687           if (PREDICT_FALSE
2688               ((error0 == 0)
2689                && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
2690             next0 = hm->next_override;
2691
2692           if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
2693             {
2694               ip6_hop_by_hop_trace_t *t =
2695                 vlib_add_trace (vm, node, b0, sizeof (*t));
2696               u32 trace_len = (hbh0->length + 1) << 3;
2697               t->next_index = next0;
2698               /* Capture the h-b-h option verbatim */
2699               trace_len =
2700                 trace_len <
2701                 ARRAY_LEN (t->option_data) ? trace_len :
2702                 ARRAY_LEN (t->option_data);
2703               t->trace_len = trace_len;
2704               clib_memcpy_fast (t->option_data, hbh0, trace_len);
2705             }
2706
2707           b0->error = error_node->errors[error0];
2708
2709           /* verify speculative enqueue, maybe switch current next frame */
2710           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
2711                                            n_left_to_next, bi0, next0);
2712         }
2713       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2714     }
2715   return frame->n_vectors;
2716 }
2717
2718 VLIB_REGISTER_NODE (ip6_hop_by_hop_node) =
2719 {
2720   .name = "ip6-hop-by-hop",
2721   .sibling_of = "ip6-lookup",
2722   .vector_size = sizeof (u32),
2723   .format_trace = format_ip6_hop_by_hop_trace,
2724   .type = VLIB_NODE_TYPE_INTERNAL,
2725   .n_errors = ARRAY_LEN (ip6_hop_by_hop_error_strings),
2726   .error_strings = ip6_hop_by_hop_error_strings,
2727   .n_next_nodes = 0,
2728 };
2729
2730 static clib_error_t *
2731 ip6_hop_by_hop_init (vlib_main_t * vm)
2732 {
2733   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2734   clib_memset (hm->options, 0, sizeof (hm->options));
2735   clib_memset (hm->trace, 0, sizeof (hm->trace));
2736   hm->next_override = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP;
2737   return (0);
2738 }
2739
2740 VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
2741
2742 #ifndef CLIB_MARCH_VARIANT
2743 void
2744 ip6_hbh_set_next_override (uword next)
2745 {
2746   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2747
2748   hm->next_override = next;
2749 }
2750
2751 int
2752 ip6_hbh_register_option (u8 option,
2753                          int options (vlib_buffer_t * b, ip6_header_t * ip,
2754                                       ip6_hop_by_hop_option_t * opt),
2755                          u8 * trace (u8 * s, ip6_hop_by_hop_option_t * opt))
2756 {
2757   ip6_main_t *im = &ip6_main;
2758   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2759
2760   ASSERT ((u32) option < ARRAY_LEN (hm->options));
2761
2762   /* Already registered */
2763   if (hm->options[option])
2764     return (-1);
2765
2766   hm->options[option] = options;
2767   hm->trace[option] = trace;
2768
2769   /* Set global variable */
2770   im->hbh_enabled = 1;
2771
2772   return (0);
2773 }
2774
2775 int
2776 ip6_hbh_unregister_option (u8 option)
2777 {
2778   ip6_main_t *im = &ip6_main;
2779   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2780
2781   ASSERT ((u32) option < ARRAY_LEN (hm->options));
2782
2783   /* Not registered */
2784   if (!hm->options[option])
2785     return (-1);
2786
2787   hm->options[option] = NULL;
2788   hm->trace[option] = NULL;
2789
2790   /* Disable global knob if this was the last option configured */
2791   int i;
2792   bool found = false;
2793   for (i = 0; i < 256; i++)
2794     {
2795       if (hm->options[option])
2796         {
2797           found = true;
2798           break;
2799         }
2800     }
2801   if (!found)
2802     im->hbh_enabled = 0;
2803
2804   return (0);
2805 }
2806
2807 /* Global IP6 main. */
2808 ip6_main_t ip6_main;
2809 #endif
2810
2811 static clib_error_t *
2812 ip6_lookup_init (vlib_main_t * vm)
2813 {
2814   ip6_main_t *im = &ip6_main;
2815   clib_error_t *error;
2816   uword i;
2817
2818   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
2819     return error;
2820
2821   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
2822     {
2823       u32 j, i0, i1;
2824
2825       i0 = i / 32;
2826       i1 = i % 32;
2827
2828       for (j = 0; j < i0; j++)
2829         im->fib_masks[i].as_u32[j] = ~0;
2830
2831       if (i1)
2832         im->fib_masks[i].as_u32[i0] =
2833           clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
2834     }
2835
2836   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 1);
2837
2838   /* Create FIB with index 0 and table id of 0. */
2839   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
2840                                      FIB_SOURCE_DEFAULT_ROUTE);
2841   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
2842                                       MFIB_SOURCE_DEFAULT_ROUTE);
2843
2844   {
2845     pg_node_t *pn;
2846     pn = pg_get_node (ip6_lookup_node.index);
2847     pn->unformat_edit = unformat_pg_ip6_header;
2848   }
2849
2850   /* Unless explicitly configured, don't process HBH options */
2851   im->hbh_enabled = 0;
2852
2853   return error;
2854 }
2855
2856 VLIB_INIT_FUNCTION (ip6_lookup_init);
2857
2858 static clib_error_t *
2859 set_ip6_flow_hash_command_fn (vlib_main_t * vm,
2860                               unformat_input_t * input,
2861                               vlib_cli_command_t * cmd)
2862 {
2863   int matched = 0;
2864   u32 table_id = 0;
2865   u32 flow_hash_config = 0;
2866   int rv;
2867
2868   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2869     {
2870       if (unformat (input, "table %d", &table_id))
2871         matched = 1;
2872 #define _(a, b, v)                                                            \
2873   else if (unformat (input, #a))                                              \
2874   {                                                                           \
2875     flow_hash_config |= v;                                                    \
2876     matched = 1;                                                              \
2877   }
2878       foreach_flow_hash_bit
2879 #undef _
2880         else
2881         break;
2882     }
2883
2884   if (matched == 0)
2885     return clib_error_return (0, "unknown input `%U'",
2886                               format_unformat_error, input);
2887
2888   rv = ip_flow_hash_set (AF_IP6, table_id, flow_hash_config);
2889   switch (rv)
2890     {
2891     case 0:
2892       break;
2893
2894     case -1:
2895       return clib_error_return (0, "no such FIB table %d", table_id);
2896
2897     default:
2898       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2899       break;
2900     }
2901
2902   return 0;
2903 }
2904
2905 /*?
2906  * Configure the set of IPv6 fields used by the flow hash.
2907  *
2908  * @cliexpar
2909  * @parblock
2910  * Example of how to set the flow hash on a given table:
2911  * @cliexcmd{set ip6 flow-hash table 8 dst sport dport proto}
2912  *
2913  * Example of how to display the configured flow hash:
2914  * @cliexstart{show ip6 fib}
2915  * ipv6-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2916  * @::/0
2917  *   unicast-ip6-chain
2918  *   [@0]: dpo-load-balance: [index:5 buckets:1 uRPF:5 to:[0:0]]
2919  *     [0] [@0]: dpo-drop ip6
2920  * fe80::/10
2921  *   unicast-ip6-chain
2922  *   [@0]: dpo-load-balance: [index:10 buckets:1 uRPF:10 to:[0:0]]
2923  *     [0] [@2]: dpo-receive
2924  * ff02::1/128
2925  *   unicast-ip6-chain
2926  *   [@0]: dpo-load-balance: [index:8 buckets:1 uRPF:8 to:[0:0]]
2927  *     [0] [@2]: dpo-receive
2928  * ff02::2/128
2929  *   unicast-ip6-chain
2930  *   [@0]: dpo-load-balance: [index:7 buckets:1 uRPF:7 to:[0:0]]
2931  *     [0] [@2]: dpo-receive
2932  * ff02::16/128
2933  *   unicast-ip6-chain
2934  *   [@0]: dpo-load-balance: [index:9 buckets:1 uRPF:9 to:[0:0]]
2935  *     [0] [@2]: dpo-receive
2936  * ff02::1:ff00:0/104
2937  *   unicast-ip6-chain
2938  *   [@0]: dpo-load-balance: [index:6 buckets:1 uRPF:6 to:[0:0]]
2939  *     [0] [@2]: dpo-receive
2940  * ipv6-VRF:8, fib_index 1, flow hash: dst sport dport proto
2941  * @::/0
2942  *   unicast-ip6-chain
2943  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2944  *     [0] [@0]: dpo-drop ip6
2945  * @::a:1:1:0:4/126
2946  *   unicast-ip6-chain
2947  *   [@0]: dpo-load-balance: [index:27 buckets:1 uRPF:26 to:[0:0]]
2948  *     [0] [@4]: ipv6-glean: af_packet0
2949  * @::a:1:1:0:7/128
2950  *   unicast-ip6-chain
2951  *   [@0]: dpo-load-balance: [index:28 buckets:1 uRPF:27 to:[0:0]]
2952  *     [0] [@2]: dpo-receive: @::a:1:1:0:7 on af_packet0
2953  * fe80::/10
2954  *   unicast-ip6-chain
2955  *   [@0]: dpo-load-balance: [index:26 buckets:1 uRPF:25 to:[0:0]]
2956  *     [0] [@2]: dpo-receive
2957  * fe80::fe:3eff:fe3e:9222/128
2958  *   unicast-ip6-chain
2959  *   [@0]: dpo-load-balance: [index:29 buckets:1 uRPF:28 to:[0:0]]
2960  *     [0] [@2]: dpo-receive: fe80::fe:3eff:fe3e:9222 on af_packet0
2961  * ff02::1/128
2962  *   unicast-ip6-chain
2963  *   [@0]: dpo-load-balance: [index:24 buckets:1 uRPF:23 to:[0:0]]
2964  *     [0] [@2]: dpo-receive
2965  * ff02::2/128
2966  *   unicast-ip6-chain
2967  *   [@0]: dpo-load-balance: [index:23 buckets:1 uRPF:22 to:[0:0]]
2968  *     [0] [@2]: dpo-receive
2969  * ff02::16/128
2970  *   unicast-ip6-chain
2971  *   [@0]: dpo-load-balance: [index:25 buckets:1 uRPF:24 to:[0:0]]
2972  *     [0] [@2]: dpo-receive
2973  * ff02::1:ff00:0/104
2974  *   unicast-ip6-chain
2975  *   [@0]: dpo-load-balance: [index:22 buckets:1 uRPF:21 to:[0:0]]
2976  *     [0] [@2]: dpo-receive
2977  * @cliexend
2978  * @endparblock
2979 ?*/
2980 VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) = {
2981   .path = "set ip6 flow-hash",
2982   .short_help = "set ip6 flow-hash table <table-id> [src] [dst] [sport] "
2983                 "[dport] [proto] [reverse] [flowlabel]",
2984   .function = set_ip6_flow_hash_command_fn,
2985 };
2986
2987 static clib_error_t *
2988 show_ip6_local_command_fn (vlib_main_t * vm,
2989                            unformat_input_t * input, vlib_cli_command_t * cmd)
2990 {
2991   ip6_main_t *im = &ip6_main;
2992   ip_lookup_main_t *lm = &im->lookup_main;
2993   int i;
2994
2995   vlib_cli_output (vm, "Protocols handled by ip6_local");
2996   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
2997     {
2998       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2999         {
3000
3001           u32 node_index = vlib_get_node (vm,
3002                                           ip6_local_node.index)->
3003             next_nodes[lm->local_next_by_ip_protocol[i]];
3004           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
3005                            node_index);
3006         }
3007     }
3008   return 0;
3009 }
3010
3011
3012
3013 /*?
3014  * Display the set of protocols handled by the local IPv6 stack.
3015  *
3016  * @cliexpar
3017  * Example of how to display local protocol table:
3018  * @cliexstart{show ip6 local}
3019  * Protocols handled by ip6_local
3020  * 17
3021  * 43
3022  * 58
3023  * 115
3024  * @cliexend
3025 ?*/
3026 VLIB_CLI_COMMAND (show_ip6_local, static) =
3027 {
3028   .path = "show ip6 local",
3029   .function = show_ip6_local_command_fn,
3030   .short_help = "show ip6 local",
3031 };
3032
3033 #ifndef CLIB_MARCH_VARIANT
3034 int
3035 vnet_set_ip6_classify_intfc (vlib_main_t *vm, u32 sw_if_index, u32 table_index)
3036 {
3037   vnet_main_t *vnm = vnet_get_main ();
3038   vnet_interface_main_t *im = &vnm->interface_main;
3039   ip6_main_t *ipm = &ip6_main;
3040   ip_lookup_main_t *lm = &ipm->lookup_main;
3041   vnet_classify_main_t *cm = &vnet_classify_main;
3042   ip6_address_t *if_addr;
3043
3044   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3045     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3046
3047   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3048     return VNET_API_ERROR_NO_SUCH_ENTRY;
3049
3050   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3051   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3052
3053   if_addr = ip6_interface_first_address (ipm, sw_if_index);
3054
3055   if (NULL != if_addr)
3056     {
3057       fib_prefix_t pfx = {
3058         .fp_len = 128,
3059         .fp_proto = FIB_PROTOCOL_IP6,
3060         .fp_addr.ip6 = *if_addr,
3061       };
3062       u32 fib_index;
3063
3064       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3065                                                        sw_if_index);
3066       if (table_index != (u32) ~ 0)
3067         {
3068           dpo_id_t dpo = DPO_INVALID;
3069           dpo_set (&dpo,
3070                    DPO_CLASSIFY,
3071                    DPO_PROTO_IP6,
3072                    classify_dpo_create (DPO_PROTO_IP6, table_index));
3073           fib_table_entry_special_dpo_add (fib_index,
3074                                            &pfx,
3075                                            FIB_SOURCE_CLASSIFY,
3076                                            FIB_ENTRY_FLAG_NONE, &dpo);
3077           dpo_reset (&dpo);
3078         }
3079       else
3080         {
3081           fib_table_entry_special_remove (fib_index,
3082                                           &pfx, FIB_SOURCE_CLASSIFY);
3083         }
3084     }
3085
3086   return 0;
3087 }
3088 #endif
3089
3090 static clib_error_t *
3091 set_ip6_classify_command_fn (vlib_main_t * vm,
3092                              unformat_input_t * input,
3093                              vlib_cli_command_t * cmd)
3094 {
3095   u32 table_index = ~0;
3096   int table_index_set = 0;
3097   u32 sw_if_index = ~0;
3098   int rv;
3099
3100   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3101     {
3102       if (unformat (input, "table-index %d", &table_index))
3103         table_index_set = 1;
3104       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3105                          vnet_get_main (), &sw_if_index))
3106         ;
3107       else
3108         break;
3109     }
3110
3111   if (table_index_set == 0)
3112     return clib_error_return (0, "classify table-index must be specified");
3113
3114   if (sw_if_index == ~0)
3115     return clib_error_return (0, "interface / subif must be specified");
3116
3117   rv = vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index);
3118
3119   switch (rv)
3120     {
3121     case 0:
3122       break;
3123
3124     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3125       return clib_error_return (0, "No such interface");
3126
3127     case VNET_API_ERROR_NO_SUCH_ENTRY:
3128       return clib_error_return (0, "No such classifier table");
3129     }
3130   return 0;
3131 }
3132
3133 /*?
3134  * Assign a classification table to an interface. The classification
3135  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3136  * commands. Once the table is created, use this command to filter packets
3137  * on an interface.
3138  *
3139  * @cliexpar
3140  * Example of how to assign a classification table to an interface:
3141  * @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1}
3142 ?*/
3143 VLIB_CLI_COMMAND (set_ip6_classify_command, static) =
3144 {
3145   .path = "set ip6 classify",
3146   .short_help =
3147   "set ip6 classify intfc <interface> table-index <classify-idx>",
3148   .function = set_ip6_classify_command_fn,
3149 };
3150
3151 /*
3152  * fd.io coding-style-patch-verification: ON
3153  *
3154  * Local Variables:
3155  * eval: (c-set-style "gnu")
3156  * End:
3157  */