ip: add support for buffer offload metadata in ip midchain
[vpp.git] / src / vnet / ip / ip6_forward.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip6_forward.c: IP v6 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ip/ip6_link.h>
44 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vppinfra/cache.h>
47 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
48 #include <vnet/fib/ip6_fib.h>
49 #include <vnet/mfib/ip6_mfib.h>
50 #include <vnet/dpo/load_balance_map.h>
51 #include <vnet/dpo/receive_dpo.h>
52 #include <vnet/dpo/classify_dpo.h>
53 #include <vnet/classify/vnet_classify.h>
54 #include <vnet/pg/pg.h>
55
56 #ifndef CLIB_MARCH_VARIANT
57 #include <vppinfra/bihash_template.c>
58 #endif
59 #include <vnet/ip/ip6_forward.h>
60 #include <vnet/interface_output.h>
61
62 /* Flag used by IOAM code. Classifier sets it pop-hop-by-hop checks it */
63 #define OI_DECAP   0x80000000
64
65 static void
66 ip6_add_interface_prefix_routes (ip6_main_t * im,
67                                  u32 sw_if_index,
68                                  u32 fib_index,
69                                  ip6_address_t * address, u32 address_length)
70 {
71   ip_lookup_main_t *lm = &im->lookup_main;
72   ip_interface_prefix_t *if_prefix;
73
74   ip_interface_prefix_key_t key = {
75     .prefix = {
76       .fp_len = address_length,
77       .fp_proto = FIB_PROTOCOL_IP6,
78       .fp_addr.ip6 = {
79         .as_u64 = {
80           address->as_u64[0] & im->fib_masks[address_length].as_u64[0],
81           address->as_u64[1] & im->fib_masks[address_length].as_u64[1],
82         },
83       },
84     },
85     .sw_if_index = sw_if_index,
86   };
87
88   /* If prefix already set on interface, just increment ref count & return */
89   if_prefix = ip_get_interface_prefix (lm, &key);
90   if (if_prefix)
91     {
92       if_prefix->ref_count += 1;
93       return;
94     }
95
96   /* New prefix - allocate a pool entry, initialize it, add to the hash */
97   pool_get (lm->if_prefix_pool, if_prefix);
98   if_prefix->ref_count = 1;
99   clib_memcpy (&if_prefix->key, &key, sizeof (key));
100   mhash_set (&lm->prefix_to_if_prefix_index, &key,
101              if_prefix - lm->if_prefix_pool, 0 /* old value */ );
102
103   /* length < 128 - add glean */
104   if (address_length < 128)
105     {
106       /* set the glean route for the prefix */
107       fib_table_entry_update_one_path (fib_index, &key.prefix,
108                                        FIB_SOURCE_INTERFACE,
109                                        (FIB_ENTRY_FLAG_CONNECTED |
110                                         FIB_ENTRY_FLAG_ATTACHED),
111                                        DPO_PROTO_IP6,
112                                        /* No next-hop address */
113                                        NULL, sw_if_index,
114                                        /* invalid FIB index */
115                                        ~0, 1,
116                                        /* no out-label stack */
117                                        NULL, FIB_ROUTE_PATH_FLAG_NONE);
118     }
119 }
120
121 static void
122 ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
123                           ip6_main_t * im, u32 fib_index,
124                           ip_interface_address_t * a)
125 {
126   ip_lookup_main_t *lm = &im->lookup_main;
127   ip6_address_t *address = ip_interface_address_get_address (lm, a);
128   fib_prefix_t pfx = {
129     .fp_len = a->address_length,
130     .fp_proto = FIB_PROTOCOL_IP6,
131     .fp_addr.ip6 = *address,
132   };
133
134   /* set special routes for the prefix if needed */
135   ip6_add_interface_prefix_routes (im, sw_if_index, fib_index,
136                                    address, a->address_length);
137
138   pfx.fp_len = 128;
139   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
140     {
141       u32 classify_table_index =
142         lm->classify_table_index_by_sw_if_index[sw_if_index];
143       if (classify_table_index != (u32) ~ 0)
144         {
145           dpo_id_t dpo = DPO_INVALID;
146
147           dpo_set (&dpo,
148                    DPO_CLASSIFY,
149                    DPO_PROTO_IP6,
150                    classify_dpo_create (DPO_PROTO_IP6, classify_table_index));
151
152           fib_table_entry_special_dpo_add (fib_index,
153                                            &pfx,
154                                            FIB_SOURCE_CLASSIFY,
155                                            FIB_ENTRY_FLAG_NONE, &dpo);
156           dpo_reset (&dpo);
157         }
158     }
159
160   fib_table_entry_update_one_path (fib_index, &pfx,
161                                    FIB_SOURCE_INTERFACE,
162                                    (FIB_ENTRY_FLAG_CONNECTED |
163                                     FIB_ENTRY_FLAG_LOCAL),
164                                    DPO_PROTO_IP6,
165                                    &pfx.fp_addr,
166                                    sw_if_index, ~0,
167                                    1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
168 }
169
170 static void
171 ip6_del_interface_prefix_routes (ip6_main_t * im,
172                                  u32 sw_if_index,
173                                  u32 fib_index,
174                                  ip6_address_t * address, u32 address_length)
175 {
176   ip_lookup_main_t *lm = &im->lookup_main;
177   ip_interface_prefix_t *if_prefix;
178
179   ip_interface_prefix_key_t key = {
180     .prefix = {
181       .fp_len = address_length,
182       .fp_proto = FIB_PROTOCOL_IP6,
183       .fp_addr.ip6 = {
184         .as_u64 = {
185           address->as_u64[0] & im->fib_masks[address_length].as_u64[0],
186           address->as_u64[1] & im->fib_masks[address_length].as_u64[1],
187         },
188       },
189     },
190     .sw_if_index = sw_if_index,
191   };
192
193   if_prefix = ip_get_interface_prefix (lm, &key);
194   if (!if_prefix)
195     {
196       clib_warning ("Prefix not found while deleting %U",
197                     format_ip6_address_and_length, address, address_length);
198       return;
199     }
200
201   /* If not deleting last intf addr in prefix, decrement ref count & return */
202   if_prefix->ref_count -= 1;
203   if (if_prefix->ref_count > 0)
204     return;
205
206   /* length <= 128, delete glean route */
207   if (address_length <= 128)
208     {
209       /* remove glean route for prefix */
210       fib_table_entry_delete (fib_index, &key.prefix, FIB_SOURCE_INTERFACE);
211     }
212
213   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */ );
214   pool_put (lm->if_prefix_pool, if_prefix);
215 }
216
217 static void
218 ip6_del_interface_routes (u32 sw_if_index, ip6_main_t * im,
219                           u32 fib_index,
220                           ip6_address_t * address, u32 address_length)
221 {
222   fib_prefix_t pfx = {
223     .fp_len = 128,
224     .fp_proto = FIB_PROTOCOL_IP6,
225     .fp_addr.ip6 = *address,
226   };
227
228   /* delete special routes for the prefix if needed */
229   ip6_del_interface_prefix_routes (im, sw_if_index, fib_index,
230                                    address, address_length);
231
232   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
233 }
234
235 #ifndef CLIB_MARCH_VARIANT
236 void
237 ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
238 {
239   ip6_main_t *im = &ip6_main;
240   vnet_main_t *vnm = vnet_get_main ();
241   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
242
243   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
244
245   /*
246    * enable/disable only on the 1<->0 transition
247    */
248   if (is_enable)
249     {
250       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
251         return;
252     }
253   else
254     {
255       /* The ref count is 0 when an address is removed from an interface that has
256        * no address - this is not a ciritical error */
257       if (0 == im->ip_enabled_by_sw_if_index[sw_if_index] ||
258           0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
259         return;
260     }
261
262   vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index,
263                                !is_enable, 0, 0);
264
265   vnet_feature_enable_disable ("ip6-multicast", "ip6-not-enabled",
266                                sw_if_index, !is_enable, 0, 0);
267
268   if (is_enable)
269     hi->l3_if_count++;
270   else if (hi->l3_if_count)
271     hi->l3_if_count--;
272 }
273
274 /* get first interface address */
275 ip6_address_t *
276 ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index)
277 {
278   ip_lookup_main_t *lm = &im->lookup_main;
279   ip_interface_address_t *ia = 0;
280   ip6_address_t *result = 0;
281
282   foreach_ip_interface_address (lm, ia, sw_if_index,
283                                 1 /* honor unnumbered */,
284   ({
285     ip6_address_t * a = ip_interface_address_get_address (lm, ia);
286     result = a;
287     break;
288   }));
289   return result;
290 }
291
292 clib_error_t *
293 ip6_add_del_interface_address (vlib_main_t * vm,
294                                u32 sw_if_index,
295                                ip6_address_t * address,
296                                u32 address_length, u32 is_del)
297 {
298   vnet_main_t *vnm = vnet_get_main ();
299   ip6_main_t *im = &ip6_main;
300   ip_lookup_main_t *lm = &im->lookup_main;
301   clib_error_t *error = NULL;
302   u32 if_address_index;
303   ip6_address_fib_t ip6_af, *addr_fib = 0;
304   const ip6_address_t *ll_addr;
305
306   error = vnet_sw_interface_supports_addressing (vnm, sw_if_index);
307   if (error)
308     {
309       vnm->api_errno = VNET_API_ERROR_UNSUPPORTED;
310       return error;
311     }
312
313   if (ip6_address_is_link_local_unicast (address))
314     {
315       if (address_length != 128)
316         {
317           vnm->api_errno = VNET_API_ERROR_ADDRESS_LENGTH_MISMATCH;
318           return
319             clib_error_create
320             ("prefix length of link-local address must be 128");
321         }
322       if (!is_del)
323         {
324           int rv;
325
326           rv = ip6_link_set_local_address (sw_if_index, address);
327
328           if (rv)
329             {
330               vnm->api_errno = rv;
331               return clib_error_create ("address not assignable");
332             }
333         }
334       else
335         {
336           ll_addr = ip6_get_link_local_address (sw_if_index);
337           if (ip6_address_is_equal (ll_addr, address))
338             {
339               vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_DELETABLE;
340               return clib_error_create ("address not deletable");
341             }
342           else
343             {
344               vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
345               return clib_error_create ("address not found");
346             }
347         }
348
349       return (NULL);
350     }
351
352   ip6_addr_fib_init (&ip6_af, address,
353                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
354   vec_add1 (addr_fib, ip6_af);
355
356   if (!is_del)
357     {
358       /* When adding an address check that it does not conflict
359          with an existing address on any interface in this table. */
360       ip_interface_address_t *ia;
361       vnet_sw_interface_t *sif;
362
363       pool_foreach (sif, vnm->interface_main.sw_interfaces)
364        {
365           if (im->fib_index_by_sw_if_index[sw_if_index] ==
366               im->fib_index_by_sw_if_index[sif->sw_if_index])
367             {
368               foreach_ip_interface_address
369                 (&im->lookup_main, ia, sif->sw_if_index,
370                  0 /* honor unnumbered */ ,
371                  ({
372                    ip6_address_t * x =
373                      ip_interface_address_get_address
374                      (&im->lookup_main, ia);
375
376                    if (ip6_destination_matches_route
377                        (im, address, x, ia->address_length) ||
378                        ip6_destination_matches_route (im,
379                                                       x,
380                                                       address,
381                                                       address_length))
382                      {
383                        /* an intf may have >1 addr from the same prefix */
384                        if ((sw_if_index == sif->sw_if_index) &&
385                            (ia->address_length == address_length) &&
386                            !ip6_address_is_equal (x, address))
387                          continue;
388
389                        if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
390                          /* if the address we're comparing against is stale
391                           * then the CP has not added this one back yet, maybe
392                           * it never will, so we have to assume it won't and
393                           * ignore it. if it does add it back, then it will fail
394                           * because this one is now present */
395                          continue;
396
397                        /* error if the length or intf was different */
398                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
399                        error =  clib_error_create
400                          ("failed to add %U which conflicts with %U for interface %U",
401                           format_ip6_address_and_length, address,
402                           address_length,
403                           format_ip6_address_and_length, x,
404                           ia->address_length,
405                           format_vnet_sw_if_index_name, vnm,
406                           sif->sw_if_index);
407                        goto done;
408                      }
409                  }));
410             }
411       }
412     }
413
414   if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
415
416   if (is_del)
417     {
418       if (~0 == if_address_index)
419         {
420           vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
421           error = clib_error_create ("%U not found for interface %U",
422                                      lm->format_address_and_length,
423                                      addr_fib, address_length,
424                                      format_vnet_sw_if_index_name, vnm,
425                                      sw_if_index);
426           goto done;
427         }
428
429       error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib,
430                                         address_length, sw_if_index);
431       if (error)
432         goto done;
433     }
434   else
435     {
436       if (~0 != if_address_index)
437         {
438           ip_interface_address_t *ia;
439
440           ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
441
442           if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
443             {
444               if (ia->sw_if_index == sw_if_index)
445                 {
446                   /* re-adding an address during the replace action.
447                    * consdier this the update. clear the flag and
448                    * we're done */
449                   ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
450                   goto done;
451                 }
452               else
453                 {
454                   /* The prefix is moving from one interface to another.
455                    * delete the stale and add the new */
456                   ip6_add_del_interface_address (vm,
457                                                  ia->sw_if_index,
458                                                  address, address_length, 1);
459                   ia = NULL;
460                   error = ip_interface_address_add (lm, sw_if_index,
461                                                     addr_fib, address_length,
462                                                     &if_address_index);
463                 }
464             }
465           else
466             {
467               vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
468               error = clib_error_create
469                 ("Prefix %U already found on interface %U",
470                  lm->format_address_and_length, addr_fib, address_length,
471                  format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
472             }
473         }
474       else
475         error = ip_interface_address_add (lm, sw_if_index,
476                                           addr_fib, address_length,
477                                           &if_address_index);
478     }
479
480   if (error)
481     goto done;
482
483   ip6_sw_interface_enable_disable (sw_if_index, !is_del);
484   if (!is_del)
485     ip6_link_enable (sw_if_index, NULL);
486
487   /* intf addr routes are added/deleted on admin up/down */
488   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
489     {
490       if (is_del)
491         ip6_del_interface_routes (sw_if_index,
492                                   im, ip6_af.fib_index, address,
493                                   address_length);
494       else
495         ip6_add_interface_routes (vnm, sw_if_index,
496                                   im, ip6_af.fib_index,
497                                   pool_elt_at_index (lm->if_address_pool,
498                                                      if_address_index));
499     }
500
501   ip6_add_del_interface_address_callback_t *cb;
502   vec_foreach (cb, im->add_del_interface_address_callbacks)
503     cb->function (im, cb->function_opaque, sw_if_index,
504                   address, address_length, if_address_index, is_del);
505
506   if (is_del)
507     ip6_link_disable (sw_if_index);
508
509 done:
510   vec_free (addr_fib);
511   return error;
512 }
513
514 #endif
515
516 static clib_error_t *
517 ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
518 {
519   ip6_main_t *im = &ip6_main;
520   ip_interface_address_t *ia;
521   ip6_address_t *a;
522   u32 is_admin_up, fib_index;
523
524   vec_validate_init_empty (im->
525                            lookup_main.if_address_pool_index_by_sw_if_index,
526                            sw_if_index, ~0);
527
528   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
529
530   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
531
532   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
533                                 0 /* honor unnumbered */,
534   ({
535     a = ip_interface_address_get_address (&im->lookup_main, ia);
536     if (is_admin_up)
537       ip6_add_interface_routes (vnm, sw_if_index,
538                                 im, fib_index,
539                                 ia);
540     else
541       ip6_del_interface_routes (sw_if_index, im, fib_index,
542                                 a, ia->address_length);
543   }));
544
545   return 0;
546 }
547
548 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
549
550 /* Built-in ip6 unicast rx feature path definition */
551 VNET_FEATURE_ARC_INIT (ip6_unicast, static) =
552 {
553   .arc_name  = "ip6-unicast",
554   .start_nodes = VNET_FEATURES ("ip6-input"),
555   .last_in_arc = "ip6-lookup",
556   .arc_index_ptr = &ip6_main.lookup_main.ucast_feature_arc_index,
557 };
558
559 VNET_FEATURE_INIT (ip6_flow_classify, static) =
560 {
561   .arc_name = "ip6-unicast",
562   .node_name = "ip6-flow-classify",
563   .runs_before = VNET_FEATURES ("ip6-inacl"),
564 };
565
566 VNET_FEATURE_INIT (ip6_inacl, static) =
567 {
568   .arc_name = "ip6-unicast",
569   .node_name = "ip6-inacl",
570   .runs_before = VNET_FEATURES ("ip6-policer-classify"),
571 };
572
573 VNET_FEATURE_INIT (ip6_policer_classify, static) =
574 {
575   .arc_name = "ip6-unicast",
576   .node_name = "ip6-policer-classify",
577   .runs_before = VNET_FEATURES ("ipsec6-input-feature"),
578 };
579
580 VNET_FEATURE_INIT (ip6_ipsec, static) =
581 {
582   .arc_name = "ip6-unicast",
583   .node_name = "ipsec6-input-feature",
584   .runs_before = VNET_FEATURES ("l2tp-decap"),
585 };
586
587 VNET_FEATURE_INIT (ip6_l2tp, static) =
588 {
589   .arc_name = "ip6-unicast",
590   .node_name = "l2tp-decap",
591   .runs_before = VNET_FEATURES ("vpath-input-ip6"),
592 };
593
594 VNET_FEATURE_INIT (ip6_vpath, static) =
595 {
596   .arc_name = "ip6-unicast",
597   .node_name = "vpath-input-ip6",
598   .runs_before = VNET_FEATURES ("ip6-vxlan-bypass"),
599 };
600
601 VNET_FEATURE_INIT (ip6_vxlan_bypass, static) =
602 {
603   .arc_name = "ip6-unicast",
604   .node_name = "ip6-vxlan-bypass",
605   .runs_before = VNET_FEATURES ("ip6-lookup"),
606 };
607
608 VNET_FEATURE_INIT (ip6_not_enabled, static) =
609 {
610   .arc_name = "ip6-unicast",
611   .node_name = "ip6-not-enabled",
612   .runs_before = VNET_FEATURES ("ip6-lookup"),
613 };
614
615 VNET_FEATURE_INIT (ip6_lookup, static) =
616 {
617   .arc_name = "ip6-unicast",
618   .node_name = "ip6-lookup",
619   .runs_before = 0,  /*last feature*/
620 };
621
/* Built-in ip6 multicast rx feature path definition */
623 VNET_FEATURE_ARC_INIT (ip6_multicast, static) =
624 {
625   .arc_name  = "ip6-multicast",
626   .start_nodes = VNET_FEATURES ("ip6-input"),
627   .last_in_arc = "ip6-mfib-forward-lookup",
628   .arc_index_ptr = &ip6_main.lookup_main.mcast_feature_arc_index,
629 };
630
631 VNET_FEATURE_INIT (ip6_vpath_mc, static) = {
632   .arc_name = "ip6-multicast",
633   .node_name = "vpath-input-ip6",
634   .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
635 };
636
637 VNET_FEATURE_INIT (ip6_not_enabled_mc, static) = {
638   .arc_name = "ip6-multicast",
639   .node_name = "ip6-not-enabled",
640   .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
641 };
642
643 VNET_FEATURE_INIT (ip6_mc_lookup, static) = {
644   .arc_name = "ip6-multicast",
645   .node_name = "ip6-mfib-forward-lookup",
646   .runs_before = 0, /* last feature */
647 };
648
/* Built-in ip6 tx feature path definition */
650 VNET_FEATURE_ARC_INIT (ip6_output, static) =
651 {
652   .arc_name  = "ip6-output",
653   .start_nodes = VNET_FEATURES ("ip6-rewrite", "ip6-midchain", "ip6-dvr-dpo"),
654   .last_in_arc = "interface-output",
655   .arc_index_ptr = &ip6_main.lookup_main.output_feature_arc_index,
656 };
657
658 VNET_FEATURE_INIT (ip6_outacl, static) = {
659   .arc_name = "ip6-output",
660   .node_name = "ip6-outacl",
661   .runs_before = VNET_FEATURES ("ipsec6-output-feature"),
662 };
663
664 VNET_FEATURE_INIT (ip6_ipsec_output, static) = {
665   .arc_name = "ip6-output",
666   .node_name = "ipsec6-output-feature",
667   .runs_before = VNET_FEATURES ("interface-output"),
668 };
669
670 VNET_FEATURE_INIT (ip6_interface_output, static) = {
671   .arc_name = "ip6-output",
672   .node_name = "interface-output",
673   .runs_before = 0, /* not before any other features */
674 };
675
676 static clib_error_t *
677 ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
678 {
679   ip6_main_t *im = &ip6_main;
680
681   vec_validate_init_empty (im->fib_index_by_sw_if_index, sw_if_index, ~0);
682   vec_validate_init_empty (im->mfib_index_by_sw_if_index, sw_if_index, ~0);
683
684   if (is_add)
685     {
686       /* Fill in lookup tables with default table (0). */
687       im->fib_index_by_sw_if_index[sw_if_index] = 0;
688       im->mfib_index_by_sw_if_index[sw_if_index] = 0;
689     }
690   else
691     {
692       /* Ensure that IPv6 is disabled */
693       ip6_main_t *im6 = &ip6_main;
694       ip_lookup_main_t *lm6 = &im6->lookup_main;
695       ip_interface_address_t *ia = 0;
696       ip6_address_t *address;
697       vlib_main_t *vm = vlib_get_main ();
698
699       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
700       foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
701       ({
702         address = ip_interface_address_get_address (lm6, ia);
703         ip6_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
704       }));
705       ip6_mfib_interface_enable_disable (sw_if_index, 0);
706
707       if (0 != im6->fib_index_by_sw_if_index[sw_if_index])
708         fib_table_bind (FIB_PROTOCOL_IP6, sw_if_index, 0);
709       if (0 != im6->mfib_index_by_sw_if_index[sw_if_index])
710         mfib_table_bind (FIB_PROTOCOL_IP6, sw_if_index, 0);
711
712       /* Erase the lookup tables just in case */
713       im6->fib_index_by_sw_if_index[sw_if_index] = ~0;
714       im6->mfib_index_by_sw_if_index[sw_if_index] = ~0;
715     }
716
717   vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index,
718                                is_add, 0, 0);
719
720   vnet_feature_enable_disable ("ip6-multicast", "ip6-not-enabled",
721                                sw_if_index, is_add, 0, 0);
722
723   return /* no error */ 0;
724 }
725
726 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del);
727
728 VLIB_NODE_FN (ip6_lookup_node) (vlib_main_t * vm,
729                                 vlib_node_runtime_t * node,
730                                 vlib_frame_t * frame)
731 {
732   return ip6_lookup_inline (vm, node, frame);
733 }
734
735 static u8 *format_ip6_lookup_trace (u8 * s, va_list * args);
736
737 VLIB_REGISTER_NODE (ip6_lookup_node) =
738 {
739   .name = "ip6-lookup",
740   .vector_size = sizeof (u32),
741   .format_trace = format_ip6_lookup_trace,
742   .n_next_nodes = IP6_LOOKUP_N_NEXT,
743   .next_nodes = IP6_LOOKUP_NEXT_NODES,
744 };
745
746 VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm,
747                                       vlib_node_runtime_t * node,
748                                       vlib_frame_t * frame)
749 {
750   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
751   u32 n_left, *from;
752   u32 thread_index = vm->thread_index;
753   ip6_main_t *im = &ip6_main;
754   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
755   u16 nexts[VLIB_FRAME_SIZE], *next;
756
757   from = vlib_frame_vector_args (frame);
758   n_left = frame->n_vectors;
759   next = nexts;
760
761   vlib_get_buffers (vm, from, bufs, n_left);
762
763   while (n_left >= 4)
764     {
765       const load_balance_t *lb0, *lb1;
766       const ip6_header_t *ip0, *ip1;
767       u32 lbi0, hc0, lbi1, hc1;
768       const dpo_id_t *dpo0, *dpo1;
769
770       /* Prefetch next iteration. */
771       {
772         vlib_prefetch_buffer_header (b[2], STORE);
773         vlib_prefetch_buffer_header (b[3], STORE);
774
775         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), STORE);
776         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), STORE);
777       }
778
779       ip0 = vlib_buffer_get_current (b[0]);
780       ip1 = vlib_buffer_get_current (b[1]);
781       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
782       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
783
784       lb0 = load_balance_get (lbi0);
785       lb1 = load_balance_get (lbi1);
786
787       /*
788        * this node is for via FIBs we can re-use the hash value from the
789        * to node if present.
790        * We don't want to use the same hash value at each level in the recursion
791        * graph as that would lead to polarisation
792        */
793       hc0 = hc1 = 0;
794
795       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
796         {
797           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
798             {
799               hc0 = vnet_buffer (b[0])->ip.flow_hash =
800                 vnet_buffer (b[0])->ip.flow_hash >> 1;
801             }
802           else
803             {
804               hc0 = vnet_buffer (b[0])->ip.flow_hash =
805                 ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
806             }
807           dpo0 = load_balance_get_fwd_bucket
808             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
809         }
810       else
811         {
812           dpo0 = load_balance_get_bucket_i (lb0, 0);
813         }
814       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
815         {
816           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
817             {
818               hc1 = vnet_buffer (b[1])->ip.flow_hash =
819                 vnet_buffer (b[1])->ip.flow_hash >> 1;
820             }
821           else
822             {
823               hc1 = vnet_buffer (b[1])->ip.flow_hash =
824                 ip6_compute_flow_hash (ip1, lb1->lb_hash_config);
825             }
826           dpo1 = load_balance_get_fwd_bucket
827             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
828         }
829       else
830         {
831           dpo1 = load_balance_get_bucket_i (lb1, 0);
832         }
833
834       next[0] = dpo0->dpoi_next_node;
835       next[1] = dpo1->dpoi_next_node;
836
837       /* Only process the HBH Option Header if explicitly configured to do so */
838       if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
839         {
840           next[0] = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
841             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[0];
842         }
843       /* Only process the HBH Option Header if explicitly configured to do so */
844       if (PREDICT_FALSE (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
845         {
846           next[1] = (dpo_is_adj (dpo1) && im->hbh_enabled) ?
847             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[1];
848         }
849
850       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
851       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
852
853       vlib_increment_combined_counter
854         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
855       vlib_increment_combined_counter
856         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
857
858       b += 2;
859       next += 2;
860       n_left -= 2;
861     }
862
863   while (n_left > 0)
864     {
865       const load_balance_t *lb0;
866       const ip6_header_t *ip0;
867       const dpo_id_t *dpo0;
868       u32 lbi0, hc0;
869
870       ip0 = vlib_buffer_get_current (b[0]);
871       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
872
873       lb0 = load_balance_get (lbi0);
874
875       hc0 = 0;
876       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
877         {
878           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
879             {
880               hc0 = vnet_buffer (b[0])->ip.flow_hash =
881                 vnet_buffer (b[0])->ip.flow_hash >> 1;
882             }
883           else
884             {
885               hc0 = vnet_buffer (b[0])->ip.flow_hash =
886                 ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
887             }
888           dpo0 = load_balance_get_fwd_bucket
889             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
890         }
891       else
892         {
893           dpo0 = load_balance_get_bucket_i (lb0, 0);
894         }
895
896       next[0] = dpo0->dpoi_next_node;
897       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
898
899       /* Only process the HBH Option Header if explicitly configured to do so */
900       if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
901         {
902           next[0] = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
903             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[0];
904         }
905
906       vlib_increment_combined_counter
907         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
908
909       b += 1;
910       next += 1;
911       n_left -= 1;
912     }
913
914   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
915
916   if (node->flags & VLIB_NODE_FLAG_TRACE)
917     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
918
919   return frame->n_vectors;
920 }
921
922 VLIB_REGISTER_NODE (ip6_load_balance_node) =
923 {
924   .name = "ip6-load-balance",
925   .vector_size = sizeof (u32),
926   .sibling_of = "ip6-lookup",
927   .format_trace = format_ip6_lookup_trace,
928 };
929
930 typedef struct
931 {
932   /* Adjacency taken. */
933   u32 adj_index;
934   u32 flow_hash;
935   u32 fib_index;
936
937   /* Packet data, possibly *after* rewrite. */
938   u8 packet_data[128 - 1 * sizeof (u32)];
939 }
940 ip6_forward_next_trace_t;
941
942 #ifndef CLIB_MARCH_VARIANT
943 u8 *
944 format_ip6_forward_next_trace (u8 * s, va_list * args)
945 {
946   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
947   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
948   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
949   u32 indent = format_get_indent (s);
950
951   s = format (s, "%Ufib:%d adj:%d flow:%d",
952               format_white_space, indent,
953               t->fib_index, t->adj_index, t->flow_hash);
954   s = format (s, "\n%U%U",
955               format_white_space, indent,
956               format_ip6_header, t->packet_data, sizeof (t->packet_data));
957   return s;
958 }
959 #endif
960
961 static u8 *
962 format_ip6_lookup_trace (u8 * s, va_list * args)
963 {
964   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
965   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
966   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
967   u32 indent = format_get_indent (s);
968
969   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
970               t->fib_index, t->adj_index, t->flow_hash);
971   s = format (s, "\n%U%U",
972               format_white_space, indent,
973               format_ip6_header, t->packet_data, sizeof (t->packet_data));
974   return s;
975 }
976
977
978 static u8 *
979 format_ip6_rewrite_trace (u8 * s, va_list * args)
980 {
981   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
982   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
983   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
984   u32 indent = format_get_indent (s);
985
986   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
987               t->fib_index, t->adj_index, format_ip_adjacency,
988               t->adj_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
989   s = format (s, "\n%U%U",
990               format_white_space, indent,
991               format_ip_adjacency_packet_data,
992               t->packet_data, sizeof (t->packet_data));
993   return s;
994 }
995
996 /* Common trace function for all ip6-forward next nodes. */
997 #ifndef CLIB_MARCH_VARIANT
998 void
999 ip6_forward_next_trace (vlib_main_t * vm,
1000                         vlib_node_runtime_t * node,
1001                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1002 {
1003   u32 *from, n_left;
1004   ip6_main_t *im = &ip6_main;
1005
1006   n_left = frame->n_vectors;
1007   from = vlib_frame_vector_args (frame);
1008
1009   while (n_left >= 4)
1010     {
1011       u32 bi0, bi1;
1012       vlib_buffer_t *b0, *b1;
1013       ip6_forward_next_trace_t *t0, *t1;
1014
1015       /* Prefetch next iteration. */
1016       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1017       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1018
1019       bi0 = from[0];
1020       bi1 = from[1];
1021
1022       b0 = vlib_get_buffer (vm, bi0);
1023       b1 = vlib_get_buffer (vm, bi1);
1024
1025       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1026         {
1027           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1028           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1029           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1030           t0->fib_index =
1031             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1032              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1033             vec_elt (im->fib_index_by_sw_if_index,
1034                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1035
1036           clib_memcpy_fast (t0->packet_data,
1037                             vlib_buffer_get_current (b0),
1038                             sizeof (t0->packet_data));
1039         }
1040       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1041         {
1042           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1043           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1044           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1045           t1->fib_index =
1046             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1047              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1048             vec_elt (im->fib_index_by_sw_if_index,
1049                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1050
1051           clib_memcpy_fast (t1->packet_data,
1052                             vlib_buffer_get_current (b1),
1053                             sizeof (t1->packet_data));
1054         }
1055       from += 2;
1056       n_left -= 2;
1057     }
1058
1059   while (n_left >= 1)
1060     {
1061       u32 bi0;
1062       vlib_buffer_t *b0;
1063       ip6_forward_next_trace_t *t0;
1064
1065       bi0 = from[0];
1066
1067       b0 = vlib_get_buffer (vm, bi0);
1068
1069       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1070         {
1071           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1072           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1073           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1074           t0->fib_index =
1075             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1076              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1077             vec_elt (im->fib_index_by_sw_if_index,
1078                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1079
1080           clib_memcpy_fast (t0->packet_data,
1081                             vlib_buffer_get_current (b0),
1082                             sizeof (t0->packet_data));
1083         }
1084       from += 1;
1085       n_left -= 1;
1086     }
1087 }
1088
1089 /* Compute TCP/UDP/ICMP6 checksum in software. */
1090 u16
1091 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1092                                    ip6_header_t * ip0, int *bogus_lengthp)
1093 {
1094   ip_csum_t sum0 = 0;
1095   u16 payload_length, payload_length_host_byte_order;
1096   u32 i;
1097   u32 headers_size = sizeof (ip0[0]);
1098   u8 *data_this_buffer;
1099   u8 next_hdr = ip0->protocol;
1100
1101   ASSERT (bogus_lengthp);
1102   *bogus_lengthp = 0;
1103
1104   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
1105   data_this_buffer = (u8 *) (ip0 + 1);
1106   payload_length = ip0->payload_length;
1107
1108   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets)
1109    * or UDP-Ping packets */
1110   if (PREDICT_FALSE (next_hdr == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
1111     {
1112       u32 skip_bytes;
1113       ip6_hop_by_hop_ext_t *ext_hdr =
1114         (ip6_hop_by_hop_ext_t *) data_this_buffer;
1115
1116       /* validate really icmp6 next */
1117       ASSERT ((ext_hdr->next_hdr == IP_PROTOCOL_ICMP6)
1118               || (ext_hdr->next_hdr == IP_PROTOCOL_UDP));
1119
1120       skip_bytes = 8 * (1 + ext_hdr->n_data_u64s);
1121       data_this_buffer = (void *) ((u8 *) data_this_buffer + skip_bytes);
1122
1123       payload_length_host_byte_order -= skip_bytes;
1124       headers_size += skip_bytes;
1125
1126       /* pseudo-header adjustments:
1127        *   exclude ext header bytes from payload length
1128        *   use payload IP proto rather than ext header IP proto
1129        */
1130       payload_length = clib_host_to_net_u16 (payload_length_host_byte_order);
1131       next_hdr = ext_hdr->next_hdr;
1132     }
1133
1134   /* Initialize checksum with ip pseudo-header. */
1135   sum0 = payload_length + clib_host_to_net_u16 (next_hdr);
1136
1137   for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++)
1138     {
1139       sum0 = ip_csum_with_carry
1140         (sum0, clib_mem_unaligned (&ip0->src_address.as_uword[i], uword));
1141       sum0 = ip_csum_with_carry
1142         (sum0, clib_mem_unaligned (&ip0->dst_address.as_uword[i], uword));
1143     }
1144
1145   if (p0)
1146     return ip_calculate_l4_checksum (vm, p0, sum0,
1147                                      payload_length_host_byte_order,
1148                                      (u8 *) ip0, headers_size, NULL);
1149   else
1150     return ip_calculate_l4_checksum (vm, 0, sum0,
1151                                      payload_length_host_byte_order, NULL, 0,
1152                                      data_this_buffer);
1153 }
1154
1155 u32
1156 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1157 {
1158   ip6_header_t *ip0 = vlib_buffer_get_current (p0);
1159   udp_header_t *udp0;
1160   u16 sum16;
1161   int bogus_length;
1162
1163   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
1164   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1165           || ip0->protocol == IP_PROTOCOL_ICMP6
1166           || ip0->protocol == IP_PROTOCOL_UDP
1167           || ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS);
1168
1169   udp0 = (void *) (ip0 + 1);
1170   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1171     {
1172       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1173                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1174       return p0->flags;
1175     }
1176
1177   sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, &bogus_length);
1178
1179   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1180                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1181
1182   return p0->flags;
1183 }
1184 #endif
1185
1186 /**
1187  * @brief returns number of links on which src is reachable.
1188  */
1189 always_inline int
1190 ip6_urpf_loose_check (ip6_main_t * im, vlib_buffer_t * b, ip6_header_t * i)
1191 {
1192   const load_balance_t *lb0;
1193   index_t lbi;
1194   u32 fib_index;
1195
1196   fib_index = vec_elt (im->fib_index_by_sw_if_index,
1197                        vnet_buffer (b)->sw_if_index[VLIB_RX]);
1198   fib_index =
1199     (vnet_buffer (b)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
1200     fib_index : vnet_buffer (b)->sw_if_index[VLIB_TX];
1201
1202   lbi = ip6_fib_table_fwding_lookup (fib_index, &i->src_address);
1203   lb0 = load_balance_get (lbi);
1204
1205   return (fib_urpf_check_size (lb0->lb_urpf));
1206 }
1207
1208 always_inline u8
1209 ip6_next_proto_is_tcp_udp (vlib_buffer_t * p0, ip6_header_t * ip0,
1210                            u32 * udp_offset0)
1211 {
1212   int nh = ip6_locate_header (p0, ip0, -1, udp_offset0);
1213   if (nh > 0)
1214     if (nh == IP_PROTOCOL_UDP || nh == IP_PROTOCOL_TCP)
1215       return nh;
1216   return 0;
1217 }
1218
1219 VNET_FEATURE_ARC_INIT (ip6_local) = {
1220   .arc_name = "ip6-local",
1221   .start_nodes = VNET_FEATURES ("ip6-local", "ip6-receive"),
1222 };
1223
1224 static_always_inline u8
1225 ip6_tcp_udp_icmp_bad_length (vlib_main_t * vm, vlib_buffer_t * p0)
1226 {
1227
1228   u16 payload_length_host_byte_order;
1229   u32 n_this_buffer, n_bytes_left;
1230   ip6_header_t *ip0 = vlib_buffer_get_current (p0);
1231   u32 headers_size = sizeof (ip0[0]);
1232   u8 *data_this_buffer;
1233
1234
1235   data_this_buffer = (u8 *) (ip0 + 1);
1236
1237   ip6_hop_by_hop_ext_t *ext_hdr = (ip6_hop_by_hop_ext_t *) data_this_buffer;
1238
1239   /* validate really icmp6 next */
1240
1241   if (!(ext_hdr->next_hdr == IP_PROTOCOL_ICMP6)
1242       || (ext_hdr->next_hdr == IP_PROTOCOL_UDP))
1243     return 0;
1244
1245
1246   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
1247   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1248
1249
1250   u32 n_ip_bytes_this_buffer =
1251     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1252   if (n_this_buffer + headers_size > n_ip_bytes_this_buffer)
1253     {
1254       n_this_buffer = p0->current_length > headers_size ?
1255         n_ip_bytes_this_buffer - headers_size : 0;
1256     }
1257
1258   n_bytes_left -= n_this_buffer;
1259   n_bytes_left -= vlib_buffer_length_in_chain (vm, p0) - p0->current_length;
1260
1261   if (n_bytes_left == 0)
1262     return 0;
1263   else
1264     return 1;
1265 }
1266
1267 always_inline uword
1268 ip6_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
1269                   vlib_frame_t *frame, int head_of_feature_arc,
1270                   int is_receive_dpo)
1271 {
1272   ip6_main_t *im = &ip6_main;
1273   ip_lookup_main_t *lm = &im->lookup_main;
1274   u32 *from, n_left_from;
1275   vlib_node_runtime_t *error_node =
1276     vlib_node_get_runtime (vm, ip6_input_node.index);
1277   u8 arc_index = vnet_feat_arc_ip6_local.feature_arc_index;
1278   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1279   u16 nexts[VLIB_FRAME_SIZE], *next;
1280
1281   from = vlib_frame_vector_args (frame);
1282   n_left_from = frame->n_vectors;
1283
1284   if (node->flags & VLIB_NODE_FLAG_TRACE)
1285     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1286
1287   vlib_get_buffers (vm, from, bufs, n_left_from);
1288   b = bufs;
1289   next = nexts;
1290
1291   while (n_left_from > 2)
1292     {
1293       /* Prefetch next iteration. */
1294       if (n_left_from >= 6)
1295         {
1296           vlib_prefetch_buffer_header (b[4], STORE);
1297           vlib_prefetch_buffer_header (b[5], STORE);
1298           vlib_prefetch_buffer_data (b[2], LOAD);
1299           vlib_prefetch_buffer_data (b[3], LOAD);
1300         }
1301
1302       vl_counter_ip6_enum_t error[2];
1303       error[0] = IP6_ERROR_UNKNOWN_PROTOCOL;
1304       error[1] = IP6_ERROR_UNKNOWN_PROTOCOL;
1305
1306       ip6_header_t *ip[2];
1307       ip[0] = vlib_buffer_get_current (b[0]);
1308       ip[1] = vlib_buffer_get_current (b[1]);
1309
1310       if (head_of_feature_arc)
1311         {
1312           vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1313           vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1314
1315           u8 type[2];
1316           type[0] = lm->builtin_protocol_by_ip_protocol[ip[0]->protocol];
1317           type[1] = lm->builtin_protocol_by_ip_protocol[ip[1]->protocol];
1318
1319           u32 flags[2];
1320           flags[0] = b[0]->flags;
1321           flags[1] = b[1]->flags;
1322
1323           vnet_buffer_oflags_t oflags[2];
1324           oflags[0] = vnet_buffer (b[0])->oflags;
1325           oflags[1] = vnet_buffer (b[1])->oflags;
1326
1327           u32 l4_offload[2];
1328           l4_offload[0] = (flags[0] & VNET_BUFFER_F_OFFLOAD) &&
1329                           (oflags[0] & (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM |
1330                                         VNET_BUFFER_OFFLOAD_F_UDP_CKSUM));
1331           l4_offload[1] = (flags[1] & VNET_BUFFER_F_OFFLOAD) &&
1332                           (oflags[1] & (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM |
1333                                         VNET_BUFFER_OFFLOAD_F_UDP_CKSUM));
1334
1335           u32 good_l4_csum[2];
1336           good_l4_csum[0] =
1337             (flags[0] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) | l4_offload[0];
1338           good_l4_csum[1] =
1339             (flags[1] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) | l4_offload[1];
1340
1341           u32 udp_offset[2] = { };
1342           u8 is_tcp_udp[2];
1343           is_tcp_udp[0] =
1344             ip6_next_proto_is_tcp_udp (b[0], ip[0], &udp_offset[0]);
1345           is_tcp_udp[1] =
1346             ip6_next_proto_is_tcp_udp (b[1], ip[1], &udp_offset[1]);
1347           i16 len_diff[2] = { 0 };
1348           if (PREDICT_TRUE (is_tcp_udp[0]))
1349             {
1350               udp_header_t *udp =
1351                 (udp_header_t *) ((u8 *) ip[0] + udp_offset[0]);
1352               good_l4_csum[0] |= type[0] == IP_BUILTIN_PROTOCOL_UDP
1353                 && udp->checksum == 0;
1354               /* optimistically verify UDP length. */
1355               u16 ip_len, udp_len;
1356               ip_len = clib_net_to_host_u16 (ip[0]->payload_length);
1357               udp_len = clib_net_to_host_u16 (udp->length);
1358               len_diff[0] = ip_len - udp_len;
1359             }
1360           if (PREDICT_TRUE (is_tcp_udp[1]))
1361             {
1362               udp_header_t *udp =
1363                 (udp_header_t *) ((u8 *) ip[1] + udp_offset[1]);
1364               good_l4_csum[1] |= type[1] == IP_BUILTIN_PROTOCOL_UDP
1365                 && udp->checksum == 0;
1366               /* optimistically verify UDP length. */
1367               u16 ip_len, udp_len;
1368               ip_len = clib_net_to_host_u16 (ip[1]->payload_length);
1369               udp_len = clib_net_to_host_u16 (udp->length);
1370               len_diff[1] = ip_len - udp_len;
1371             }
1372
1373           good_l4_csum[0] |= type[0] == IP_BUILTIN_PROTOCOL_UNKNOWN;
1374           good_l4_csum[1] |= type[1] == IP_BUILTIN_PROTOCOL_UNKNOWN;
1375
1376           len_diff[0] = type[0] == IP_BUILTIN_PROTOCOL_UDP ? len_diff[0] : 0;
1377           len_diff[1] = type[1] == IP_BUILTIN_PROTOCOL_UDP ? len_diff[1] : 0;
1378
1379           u8 need_csum[2];
1380           need_csum[0] = type[0] != IP_BUILTIN_PROTOCOL_UNKNOWN
1381             && !good_l4_csum[0]
1382             && !(flags[0] & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
1383           need_csum[1] = type[1] != IP_BUILTIN_PROTOCOL_UNKNOWN
1384             && !good_l4_csum[1]
1385             && !(flags[1] & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
1386           if (PREDICT_FALSE (need_csum[0]))
1387             {
1388               flags[0] = ip6_tcp_udp_icmp_validate_checksum (vm, b[0]);
1389               good_l4_csum[0] = flags[0] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
1390               error[0] = IP6_ERROR_UNKNOWN_PROTOCOL;
1391             }
1392           else
1393             {
1394               if (ip6_tcp_udp_icmp_bad_length (vm, b[0]))
1395                 error[0] = IP6_ERROR_BAD_LENGTH;
1396             }
1397           if (PREDICT_FALSE (need_csum[1]))
1398             {
1399               flags[1] = ip6_tcp_udp_icmp_validate_checksum (vm, b[1]);
1400               good_l4_csum[1] = flags[1] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
1401               error[1] = IP6_ERROR_UNKNOWN_PROTOCOL;
1402             }
1403           else
1404             {
1405               if (ip6_tcp_udp_icmp_bad_length (vm, b[1]))
1406                 error[1] = IP6_ERROR_BAD_LENGTH;
1407             }
1408
1409
1410           error[0] = len_diff[0] < 0 ? IP6_ERROR_UDP_LENGTH : error[0];
1411
1412           error[1] = len_diff[1] < 0 ? IP6_ERROR_UDP_LENGTH : error[1];
1413
1414           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
1415                          IP6_ERROR_UDP_CHECKSUM,
1416                          "Wrong IP6 errors constants");
1417           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
1418                          IP6_ERROR_ICMP_CHECKSUM,
1419                          "Wrong IP6 errors constants");
1420
1421           error[0] =
1422             !good_l4_csum[0] ? IP6_ERROR_UDP_CHECKSUM + type[0] : error[0];
1423           error[1] =
1424             !good_l4_csum[1] ? IP6_ERROR_UDP_CHECKSUM + type[1] : error[1];
1425
1426           /* Drop packets from unroutable hosts. */
1427           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1428           u8 unroutable[2];
1429           unroutable[0] = error[0] == IP6_ERROR_UNKNOWN_PROTOCOL
1430             && type[0] != IP_BUILTIN_PROTOCOL_ICMP
1431             && !ip6_address_is_link_local_unicast (&ip[0]->src_address);
1432           unroutable[1] = error[1] == IP6_ERROR_UNKNOWN_PROTOCOL
1433             && type[1] != IP_BUILTIN_PROTOCOL_ICMP
1434             && !ip6_address_is_link_local_unicast (&ip[1]->src_address);
1435           if (PREDICT_FALSE (unroutable[0]))
1436             {
1437               error[0] =
1438                 !ip6_urpf_loose_check (im, b[0],
1439                                        ip[0]) ? IP6_ERROR_SRC_LOOKUP_MISS
1440                 : error[0];
1441             }
1442           if (PREDICT_FALSE (unroutable[1]))
1443             {
1444               error[1] =
1445                 !ip6_urpf_loose_check (im, b[1],
1446                                        ip[1]) ? IP6_ERROR_SRC_LOOKUP_MISS
1447                 : error[1];
1448             }
1449
1450           vnet_buffer (b[0])->ip.fib_index =
1451             vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1452             vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1453             vnet_buffer (b[0])->ip.fib_index;
1454           vnet_buffer (b[1])->ip.fib_index =
1455             vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1456             vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1457             vnet_buffer (b[1])->ip.fib_index;
1458
1459           vnet_buffer (b[0])->ip.rx_sw_if_index =
1460             vnet_buffer (b[0])->sw_if_index[VLIB_RX];
1461           vnet_buffer (b[1])->ip.rx_sw_if_index =
1462             vnet_buffer (b[1])->sw_if_index[VLIB_RX];
1463           if (is_receive_dpo)
1464             {
1465               const receive_dpo_t *rd0, *rd1;
1466               rd0 =
1467                 receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
1468               rd1 =
1469                 receive_dpo_get (vnet_buffer (b[1])->ip.adj_index[VLIB_TX]);
1470               if (rd0->rd_sw_if_index != ~0)
1471                 vnet_buffer (b[0])->ip.rx_sw_if_index = rd0->rd_sw_if_index;
1472               if (rd1->rd_sw_if_index != ~0)
1473                 vnet_buffer (b[1])->ip.rx_sw_if_index = rd1->rd_sw_if_index;
1474             }
1475         }                       /* head_of_feature_arc */
1476
1477       next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
1478       next[0] =
1479         error[0] != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[0];
1480       next[1] = lm->local_next_by_ip_protocol[ip[1]->protocol];
1481       next[1] =
1482         error[1] != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[1];
1483
1484       b[0]->error = error_node->errors[error[0]];
1485       b[1]->error = error_node->errors[error[1]];
1486
1487       if (head_of_feature_arc)
1488         {
1489           u8 ip6_unknown[2];
1490           ip6_unknown[0] = error[0] == (u8) IP6_ERROR_UNKNOWN_PROTOCOL;
1491           ip6_unknown[1] = error[1] == (u8) IP6_ERROR_UNKNOWN_PROTOCOL;
1492           if (PREDICT_TRUE (ip6_unknown[0]))
1493             {
1494               u32 next32 = next[0];
1495               vnet_feature_arc_start (arc_index,
1496                                       vnet_buffer (b[0])->ip.rx_sw_if_index,
1497                                       &next32, b[0]);
1498               next[0] = next32;
1499             }
1500           if (PREDICT_TRUE (ip6_unknown[1]))
1501             {
1502               u32 next32 = next[1];
1503               vnet_feature_arc_start (arc_index,
1504                                       vnet_buffer (b[1])->ip.rx_sw_if_index,
1505                                       &next32, b[1]);
1506               next[1] = next32;
1507             }
1508         }
1509
1510       /* next */
1511       b += 2;
1512       next += 2;
1513       n_left_from -= 2;
1514     }
1515
1516   while (n_left_from)
1517     {
1518       u8 error;
1519       error = IP6_ERROR_UNKNOWN_PROTOCOL;
1520
1521       ip6_header_t *ip;
1522       ip = vlib_buffer_get_current (b[0]);
1523
1524       if (head_of_feature_arc)
1525         {
1526           vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1527           u8 type = lm->builtin_protocol_by_ip_protocol[ip->protocol];
1528
1529           u32 flags = b[0]->flags;
1530
1531           vnet_buffer_oflags_t oflags = vnet_buffer (b[0])->oflags;
1532
1533           u32 l4_offload = (flags & VNET_BUFFER_F_OFFLOAD) &&
1534                            (oflags & (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM |
1535                                       VNET_BUFFER_OFFLOAD_F_UDP_CKSUM));
1536
1537           u32 good_l4_csum =
1538             (flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) | l4_offload;
1539           u32 udp_offset;
1540           i16 len_diff = 0;
1541           u8 is_tcp_udp = ip6_next_proto_is_tcp_udp (b[0], ip, &udp_offset);
1542           if (PREDICT_TRUE (is_tcp_udp))
1543             {
1544               udp_header_t *udp = (udp_header_t *) ((u8 *) ip + udp_offset);
1545               /* Don't verify UDP checksum for packets with explicit zero checksum. */
1546               good_l4_csum |= type == IP_BUILTIN_PROTOCOL_UDP
1547                 && udp->checksum == 0;
1548               /* optimistically verify UDP length. */
1549               u16 ip_len, udp_len;
1550               ip_len = clib_net_to_host_u16 (ip->payload_length);
1551               udp_len = clib_net_to_host_u16 (udp->length);
1552               len_diff = ip_len - udp_len;
1553             }
1554
1555           good_l4_csum |= type == IP_BUILTIN_PROTOCOL_UNKNOWN;
1556           len_diff = type == IP_BUILTIN_PROTOCOL_UDP ? len_diff : 0;
1557
1558           u8 need_csum = type != IP_BUILTIN_PROTOCOL_UNKNOWN &&
1559                          !good_l4_csum &&
1560                          !(flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
1561           if (PREDICT_FALSE (need_csum))
1562             {
1563               flags = ip6_tcp_udp_icmp_validate_checksum (vm, b[0]);
1564               good_l4_csum = flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
1565               error = IP6_ERROR_UNKNOWN_PROTOCOL;
1566             }
1567           else
1568             {
1569               if (ip6_tcp_udp_icmp_bad_length (vm, b[0]))
1570                 error = IP6_ERROR_BAD_LENGTH;
1571             }
1572
1573
1574
1575           error = len_diff < 0 ? IP6_ERROR_UDP_LENGTH : error;
1576           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
1577                          IP6_ERROR_UDP_CHECKSUM,
1578                          "Wrong IP6 errors constants");
1579           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
1580                          IP6_ERROR_ICMP_CHECKSUM,
1581                          "Wrong IP6 errors constants");
1582
1583           error = !good_l4_csum ? IP6_ERROR_UDP_CHECKSUM + type : error;
1584
1585           /* Drop packets from unroutable hosts. */
1586           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1587           u8 unroutable = error == IP6_ERROR_UNKNOWN_PROTOCOL
1588             && type != IP_BUILTIN_PROTOCOL_ICMP
1589             && !ip6_address_is_link_local_unicast (&ip->src_address);
1590           if (PREDICT_FALSE (unroutable))
1591             {
1592               error =
1593                 !ip6_urpf_loose_check (im, b[0],
1594                                        ip) ? IP6_ERROR_SRC_LOOKUP_MISS :
1595                 error;
1596             }
1597
1598           vnet_buffer (b[0])->ip.fib_index =
1599             vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1600             vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1601             vnet_buffer (b[0])->ip.fib_index;
1602
1603           vnet_buffer (b[0])->ip.rx_sw_if_index =
1604             vnet_buffer (b[0])->sw_if_index[VLIB_RX];
1605           if (is_receive_dpo)
1606             {
1607               receive_dpo_t *rd;
1608               rd = receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
1609               if (rd->rd_sw_if_index != ~0)
1610                 vnet_buffer (b[0])->ip.rx_sw_if_index = rd->rd_sw_if_index;
1611             }
1612         }                       /* head_of_feature_arc */
1613
1614       next[0] = lm->local_next_by_ip_protocol[ip->protocol];
1615       next[0] =
1616         error != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[0];
1617
1618       b[0]->error = error_node->errors[error];
1619
1620       if (head_of_feature_arc)
1621         {
1622           if (PREDICT_TRUE (error == (u8) IP6_ERROR_UNKNOWN_PROTOCOL))
1623             {
1624               u32 next32 = next[0];
1625               vnet_feature_arc_start (arc_index,
1626                                       vnet_buffer (b[0])->ip.rx_sw_if_index,
1627                                       &next32, b[0]);
1628               next[0] = next32;
1629             }
1630         }
1631
1632       /* next */
1633       b += 1;
1634       next += 1;
1635       n_left_from -= 1;
1636     }
1637
1638   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1639   return frame->n_vectors;
1640 }
1641
1642 VLIB_NODE_FN (ip6_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1643                                vlib_frame_t * frame)
1644 {
1645   return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */,
1646                            0 /* ip6_local_inline */);
1647 }
1648
1649 VLIB_REGISTER_NODE (ip6_local_node) =
1650 {
1651   .name = "ip6-local",
1652   .vector_size = sizeof (u32),
1653   .format_trace = format_ip6_forward_next_trace,
1654   .n_errors = IP6_N_ERROR,
1655   .error_counters = ip6_error_counters,
1656   .n_next_nodes = IP_LOCAL_N_NEXT,
1657   .next_nodes =
1658   {
1659     [IP_LOCAL_NEXT_DROP] = "ip6-drop",
1660     [IP_LOCAL_NEXT_PUNT] = "ip6-punt",
1661     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
1662     [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input",
1663     [IP_LOCAL_NEXT_REASSEMBLY] = "ip6-local-full-reassembly",
1664   },
1665 };
1666
1667 VLIB_NODE_FN (ip6_receive_local_node)
1668 (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
1669 {
1670   return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */,
1671                            1 /* is_receive_dpo */);
1672 }
1673
1674 VLIB_REGISTER_NODE (ip6_receive_local_node) = {
1675   .name = "ip6-receive",
1676   .vector_size = sizeof (u32),
1677   .format_trace = format_ip6_forward_next_trace,
1678   .sibling_of = "ip6-local"
1679 };
1680
1681 VLIB_NODE_FN (ip6_local_end_of_arc_node) (vlib_main_t * vm,
1682                                           vlib_node_runtime_t * node,
1683                                           vlib_frame_t * frame)
1684 {
1685   return ip6_local_inline (vm, node, frame, 0 /* head of feature arc */,
1686                            0 /* ip6_local_inline */);
1687 }
1688
1689 VLIB_REGISTER_NODE (ip6_local_end_of_arc_node) = {
1690   .name = "ip6-local-end-of-arc",
1691   .vector_size = sizeof (u32),
1692
1693   .format_trace = format_ip6_forward_next_trace,
1694   .sibling_of = "ip6-local",
1695 };
1696
1697 VNET_FEATURE_INIT (ip6_local_end_of_arc, static) = {
1698   .arc_name = "ip6-local",
1699   .node_name = "ip6-local-end-of-arc",
1700   .runs_before = 0, /* not before any other features */
1701 };
1702
1703 #ifdef CLIB_MARCH_VARIANT
1704 extern vlib_node_registration_t ip6_local_node;
1705 #else
1706 void
1707 ip6_register_protocol (u32 protocol, u32 node_index)
1708 {
1709   vlib_main_t *vm = vlib_get_main ();
1710   ip6_main_t *im = &ip6_main;
1711   ip_lookup_main_t *lm = &im->lookup_main;
1712
1713   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1714   lm->local_next_by_ip_protocol[protocol] =
1715     vlib_node_add_next (vm, ip6_local_node.index, node_index);
1716 }
1717
1718 void
1719 ip6_unregister_protocol (u32 protocol)
1720 {
1721   ip6_main_t *im = &ip6_main;
1722   ip_lookup_main_t *lm = &im->lookup_main;
1723
1724   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1725   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1726 }
1727 #endif
1728
/* Next-node slots used by the "ip6-rewrite" node registration. */
typedef enum
{
  IP6_REWRITE_NEXT_DROP = 0,
  IP6_REWRITE_NEXT_ICMP_ERROR,
  IP6_REWRITE_NEXT_FRAGMENT,
  /* Number of slots above; keep last */
  IP6_REWRITE_N_NEXT,
} ip6_rewrite_next_t;
1736
/**
 * The bits of an IPv6 address to mask to construct a multicast
 * MAC address
 */
1741 #define IP6_MCAST_ADDR_MASK 0xffffffff
1742
1743 always_inline void
1744 ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes,
1745                u16 adj_packet_bytes, bool is_locally_generated,
1746                u32 * next, u8 is_midchain, u32 * error)
1747 {
1748   if (adj_packet_bytes >= 1280 && packet_bytes > adj_packet_bytes)
1749     {
1750       if (is_locally_generated)
1751         {
1752           /* IP fragmentation */
1753           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
1754                                    (is_midchain ?
1755                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
1756                                     IP_FRAG_NEXT_IP_REWRITE), 0);
1757           *next = IP6_REWRITE_NEXT_FRAGMENT;
1758           *error = IP6_ERROR_MTU_EXCEEDED;
1759         }
1760       else
1761         {
1762           *error = IP6_ERROR_MTU_EXCEEDED;
1763           icmp6_error_set_vnet_buffer (b, ICMP6_packet_too_big, 0,
1764                                        adj_packet_bytes);
1765           *next = IP6_REWRITE_NEXT_ICMP_ERROR;
1766         }
1767     }
1768 }
1769
/**
 * Shared worker of the ip6 rewrite / midchain nodes.
 *
 * For every buffer in the frame: decrement the hop limit of packets that
 * are not locally originated, check the packet size against the
 * adjacency's max_l3_packet_bytes, apply the adjacency rewrite and choose
 * the next node. Packets that fail a check keep their buffer offsets and
 * get p->error set from the ip6-input node's error table.
 *
 * @param vm          vlib main of the calling thread
 * @param node        runtime of the node being dispatched
 * @param frame       frame holding the buffer indices to process
 * @param do_counters non-zero to increment the per-adjacency combined
 *                    counters
 * @param is_midchain non-zero for midchain adjacencies: the rewrite size
 *                    hint is an ip6 header instead of an ethernet header,
 *                    and the adjacency's fixup_func (when set) is invoked
 * @param is_mcast    non-zero to call vnet_ip_mcast_fixup_header with the
 *                    last 32-bit word of the destination address
 * @return frame->n_vectors
 */
always_inline uword
ip6_rewrite_inline_with_gso (vlib_main_t * vm,
                             vlib_node_runtime_t * node,
                             vlib_frame_t * frame,
                             int do_counters, int is_midchain, int is_mcast)
{
  ip_lookup_main_t *lm = &ip6_main.lookup_main;
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from, n_left_to_next, *to_next, next_index;
  /* Error codes are looked up in the ip6-input node's runtime. */
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip6_input_node.index);

  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  u32 thread_index = vm->thread_index;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Dual loop: two packets per iteration. Four must remain because
       * from[2] and from[3] are prefetched below. */
      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          const ip_adjacency_t *adj0, *adj1;
          vlib_buffer_t *p0, *p1;
          ip6_header_t *ip0, *ip1;
          u32 pi0, rw_len0, next0, error0, adj_index0;
          u32 pi1, rw_len1, next1, error1, adj_index1;
          u32 tx_sw_if_index0, tx_sw_if_index1;
          bool is_locally_originated0, is_locally_originated1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t *p2, *p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, LOAD);
            vlib_prefetch_buffer_header (p3, LOAD);

            clib_prefetch_store (p2->pre_data);
            clib_prefetch_store (p3->pre_data);

            CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
            CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
          }

          /* Speculatively enqueue both buffers to the current next frame;
           * vlib_validate_buffer_enqueue_x2 at the end of the iteration
           * is given the final next0/next1. */
          pi0 = to_next[0] = from[0];
          pi1 = to_next[1] = from[1];

          from += 2;
          n_left_from -= 2;
          to_next += 2;
          n_left_to_next -= 2;

          p0 = vlib_get_buffer (vm, pi0);
          p1 = vlib_get_buffer (vm, pi1);

          adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
          adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];

          ip0 = vlib_buffer_get_current (p0);
          ip1 = vlib_buffer_get_current (p1);

          /* Defaults; next0/next1 are overwritten from the adjacency once
           * the packet passes all checks. */
          error0 = error1 = IP6_ERROR_NONE;
          next0 = next1 = IP6_REWRITE_NEXT_DROP;

          /* The hop limit is only decremented for packets that are not
           * flagged as locally originated. */
          is_locally_originated0 =
            p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
          if (PREDICT_TRUE (!is_locally_originated0))
            {
              i32 hop_limit0 = ip0->hop_limit;

              /* Input node should have rejected packets with hop limit 0. */
              ASSERT (ip0->hop_limit > 0);

              hop_limit0 -= 1;

              ip0->hop_limit = hop_limit0;

              /*
               * If the hop count drops below 1 when forwarding, generate
               * an ICMP response.
               */
              if (PREDICT_FALSE (hop_limit0 <= 0))
                {
                  error0 = IP6_ERROR_TIME_EXPIRED;
                  next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
                  vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
                  icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
                                               ICMP6_time_exceeded_ttl_exceeded_in_transit,
                                               0);
                }
            }

          is_locally_originated1 =
            p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
          if (PREDICT_TRUE (!is_locally_originated1))
            {
              i32 hop_limit1 = ip1->hop_limit;

              /* Input node should have rejected packets with hop limit 0. */
              ASSERT (ip1->hop_limit > 0);

              hop_limit1 -= 1;

              ip1->hop_limit = hop_limit1;

              /*
               * If the hop count drops below 1 when forwarding, generate
               * an ICMP response.
               */
              if (PREDICT_FALSE (hop_limit1 <= 0))
                {
                  error1 = IP6_ERROR_TIME_EXPIRED;
                  next1 = IP6_REWRITE_NEXT_ICMP_ERROR;
                  vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
                  icmp6_error_set_vnet_buffer (p1, ICMP6_time_exceeded,
                                               ICMP6_time_exceeded_ttl_exceeded_in_transit,
                                               0);
                }
            }

          adj0 = adj_get (adj_index0);
          adj1 = adj_get (adj_index1);

          /* Remember the rewrite length in the buffer metadata. */
          rw_len0 = adj0[0].rewrite_header.data_bytes;
          rw_len1 = adj1[0].rewrite_header.data_bytes;
          vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
          vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;

          if (do_counters)
            {
              /* One packet; bytes = chain length plus the rewrite. */
              vlib_increment_combined_counter
                (&adjacency_counters,
                 thread_index, adj_index0, 1,
                 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
              vlib_increment_combined_counter
                (&adjacency_counters,
                 thread_index, adj_index1, 1,
                 vlib_buffer_length_in_chain (vm, p1) + rw_len1);
            }

          /* Check MTU of outgoing interface. */
          u16 ip0_len =
            clib_net_to_host_u16 (ip0->payload_length) +
            sizeof (ip6_header_t);
          u16 ip1_len =
            clib_net_to_host_u16 (ip1->payload_length) +
            sizeof (ip6_header_t);
          /* GSO buffers are checked with gso_mtu_sz () instead of the
           * length derived from the IP header. */
          if (p0->flags & VNET_BUFFER_F_GSO)
            ip0_len = gso_mtu_sz (p0);
          if (p1->flags & VNET_BUFFER_F_GSO)
            ip1_len = gso_mtu_sz (p1);

          ip6_mtu_check (p0, ip0_len,
                         adj0[0].rewrite_header.max_l3_packet_bytes,
                         is_locally_originated0, &next0, is_midchain,
                         &error0);
          ip6_mtu_check (p1, ip1_len,
                         adj1[0].rewrite_header.max_l3_packet_bytes,
                         is_locally_originated1, &next1, is_midchain,
                         &error1);
          /* Don't adjust the buffer for hop count issue; icmp-error node
           * wants to see the IP header */
          if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
            {
              /* Expose rw_len0 bytes in front of the IP header. */
              p0->current_data -= rw_len0;
              p0->current_length += rw_len0;
              tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
              vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
              next0 = adj0[0].rewrite_header.next_index;
              /* Adjacency has output features: let the feature arc pick
               * next0. */
              if (PREDICT_FALSE
                  (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
                vnet_feature_arc_start_w_cfg_index
                  (lm->output_feature_arc_index, tx_sw_if_index0, &next0, p0,
                   adj0->ia_cfg_index);
            }
          else
            {
              p0->error = error_node->errors[error0];
            }
          if (PREDICT_TRUE (error1 == IP6_ERROR_NONE))
            {
              p1->current_data -= rw_len1;
              p1->current_length += rw_len1;

              tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
              vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
              next1 = adj1[0].rewrite_header.next_index;

              if (PREDICT_FALSE
                  (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
                vnet_feature_arc_start_w_cfg_index
                  (lm->output_feature_arc_index, tx_sw_if_index1, &next1, p1,
                   adj1->ia_cfg_index);
            }
          else
            {
              p1->error = error_node->errors[error1];
            }

          /* The rewrite is applied whether or not an error was recorded
           * above. */
          if (is_midchain)
            {
              /* Guess we are only writing on ipv6 header. */
              vnet_rewrite_two_headers (adj0[0], adj1[0],
                                        ip0, ip1, sizeof (ip6_header_t));
            }
          else
            /* Guess we are only writing on simple Ethernet header. */
            vnet_rewrite_two_headers (adj0[0], adj1[0],
                                      ip0, ip1, sizeof (ethernet_header_t));

          if (is_midchain)
            {
              /* NOTE(review): fixup_func is called even when error0/error1
               * is set, i.e. when the buffer was not advanced back over
               * the rewrite - confirm the fixups tolerate that. */
              if (adj0->sub_type.midchain.fixup_func)
                adj0->sub_type.midchain.fixup_func
                  (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
              if (adj1->sub_type.midchain.fixup_func)
                adj1->sub_type.midchain.fixup_func
                  (vm, adj1, p1, adj1->sub_type.midchain.fixup_data);
            }
          if (is_mcast)
            {
              /*
               * copy bytes from the IP address into the MAC rewrite
               */
              vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
                                          adj0->
                                          rewrite_header.dst_mcast_offset,
                                          &ip0->dst_address.as_u32[3],
                                          (u8 *) ip0);
              vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
                                          adj1->
                                          rewrite_header.dst_mcast_offset,
                                          &ip1->dst_address.as_u32[3],
                                          (u8 *) ip1);
            }

          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           pi0, pi1, next0, next1);
        }

      /* Single loop: one packet per iteration, handles the remainder. */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          ip_adjacency_t *adj0;
          vlib_buffer_t *p0;
          ip6_header_t *ip0;
          u32 pi0, rw_len0;
          u32 adj_index0, next0, error0;
          u32 tx_sw_if_index0;
          bool is_locally_originated0;

          pi0 = to_next[0] = from[0];

          p0 = vlib_get_buffer (vm, pi0);

          adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];

          adj0 = adj_get (adj_index0);

          ip0 = vlib_buffer_get_current (p0);

          error0 = IP6_ERROR_NONE;
          next0 = IP6_REWRITE_NEXT_DROP;

          /* Check hop limit */
          is_locally_originated0 =
            p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
          if (PREDICT_TRUE (!is_locally_originated0))
            {
              i32 hop_limit0 = ip0->hop_limit;

              ASSERT (ip0->hop_limit > 0);

              hop_limit0 -= 1;

              ip0->hop_limit = hop_limit0;

              if (PREDICT_FALSE (hop_limit0 <= 0))
                {
                  /*
                   * If the hop count drops below 1 when forwarding, generate
                   * an ICMP response.
                   */
                  error0 = IP6_ERROR_TIME_EXPIRED;
                  next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
                  vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
                  icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
                                               ICMP6_time_exceeded_ttl_exceeded_in_transit,
                                               0);
                }
            }

          /* Unlike the dual loop, the rewrite is applied here before the
           * MTU check rather than after it. */
          if (is_midchain)
            {
              /* Guess we are only writing on ip6 header. */
              vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip6_header_t));
            }
          else
            /* Guess we are only writing on simple Ethernet header. */
            vnet_rewrite_one_header (adj0[0], ip0,
                                     sizeof (ethernet_header_t));

          /* Update packet buffer attributes/set output interface. */
          rw_len0 = adj0[0].rewrite_header.data_bytes;
          vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;

          if (do_counters)
            {
              vlib_increment_combined_counter
                (&adjacency_counters,
                 thread_index, adj_index0, 1,
                 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
            }

          /* Check MTU of outgoing interface. */
          u16 ip0_len =
            clib_net_to_host_u16 (ip0->payload_length) +
            sizeof (ip6_header_t);
          if (p0->flags & VNET_BUFFER_F_GSO)
            ip0_len = gso_mtu_sz (p0);

          ip6_mtu_check (p0, ip0_len,
                         adj0[0].rewrite_header.max_l3_packet_bytes,
                         is_locally_originated0, &next0, is_midchain,
                         &error0);
          /* Don't adjust the buffer for hop count issue; icmp-error node
           * wants to see the IP header */
          if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
            {
              p0->current_data -= rw_len0;
              p0->current_length += rw_len0;

              tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;

              vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
              next0 = adj0[0].rewrite_header.next_index;

              if (PREDICT_FALSE
                  (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
                vnet_feature_arc_start_w_cfg_index
                  (lm->output_feature_arc_index, tx_sw_if_index0, &next0, p0,
                   adj0->ia_cfg_index);
            }
          else
            {
              p0->error = error_node->errors[error0];
            }

          if (is_midchain)
            {
              /* NOTE(review): as in the dual loop, fixup_func runs even
               * when error0 is set - confirm this is intended. */
              if (adj0->sub_type.midchain.fixup_func)
                adj0->sub_type.midchain.fixup_func
                  (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
            }
          if (is_mcast)
            {
              vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
                                          adj0->
                                          rewrite_header.dst_mcast_offset,
                                          &ip0->dst_address.as_u32[3],
                                          (u8 *) ip0);
            }

          from += 1;
          n_left_from -= 1;
          to_next += 1;
          n_left_to_next -= 1;

          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           pi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  /* Need to do trace after rewrites to pick up new packet data. */
  if (node->flags & VLIB_NODE_FLAG_TRACE)
    ip6_forward_next_trace (vm, node, frame, VLIB_TX);

  return frame->n_vectors;
}
2155
2156 always_inline uword
2157 ip6_rewrite_inline (vlib_main_t * vm,
2158                     vlib_node_runtime_t * node,
2159                     vlib_frame_t * frame,
2160                     int do_counters, int is_midchain, int is_mcast)
2161 {
2162   return ip6_rewrite_inline_with_gso (vm, node, frame, do_counters,
2163                                       is_midchain, is_mcast);
2164 }
2165
2166 VLIB_NODE_FN (ip6_rewrite_node) (vlib_main_t * vm,
2167                                  vlib_node_runtime_t * node,
2168                                  vlib_frame_t * frame)
2169 {
2170   if (adj_are_counters_enabled ())
2171     return ip6_rewrite_inline (vm, node, frame, 1, 0, 0);
2172   else
2173     return ip6_rewrite_inline (vm, node, frame, 0, 0, 0);
2174 }
2175
2176 VLIB_NODE_FN (ip6_rewrite_bcast_node) (vlib_main_t * vm,
2177                                        vlib_node_runtime_t * node,
2178                                        vlib_frame_t * frame)
2179 {
2180   if (adj_are_counters_enabled ())
2181     return ip6_rewrite_inline (vm, node, frame, 1, 0, 0);
2182   else
2183     return ip6_rewrite_inline (vm, node, frame, 0, 0, 0);
2184 }
2185
2186 VLIB_NODE_FN (ip6_rewrite_mcast_node) (vlib_main_t * vm,
2187                                        vlib_node_runtime_t * node,
2188                                        vlib_frame_t * frame)
2189 {
2190   if (adj_are_counters_enabled ())
2191     return ip6_rewrite_inline (vm, node, frame, 1, 0, 1);
2192   else
2193     return ip6_rewrite_inline (vm, node, frame, 0, 0, 1);
2194 }
2195
2196 VLIB_NODE_FN (ip6_midchain_node) (vlib_main_t * vm,
2197                                   vlib_node_runtime_t * node,
2198                                   vlib_frame_t * frame)
2199 {
2200   if (adj_are_counters_enabled ())
2201     return ip6_rewrite_inline (vm, node, frame, 1, 1, 0);
2202   else
2203     return ip6_rewrite_inline (vm, node, frame, 0, 1, 0);
2204 }
2205
2206 VLIB_NODE_FN (ip6_mcast_midchain_node) (vlib_main_t * vm,
2207                                         vlib_node_runtime_t * node,
2208                                         vlib_frame_t * frame)
2209 {
2210   if (adj_are_counters_enabled ())
2211     return ip6_rewrite_inline (vm, node, frame, 1, 1, 1);
2212   else
2213     return ip6_rewrite_inline (vm, node, frame, 0, 1, 1);
2214 }
2215
2216 VLIB_REGISTER_NODE (ip6_midchain_node) = {
2217   .name = "ip6-midchain",
2218   .vector_size = sizeof (u32),
2219   .format_trace = format_ip6_forward_next_trace,
2220   .sibling_of = "ip6-rewrite",
2221 };
2222
2223 VLIB_REGISTER_NODE (ip6_rewrite_node) =
2224 {
2225   .name = "ip6-rewrite",
2226   .vector_size = sizeof (u32),
2227   .format_trace = format_ip6_rewrite_trace,
2228   .n_next_nodes = IP6_REWRITE_N_NEXT,
2229   .next_nodes =
2230   {
2231     [IP6_REWRITE_NEXT_DROP] = "ip6-drop",
2232     [IP6_REWRITE_NEXT_ICMP_ERROR] = "ip6-icmp-error",
2233     [IP6_REWRITE_NEXT_FRAGMENT] = "ip6-frag",
2234   },
2235 };
2236
2237 VLIB_REGISTER_NODE (ip6_rewrite_bcast_node) = {
2238   .name = "ip6-rewrite-bcast",
2239   .vector_size = sizeof (u32),
2240
2241   .format_trace = format_ip6_rewrite_trace,
2242   .sibling_of = "ip6-rewrite",
2243 };
2244
2245 VLIB_REGISTER_NODE (ip6_rewrite_mcast_node) =
2246 {
2247   .name = "ip6-rewrite-mcast",
2248   .vector_size = sizeof (u32),
2249   .format_trace = format_ip6_rewrite_trace,
2250   .sibling_of = "ip6-rewrite",
2251 };
2252
2253
2254 VLIB_REGISTER_NODE (ip6_mcast_midchain_node) =
2255 {
2256   .name = "ip6-mcast-midchain",
2257   .vector_size = sizeof (u32),
2258   .format_trace = format_ip6_rewrite_trace,
2259   .sibling_of = "ip6-rewrite",
2260 };
2261
2262
2263 /*
2264  * Hop-by-Hop handling
2265  */
2266 #ifndef CLIB_MARCH_VARIANT
2267 ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
2268 #endif /* CLIB_MARCH_VARIANT */
2269
/*
 * X-macro list of the hop-by-hop node errors: (symbol suffix, text).
 * It is expanded below into the error enum; the order of the entries
 * fixes the enum values.
 */
#define foreach_ip6_hop_by_hop_error                            \
  _ (PROCESSED, "pkts with ip6 hop-by-hop options")             \
  _ (FORMAT, "incorrectly formatted hop-by-hop options")        \
  _ (UNKNOWN_OPTION, "unknown ip6 hop-by-hop options")

/* One IP6_HOP_BY_HOP_ERROR_<symbol> per list entry, then the count. */
typedef enum
{
#define _(name, text) IP6_HOP_BY_HOP_ERROR_##name,
  foreach_ip6_hop_by_hop_error
#undef _
    IP6_HOP_BY_HOP_N_ERROR,
} ip6_hop_by_hop_error_t;
2282
2283 /*
2284  * Primary h-b-h handler trace support
2285  * We work pretty hard on the problem for obvious reasons
2286  */
2287 typedef struct
2288 {
2289   u32 next_index;
2290   u32 trace_len;
2291   u8 option_data[256];
2292 } ip6_hop_by_hop_trace_t;
2293
2294 extern vlib_node_registration_t ip6_hop_by_hop_node;
2295
2296 static char *ip6_hop_by_hop_error_strings[] = {
2297 #define _(sym,string) string,
2298   foreach_ip6_hop_by_hop_error
2299 #undef _
2300 };
2301
2302 #ifndef CLIB_MARCH_VARIANT
2303 u8 *
2304 format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args)
2305 {
2306   ip6_hop_by_hop_header_t *hbh0 = va_arg (*args, ip6_hop_by_hop_header_t *);
2307   int total_len = va_arg (*args, int);
2308   ip6_hop_by_hop_option_t *opt0, *limit0;
2309   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2310   u8 type0;
2311   s = format (s, "IP6_HOP_BY_HOP: next protocol %d len %d total %d",
2312               hbh0->protocol, (hbh0->length + 1) << 3, total_len);
2313
2314   opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2315   limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 + total_len);
2316
2317   while (opt0 < limit0)
2318     {
2319       type0 = opt0->type;
2320       switch (type0)
2321         {
2322         case 0:         /* Pad, just stop */
2323           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1);
2324           break;
2325
2326         default:
2327           if (hm->trace[type0])
2328             {
2329               s = (*hm->trace[type0]) (s, opt0);
2330             }
2331           else
2332             {
2333               s = format (s, "\n    unrecognized option %d length %d", type0,
2334                           opt0->length);
2335             }
2336           opt0 =
2337             (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2338                                          sizeof (ip6_hop_by_hop_option_t));
2339           break;
2340         }
2341     }
2342   return s;
2343 }
2344 #endif
2345
2346 static u8 *
2347 format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
2348 {
2349   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
2350   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
2351   ip6_hop_by_hop_trace_t *t = va_arg (*args, ip6_hop_by_hop_trace_t *);
2352   ip6_hop_by_hop_header_t *hbh0;
2353   ip6_hop_by_hop_option_t *opt0, *limit0;
2354   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2355
2356   u8 type0;
2357
2358   hbh0 = (ip6_hop_by_hop_header_t *) t->option_data;
2359
2360   s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d",
2361               t->next_index, (hbh0->length + 1) << 3, t->trace_len);
2362
2363   opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2364   limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0) + t->trace_len;
2365
2366   while (opt0 < limit0)
2367     {
2368       type0 = opt0->type;
2369       switch (type0)
2370         {
2371         case 0:         /* Pad, just stop */
2372           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1;
2373           break;
2374
2375         default:
2376           if (hm->trace[type0])
2377             {
2378               s = (*hm->trace[type0]) (s, opt0);
2379             }
2380           else
2381             {
2382               s = format (s, "\n    unrecognized option %d length %d", type0,
2383                           opt0->length);
2384             }
2385           opt0 =
2386             (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2387                                          sizeof (ip6_hop_by_hop_option_t));
2388           break;
2389         }
2390     }
2391   return s;
2392 }
2393
2394 always_inline u8
2395 ip6_scan_hbh_options (vlib_buffer_t * b0,
2396                       ip6_header_t * ip0,
2397                       ip6_hop_by_hop_header_t * hbh0,
2398                       ip6_hop_by_hop_option_t * opt0,
2399                       ip6_hop_by_hop_option_t * limit0, u32 * next0)
2400 {
2401   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2402   u8 type0;
2403   u8 error0 = 0;
2404
2405   while (opt0 < limit0)
2406     {
2407       type0 = opt0->type;
2408       switch (type0)
2409         {
2410         case 0:         /* Pad1 */
2411           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1;
2412           continue;
2413         case 1:         /* PadN */
2414           break;
2415         default:
2416           if (hm->options[type0])
2417             {
2418               if ((*hm->options[type0]) (b0, ip0, opt0) < 0)
2419                 {
2420                   error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2421                   return (error0);
2422                 }
2423             }
2424           else
2425             {
2426               /* Unrecognized mandatory option, check the two high order bits */
2427               switch (opt0->type & HBH_OPTION_TYPE_HIGH_ORDER_BITS)
2428                 {
2429                 case HBH_OPTION_TYPE_SKIP_UNKNOWN:
2430                   break;
2431                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN:
2432                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2433                   *next0 = IP_LOOKUP_NEXT_DROP;
2434                   break;
2435                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP:
2436                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2437                   *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
2438                   icmp6_error_set_vnet_buffer (b0, ICMP6_parameter_problem,
2439                                                ICMP6_parameter_problem_unrecognized_option,
2440                                                (u8 *) opt0 - (u8 *) ip0);
2441                   break;
2442                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST:
2443                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2444                   if (!ip6_address_is_multicast (&ip0->dst_address))
2445                     {
2446                       *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
2447                       icmp6_error_set_vnet_buffer (b0,
2448                                                    ICMP6_parameter_problem,
2449                                                    ICMP6_parameter_problem_unrecognized_option,
2450                                                    (u8 *) opt0 - (u8 *) ip0);
2451                     }
2452                   else
2453                     {
2454                       *next0 = IP_LOOKUP_NEXT_DROP;
2455                     }
2456                   break;
2457                 }
2458               return (error0);
2459             }
2460         }
2461       opt0 =
2462         (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2463                                      sizeof (ip6_hop_by_hop_option_t));
2464     }
2465   return (error0);
2466 }
2467
2468 /*
2469  * Process the Hop-by-Hop Options header
2470  */
2471 VLIB_NODE_FN (ip6_hop_by_hop_node) (vlib_main_t * vm,
2472                                     vlib_node_runtime_t * node,
2473                                     vlib_frame_t * frame)
2474 {
2475   vlib_node_runtime_t *error_node =
2476     vlib_node_get_runtime (vm, ip6_hop_by_hop_node.index);
2477   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2478   u32 n_left_from, *from, *to_next;
2479   ip_lookup_next_t next_index;
2480
2481   from = vlib_frame_vector_args (frame);
2482   n_left_from = frame->n_vectors;
2483   next_index = node->cached_next_index;
2484
2485   while (n_left_from > 0)
2486     {
2487       u32 n_left_to_next;
2488
2489       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2490
2491       while (n_left_from >= 4 && n_left_to_next >= 2)
2492         {
2493           u32 bi0, bi1;
2494           vlib_buffer_t *b0, *b1;
2495           u32 next0, next1;
2496           ip6_header_t *ip0, *ip1;
2497           ip6_hop_by_hop_header_t *hbh0, *hbh1;
2498           ip6_hop_by_hop_option_t *opt0, *limit0, *opt1, *limit1;
2499           u8 error0 = 0, error1 = 0;
2500
2501           /* Prefetch next iteration. */
2502           {
2503             vlib_buffer_t *p2, *p3;
2504
2505             p2 = vlib_get_buffer (vm, from[2]);
2506             p3 = vlib_get_buffer (vm, from[3]);
2507
2508             vlib_prefetch_buffer_header (p2, LOAD);
2509             vlib_prefetch_buffer_header (p3, LOAD);
2510
2511             CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
2512             CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
2513           }
2514
2515           /* Speculatively enqueue b0, b1 to the current next frame */
2516           to_next[0] = bi0 = from[0];
2517           to_next[1] = bi1 = from[1];
2518           from += 2;
2519           to_next += 2;
2520           n_left_from -= 2;
2521           n_left_to_next -= 2;
2522
2523           b0 = vlib_get_buffer (vm, bi0);
2524           b1 = vlib_get_buffer (vm, bi1);
2525
2526           /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2527           u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
2528           ip_adjacency_t *adj0 = adj_get (adj_index0);
2529           u32 adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
2530           ip_adjacency_t *adj1 = adj_get (adj_index1);
2531
2532           /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2533           next0 = adj0->lookup_next_index;
2534           next1 = adj1->lookup_next_index;
2535
2536           ip0 = vlib_buffer_get_current (b0);
2537           ip1 = vlib_buffer_get_current (b1);
2538           hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
2539           hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
2540           opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2541           opt1 = (ip6_hop_by_hop_option_t *) (hbh1 + 1);
2542           limit0 =
2543             (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
2544                                          ((hbh0->length + 1) << 3));
2545           limit1 =
2546             (ip6_hop_by_hop_option_t *) ((u8 *) hbh1 +
2547                                          ((hbh1->length + 1) << 3));
2548
2549           /*
2550            * Basic validity checks
2551            */
2552           if ((hbh0->length + 1) << 3 >
2553               clib_net_to_host_u16 (ip0->payload_length))
2554             {
2555               error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2556               next0 = IP_LOOKUP_NEXT_DROP;
2557               goto outdual;
2558             }
2559           /* Scan the set of h-b-h options, process ones that we understand */
2560           error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
2561
2562           if ((hbh1->length + 1) << 3 >
2563               clib_net_to_host_u16 (ip1->payload_length))
2564             {
2565               error1 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2566               next1 = IP_LOOKUP_NEXT_DROP;
2567               goto outdual;
2568             }
2569           /* Scan the set of h-b-h options, process ones that we understand */
2570           error1 = ip6_scan_hbh_options (b1, ip1, hbh1, opt1, limit1, &next1);
2571
2572         outdual:
2573           /* Has the classifier flagged this buffer for special treatment? */
2574           if (PREDICT_FALSE
2575               ((error0 == 0)
2576                && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
2577             next0 = hm->next_override;
2578
2579           /* Has the classifier flagged this buffer for special treatment? */
2580           if (PREDICT_FALSE
2581               ((error1 == 0)
2582                && (vnet_buffer (b1)->l2_classify.opaque_index & OI_DECAP)))
2583             next1 = hm->next_override;
2584
2585           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
2586             {
2587               if (b0->flags & VLIB_BUFFER_IS_TRACED)
2588                 {
2589                   ip6_hop_by_hop_trace_t *t =
2590                     vlib_add_trace (vm, node, b0, sizeof (*t));
2591                   u32 trace_len = (hbh0->length + 1) << 3;
2592                   t->next_index = next0;
2593                   /* Capture the h-b-h option verbatim */
2594                   trace_len =
2595                     trace_len <
2596                     ARRAY_LEN (t->option_data) ? trace_len :
2597                     ARRAY_LEN (t->option_data);
2598                   t->trace_len = trace_len;
2599                   clib_memcpy_fast (t->option_data, hbh0, trace_len);
2600                 }
2601               if (b1->flags & VLIB_BUFFER_IS_TRACED)
2602                 {
2603                   ip6_hop_by_hop_trace_t *t =
2604                     vlib_add_trace (vm, node, b1, sizeof (*t));
2605                   u32 trace_len = (hbh1->length + 1) << 3;
2606                   t->next_index = next1;
2607                   /* Capture the h-b-h option verbatim */
2608                   trace_len =
2609                     trace_len <
2610                     ARRAY_LEN (t->option_data) ? trace_len :
2611                     ARRAY_LEN (t->option_data);
2612                   t->trace_len = trace_len;
2613                   clib_memcpy_fast (t->option_data, hbh1, trace_len);
2614                 }
2615
2616             }
2617
2618           b0->error = error_node->errors[error0];
2619           b1->error = error_node->errors[error1];
2620
2621           /* verify speculative enqueue, maybe switch current next frame */
2622           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
2623                                            n_left_to_next, bi0, bi1, next0,
2624                                            next1);
2625         }
2626
2627       while (n_left_from > 0 && n_left_to_next > 0)
2628         {
2629           u32 bi0;
2630           vlib_buffer_t *b0;
2631           u32 next0;
2632           ip6_header_t *ip0;
2633           ip6_hop_by_hop_header_t *hbh0;
2634           ip6_hop_by_hop_option_t *opt0, *limit0;
2635           u8 error0 = 0;
2636
2637           /* Speculatively enqueue b0 to the current next frame */
2638           bi0 = from[0];
2639           to_next[0] = bi0;
2640           from += 1;
2641           to_next += 1;
2642           n_left_from -= 1;
2643           n_left_to_next -= 1;
2644
2645           b0 = vlib_get_buffer (vm, bi0);
2646           /*
2647            * Default use the next_index from the adjacency.
2648            * A HBH option rarely redirects to a different node
2649            */
2650           u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
2651           ip_adjacency_t *adj0 = adj_get (adj_index0);
2652           next0 = adj0->lookup_next_index;
2653
2654           ip0 = vlib_buffer_get_current (b0);
2655           hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
2656           opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2657           limit0 =
2658             (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
2659                                          ((hbh0->length + 1) << 3));
2660
2661           /*
2662            * Basic validity checks
2663            */
2664           if ((hbh0->length + 1) << 3 >
2665               clib_net_to_host_u16 (ip0->payload_length))
2666             {
2667               error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2668               next0 = IP_LOOKUP_NEXT_DROP;
2669               goto out0;
2670             }
2671
2672           /* Scan the set of h-b-h options, process ones that we understand */
2673           error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
2674
2675         out0:
2676           /* Has the classifier flagged this buffer for special treatment? */
2677           if (PREDICT_FALSE
2678               ((error0 == 0)
2679                && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
2680             next0 = hm->next_override;
2681
2682           if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
2683             {
2684               ip6_hop_by_hop_trace_t *t =
2685                 vlib_add_trace (vm, node, b0, sizeof (*t));
2686               u32 trace_len = (hbh0->length + 1) << 3;
2687               t->next_index = next0;
2688               /* Capture the h-b-h option verbatim */
2689               trace_len =
2690                 trace_len <
2691                 ARRAY_LEN (t->option_data) ? trace_len :
2692                 ARRAY_LEN (t->option_data);
2693               t->trace_len = trace_len;
2694               clib_memcpy_fast (t->option_data, hbh0, trace_len);
2695             }
2696
2697           b0->error = error_node->errors[error0];
2698
2699           /* verify speculative enqueue, maybe switch current next frame */
2700           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
2701                                            n_left_to_next, bi0, next0);
2702         }
2703       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2704     }
2705   return frame->n_vectors;
2706 }
2707
2708 VLIB_REGISTER_NODE (ip6_hop_by_hop_node) =
2709 {
2710   .name = "ip6-hop-by-hop",
2711   .sibling_of = "ip6-lookup",
2712   .vector_size = sizeof (u32),
2713   .format_trace = format_ip6_hop_by_hop_trace,
2714   .type = VLIB_NODE_TYPE_INTERNAL,
2715   .n_errors = ARRAY_LEN (ip6_hop_by_hop_error_strings),
2716   .error_strings = ip6_hop_by_hop_error_strings,
2717   .n_next_nodes = 0,
2718 };
2719
2720 static clib_error_t *
2721 ip6_hop_by_hop_init (vlib_main_t * vm)
2722 {
2723   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2724   clib_memset (hm->options, 0, sizeof (hm->options));
2725   clib_memset (hm->trace, 0, sizeof (hm->trace));
2726   hm->next_override = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP;
2727   return (0);
2728 }
2729
2730 VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
2731
2732 #ifndef CLIB_MARCH_VARIANT
2733 void
2734 ip6_hbh_set_next_override (uword next)
2735 {
2736   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2737
2738   hm->next_override = next;
2739 }
2740
2741 int
2742 ip6_hbh_register_option (u8 option,
2743                          int options (vlib_buffer_t * b, ip6_header_t * ip,
2744                                       ip6_hop_by_hop_option_t * opt),
2745                          u8 * trace (u8 * s, ip6_hop_by_hop_option_t * opt))
2746 {
2747   ip6_main_t *im = &ip6_main;
2748   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2749
2750   ASSERT ((u32) option < ARRAY_LEN (hm->options));
2751
2752   /* Already registered */
2753   if (hm->options[option])
2754     return (-1);
2755
2756   hm->options[option] = options;
2757   hm->trace[option] = trace;
2758
2759   /* Set global variable */
2760   im->hbh_enabled = 1;
2761
2762   return (0);
2763 }
2764
2765 int
2766 ip6_hbh_unregister_option (u8 option)
2767 {
2768   ip6_main_t *im = &ip6_main;
2769   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2770
2771   ASSERT ((u32) option < ARRAY_LEN (hm->options));
2772
2773   /* Not registered */
2774   if (!hm->options[option])
2775     return (-1);
2776
2777   hm->options[option] = NULL;
2778   hm->trace[option] = NULL;
2779
2780   /* Disable global knob if this was the last option configured */
2781   int i;
2782   bool found = false;
2783   for (i = 0; i < 256; i++)
2784     {
2785       if (hm->options[option])
2786         {
2787           found = true;
2788           break;
2789         }
2790     }
2791   if (!found)
2792     im->hbh_enabled = 0;
2793
2794   return (0);
2795 }
2796
/*
 * Global IP6 main: the single ip6_main_t instance that the functions in
 * this file reach through &ip6_main. Defined only when CLIB_MARCH_VARIANT
 * is not set.
 */
ip6_main_t ip6_main;
2799 #endif
2800
2801 static clib_error_t *
2802 ip6_lookup_init (vlib_main_t * vm)
2803 {
2804   ip6_main_t *im = &ip6_main;
2805   clib_error_t *error;
2806   uword i;
2807
2808   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
2809     return error;
2810
2811   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
2812     {
2813       u32 j, i0, i1;
2814
2815       i0 = i / 32;
2816       i1 = i % 32;
2817
2818       for (j = 0; j < i0; j++)
2819         im->fib_masks[i].as_u32[j] = ~0;
2820
2821       if (i1)
2822         im->fib_masks[i].as_u32[i0] =
2823           clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
2824     }
2825
2826   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 1);
2827
2828   /* Create FIB with index 0 and table id of 0. */
2829   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
2830                                      FIB_SOURCE_DEFAULT_ROUTE);
2831   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
2832                                       MFIB_SOURCE_DEFAULT_ROUTE);
2833
2834   {
2835     pg_node_t *pn;
2836     pn = pg_get_node (ip6_lookup_node.index);
2837     pn->unformat_edit = unformat_pg_ip6_header;
2838   }
2839
2840   /* Unless explicitly configured, don't process HBH options */
2841   im->hbh_enabled = 0;
2842
2843   return error;
2844 }
2845
2846 VLIB_INIT_FUNCTION (ip6_lookup_init);
2847
2848 static clib_error_t *
2849 set_ip6_flow_hash_command_fn (vlib_main_t * vm,
2850                               unformat_input_t * input,
2851                               vlib_cli_command_t * cmd)
2852 {
2853   int matched = 0;
2854   u32 table_id = 0;
2855   u32 flow_hash_config = 0;
2856   int rv;
2857
2858   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2859     {
2860       if (unformat (input, "table %d", &table_id))
2861         matched = 1;
2862 #define _(a, b, v)                                                            \
2863   else if (unformat (input, #a))                                              \
2864   {                                                                           \
2865     flow_hash_config |= v;                                                    \
2866     matched = 1;                                                              \
2867   }
2868       foreach_flow_hash_bit
2869 #undef _
2870         else
2871         break;
2872     }
2873
2874   if (matched == 0)
2875     return clib_error_return (0, "unknown input `%U'",
2876                               format_unformat_error, input);
2877
2878   rv = ip_flow_hash_set (AF_IP6, table_id, flow_hash_config);
2879   switch (rv)
2880     {
2881     case 0:
2882       break;
2883
2884     case -1:
2885       return clib_error_return (0, "no such FIB table %d", table_id);
2886
2887     default:
2888       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2889       break;
2890     }
2891
2892   return 0;
2893 }
2894
2895 /*?
2896  * Configure the set of IPv6 fields used by the flow hash.
2897  *
2898  * @cliexpar
2899  * @parblock
2900  * Example of how to set the flow hash on a given table:
2901  * @cliexcmd{set ip6 flow-hash table 8 dst sport dport proto}
2902  *
 * Example of how to display the configured flow hash:
2904  * @cliexstart{show ip6 fib}
2905  * ipv6-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2906  * @::/0
2907  *   unicast-ip6-chain
2908  *   [@0]: dpo-load-balance: [index:5 buckets:1 uRPF:5 to:[0:0]]
2909  *     [0] [@0]: dpo-drop ip6
2910  * fe80::/10
2911  *   unicast-ip6-chain
2912  *   [@0]: dpo-load-balance: [index:10 buckets:1 uRPF:10 to:[0:0]]
2913  *     [0] [@2]: dpo-receive
2914  * ff02::1/128
2915  *   unicast-ip6-chain
2916  *   [@0]: dpo-load-balance: [index:8 buckets:1 uRPF:8 to:[0:0]]
2917  *     [0] [@2]: dpo-receive
2918  * ff02::2/128
2919  *   unicast-ip6-chain
2920  *   [@0]: dpo-load-balance: [index:7 buckets:1 uRPF:7 to:[0:0]]
2921  *     [0] [@2]: dpo-receive
2922  * ff02::16/128
2923  *   unicast-ip6-chain
2924  *   [@0]: dpo-load-balance: [index:9 buckets:1 uRPF:9 to:[0:0]]
2925  *     [0] [@2]: dpo-receive
2926  * ff02::1:ff00:0/104
2927  *   unicast-ip6-chain
2928  *   [@0]: dpo-load-balance: [index:6 buckets:1 uRPF:6 to:[0:0]]
2929  *     [0] [@2]: dpo-receive
2930  * ipv6-VRF:8, fib_index 1, flow hash: dst sport dport proto
2931  * @::/0
2932  *   unicast-ip6-chain
2933  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2934  *     [0] [@0]: dpo-drop ip6
2935  * @::a:1:1:0:4/126
2936  *   unicast-ip6-chain
2937  *   [@0]: dpo-load-balance: [index:27 buckets:1 uRPF:26 to:[0:0]]
2938  *     [0] [@4]: ipv6-glean: af_packet0
2939  * @::a:1:1:0:7/128
2940  *   unicast-ip6-chain
2941  *   [@0]: dpo-load-balance: [index:28 buckets:1 uRPF:27 to:[0:0]]
2942  *     [0] [@2]: dpo-receive: @::a:1:1:0:7 on af_packet0
2943  * fe80::/10
2944  *   unicast-ip6-chain
2945  *   [@0]: dpo-load-balance: [index:26 buckets:1 uRPF:25 to:[0:0]]
2946  *     [0] [@2]: dpo-receive
2947  * fe80::fe:3eff:fe3e:9222/128
2948  *   unicast-ip6-chain
2949  *   [@0]: dpo-load-balance: [index:29 buckets:1 uRPF:28 to:[0:0]]
2950  *     [0] [@2]: dpo-receive: fe80::fe:3eff:fe3e:9222 on af_packet0
2951  * ff02::1/128
2952  *   unicast-ip6-chain
2953  *   [@0]: dpo-load-balance: [index:24 buckets:1 uRPF:23 to:[0:0]]
2954  *     [0] [@2]: dpo-receive
2955  * ff02::2/128
2956  *   unicast-ip6-chain
2957  *   [@0]: dpo-load-balance: [index:23 buckets:1 uRPF:22 to:[0:0]]
2958  *     [0] [@2]: dpo-receive
2959  * ff02::16/128
2960  *   unicast-ip6-chain
2961  *   [@0]: dpo-load-balance: [index:25 buckets:1 uRPF:24 to:[0:0]]
2962  *     [0] [@2]: dpo-receive
2963  * ff02::1:ff00:0/104
2964  *   unicast-ip6-chain
2965  *   [@0]: dpo-load-balance: [index:22 buckets:1 uRPF:21 to:[0:0]]
2966  *     [0] [@2]: dpo-receive
2967  * @cliexend
2968  * @endparblock
2969 ?*/
2970 VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) = {
2971   .path = "set ip6 flow-hash",
2972   .short_help = "set ip6 flow-hash table <table-id> [src] [dst] [sport] "
2973                 "[dport] [proto] [reverse] [flowlabel]",
2974   .function = set_ip6_flow_hash_command_fn,
2975 };
2976
2977 static clib_error_t *
2978 show_ip6_local_command_fn (vlib_main_t * vm,
2979                            unformat_input_t * input, vlib_cli_command_t * cmd)
2980 {
2981   ip6_main_t *im = &ip6_main;
2982   ip_lookup_main_t *lm = &im->lookup_main;
2983   int i;
2984
2985   vlib_cli_output (vm, "Protocols handled by ip6_local");
2986   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
2987     {
2988       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2989         {
2990
2991           u32 node_index = vlib_get_node (vm,
2992                                           ip6_local_node.index)->
2993             next_nodes[lm->local_next_by_ip_protocol[i]];
2994           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
2995                            node_index);
2996         }
2997     }
2998   return 0;
2999 }
3000
3001
3002
3003 /*?
3004  * Display the set of protocols handled by the local IPv6 stack.
3005  *
3006  * @cliexpar
3007  * Example of how to display local protocol table:
3008  * @cliexstart{show ip6 local}
3009  * Protocols handled by ip6_local
3010  * 17
3011  * 43
3012  * 58
3013  * 115
3014  * @cliexend
3015 ?*/
3016 VLIB_CLI_COMMAND (show_ip6_local, static) =
3017 {
3018   .path = "show ip6 local",
3019   .function = show_ip6_local_command_fn,
3020   .short_help = "show ip6 local",
3021 };
3022
3023 #ifndef CLIB_MARCH_VARIANT
3024 int
3025 vnet_set_ip6_classify_intfc (vlib_main_t *vm, u32 sw_if_index, u32 table_index)
3026 {
3027   vnet_main_t *vnm = vnet_get_main ();
3028   vnet_interface_main_t *im = &vnm->interface_main;
3029   ip6_main_t *ipm = &ip6_main;
3030   ip_lookup_main_t *lm = &ipm->lookup_main;
3031   vnet_classify_main_t *cm = &vnet_classify_main;
3032   ip6_address_t *if_addr;
3033
3034   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3035     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3036
3037   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3038     return VNET_API_ERROR_NO_SUCH_ENTRY;
3039
3040   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3041   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3042
3043   if_addr = ip6_interface_first_address (ipm, sw_if_index);
3044
3045   if (NULL != if_addr)
3046     {
3047       fib_prefix_t pfx = {
3048         .fp_len = 128,
3049         .fp_proto = FIB_PROTOCOL_IP6,
3050         .fp_addr.ip6 = *if_addr,
3051       };
3052       u32 fib_index;
3053
3054       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3055                                                        sw_if_index);
3056       if (table_index != (u32) ~ 0)
3057         {
3058           dpo_id_t dpo = DPO_INVALID;
3059           dpo_set (&dpo,
3060                    DPO_CLASSIFY,
3061                    DPO_PROTO_IP6,
3062                    classify_dpo_create (DPO_PROTO_IP6, table_index));
3063           fib_table_entry_special_dpo_add (fib_index,
3064                                            &pfx,
3065                                            FIB_SOURCE_CLASSIFY,
3066                                            FIB_ENTRY_FLAG_NONE, &dpo);
3067           dpo_reset (&dpo);
3068         }
3069       else
3070         {
3071           fib_table_entry_special_remove (fib_index,
3072                                           &pfx, FIB_SOURCE_CLASSIFY);
3073         }
3074     }
3075
3076   return 0;
3077 }
3078 #endif
3079
3080 static clib_error_t *
3081 set_ip6_classify_command_fn (vlib_main_t * vm,
3082                              unformat_input_t * input,
3083                              vlib_cli_command_t * cmd)
3084 {
3085   u32 table_index = ~0;
3086   int table_index_set = 0;
3087   u32 sw_if_index = ~0;
3088   int rv;
3089
3090   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3091     {
3092       if (unformat (input, "table-index %d", &table_index))
3093         table_index_set = 1;
3094       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3095                          vnet_get_main (), &sw_if_index))
3096         ;
3097       else
3098         break;
3099     }
3100
3101   if (table_index_set == 0)
3102     return clib_error_return (0, "classify table-index must be specified");
3103
3104   if (sw_if_index == ~0)
3105     return clib_error_return (0, "interface / subif must be specified");
3106
3107   rv = vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index);
3108
3109   switch (rv)
3110     {
3111     case 0:
3112       break;
3113
3114     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3115       return clib_error_return (0, "No such interface");
3116
3117     case VNET_API_ERROR_NO_SUCH_ENTRY:
3118       return clib_error_return (0, "No such classifier table");
3119     }
3120   return 0;
3121 }
3122
/*?
 * Assign a classification table to an interface. The classification
 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
 * on an interface.
 *
 * @cliexpar
 * Example of how to assign a classification table to an interface:
 * @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1}
?*/
3133 VLIB_CLI_COMMAND (set_ip6_classify_command, static) =
3134 {
3135   .path = "set ip6 classify",
3136   .short_help =
3137   "set ip6 classify intfc <interface> table-index <classify-idx>",
3138   .function = set_ip6_classify_command_fn,
3139 };
3140
3141 /*
3142  * fd.io coding-style-patch-verification: ON
3143  *
3144  * Local Variables:
3145  * eval: (c-set-style "gnu")
3146  * End:
3147  */