gso: use the header offsets from buffer metadata
[vpp.git] / src / vnet / ip / ip6_forward.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip6_forward.c: IP v6 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ip/ip6_link.h>
44 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vppinfra/cache.h>
47 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
48 #include <vnet/fib/ip6_fib.h>
49 #include <vnet/mfib/ip6_mfib.h>
50 #include <vnet/dpo/load_balance_map.h>
51 #include <vnet/dpo/receive_dpo.h>
52 #include <vnet/dpo/classify_dpo.h>
53 #include <vnet/classify/vnet_classify.h>
54 #include <vnet/pg/pg.h>
55
56 #ifndef CLIB_MARCH_VARIANT
57 #include <vppinfra/bihash_template.c>
58 #endif
59 #include <vnet/ip/ip6_forward.h>
60 #include <vnet/interface_output.h>
61
/* Flag used by IOAM code: the classifier sets it, pop-hop-by-hop checks it. */
63 #define OI_DECAP   0x80000000
64
65 static void
66 ip6_add_interface_prefix_routes (ip6_main_t * im,
67                                  u32 sw_if_index,
68                                  u32 fib_index,
69                                  ip6_address_t * address, u32 address_length)
70 {
71   ip_lookup_main_t *lm = &im->lookup_main;
72   ip_interface_prefix_t *if_prefix;
73
74   ip_interface_prefix_key_t key = {
75     .prefix = {
76       .fp_len = address_length,
77       .fp_proto = FIB_PROTOCOL_IP6,
78       .fp_addr.ip6 = {
79         .as_u64 = {
80           address->as_u64[0] & im->fib_masks[address_length].as_u64[0],
81           address->as_u64[1] & im->fib_masks[address_length].as_u64[1],
82         },
83       },
84     },
85     .sw_if_index = sw_if_index,
86   };
87
88   /* If prefix already set on interface, just increment ref count & return */
89   if_prefix = ip_get_interface_prefix (lm, &key);
90   if (if_prefix)
91     {
92       if_prefix->ref_count += 1;
93       return;
94     }
95
96   /* New prefix - allocate a pool entry, initialize it, add to the hash */
97   pool_get (lm->if_prefix_pool, if_prefix);
98   if_prefix->ref_count = 1;
99   clib_memcpy (&if_prefix->key, &key, sizeof (key));
100   mhash_set (&lm->prefix_to_if_prefix_index, &key,
101              if_prefix - lm->if_prefix_pool, 0 /* old value */ );
102
103   /* length < 128 - add glean */
104   if (address_length < 128)
105     {
106       /* set the glean route for the prefix */
107       fib_table_entry_update_one_path (fib_index, &key.prefix,
108                                        FIB_SOURCE_INTERFACE,
109                                        (FIB_ENTRY_FLAG_CONNECTED |
110                                         FIB_ENTRY_FLAG_ATTACHED),
111                                        DPO_PROTO_IP6,
112                                        /* No next-hop address */
113                                        NULL, sw_if_index,
114                                        /* invalid FIB index */
115                                        ~0, 1,
116                                        /* no out-label stack */
117                                        NULL, FIB_ROUTE_PATH_FLAG_NONE);
118     }
119 }
120
121 static void
122 ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
123                           ip6_main_t * im, u32 fib_index,
124                           ip_interface_address_t * a)
125 {
126   ip_lookup_main_t *lm = &im->lookup_main;
127   ip6_address_t *address = ip_interface_address_get_address (lm, a);
128   fib_prefix_t pfx = {
129     .fp_len = a->address_length,
130     .fp_proto = FIB_PROTOCOL_IP6,
131     .fp_addr.ip6 = *address,
132   };
133
134   /* set special routes for the prefix if needed */
135   ip6_add_interface_prefix_routes (im, sw_if_index, fib_index,
136                                    address, a->address_length);
137
138   pfx.fp_len = 128;
139   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
140     {
141       u32 classify_table_index =
142         lm->classify_table_index_by_sw_if_index[sw_if_index];
143       if (classify_table_index != (u32) ~ 0)
144         {
145           dpo_id_t dpo = DPO_INVALID;
146
147           dpo_set (&dpo,
148                    DPO_CLASSIFY,
149                    DPO_PROTO_IP6,
150                    classify_dpo_create (DPO_PROTO_IP6, classify_table_index));
151
152           fib_table_entry_special_dpo_add (fib_index,
153                                            &pfx,
154                                            FIB_SOURCE_CLASSIFY,
155                                            FIB_ENTRY_FLAG_NONE, &dpo);
156           dpo_reset (&dpo);
157         }
158     }
159
160   fib_table_entry_update_one_path (fib_index, &pfx,
161                                    FIB_SOURCE_INTERFACE,
162                                    (FIB_ENTRY_FLAG_CONNECTED |
163                                     FIB_ENTRY_FLAG_LOCAL),
164                                    DPO_PROTO_IP6,
165                                    &pfx.fp_addr,
166                                    sw_if_index, ~0,
167                                    1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
168 }
169
170 static void
171 ip6_del_interface_prefix_routes (ip6_main_t * im,
172                                  u32 sw_if_index,
173                                  u32 fib_index,
174                                  ip6_address_t * address, u32 address_length)
175 {
176   ip_lookup_main_t *lm = &im->lookup_main;
177   ip_interface_prefix_t *if_prefix;
178
179   ip_interface_prefix_key_t key = {
180     .prefix = {
181       .fp_len = address_length,
182       .fp_proto = FIB_PROTOCOL_IP6,
183       .fp_addr.ip6 = {
184         .as_u64 = {
185           address->as_u64[0] & im->fib_masks[address_length].as_u64[0],
186           address->as_u64[1] & im->fib_masks[address_length].as_u64[1],
187         },
188       },
189     },
190     .sw_if_index = sw_if_index,
191   };
192
193   if_prefix = ip_get_interface_prefix (lm, &key);
194   if (!if_prefix)
195     {
196       clib_warning ("Prefix not found while deleting %U",
197                     format_ip6_address_and_length, address, address_length);
198       return;
199     }
200
201   /* If not deleting last intf addr in prefix, decrement ref count & return */
202   if_prefix->ref_count -= 1;
203   if (if_prefix->ref_count > 0)
204     return;
205
206   /* length <= 128, delete glean route */
207   if (address_length <= 128)
208     {
209       /* remove glean route for prefix */
210       fib_table_entry_delete (fib_index, &key.prefix, FIB_SOURCE_INTERFACE);
211     }
212
213   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */ );
214   pool_put (lm->if_prefix_pool, if_prefix);
215 }
216
217 static void
218 ip6_del_interface_routes (u32 sw_if_index, ip6_main_t * im,
219                           u32 fib_index,
220                           ip6_address_t * address, u32 address_length)
221 {
222   fib_prefix_t pfx = {
223     .fp_len = 128,
224     .fp_proto = FIB_PROTOCOL_IP6,
225     .fp_addr.ip6 = *address,
226   };
227
228   /* delete special routes for the prefix if needed */
229   ip6_del_interface_prefix_routes (im, sw_if_index, fib_index,
230                                    address, address_length);
231
232   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
233 }
234
235 #ifndef CLIB_MARCH_VARIANT
236 void
237 ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
238 {
239   ip6_main_t *im = &ip6_main;
240   vnet_main_t *vnm = vnet_get_main ();
241   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
242
243   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
244
245   /*
246    * enable/disable only on the 1<->0 transition
247    */
248   if (is_enable)
249     {
250       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
251         return;
252     }
253   else
254     {
255       /* The ref count is 0 when an address is removed from an interface that has
256        * no address - this is not a ciritical error */
257       if (0 == im->ip_enabled_by_sw_if_index[sw_if_index] ||
258           0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
259         return;
260     }
261
262   vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index,
263                                !is_enable, 0, 0);
264
265   vnet_feature_enable_disable ("ip6-multicast", "ip6-not-enabled",
266                                sw_if_index, !is_enable, 0, 0);
267
268   if (is_enable)
269     hi->l3_if_count++;
270   else if (hi->l3_if_count)
271     hi->l3_if_count--;
272 }
273
274 /* get first interface address */
275 ip6_address_t *
276 ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index)
277 {
278   ip_lookup_main_t *lm = &im->lookup_main;
279   ip_interface_address_t *ia = 0;
280   ip6_address_t *result = 0;
281
282   foreach_ip_interface_address (lm, ia, sw_if_index,
283                                 1 /* honor unnumbered */,
284   ({
285     ip6_address_t * a = ip_interface_address_get_address (lm, ia);
286     result = a;
287     break;
288   }));
289   return result;
290 }
291
292 clib_error_t *
293 ip6_add_del_interface_address (vlib_main_t * vm,
294                                u32 sw_if_index,
295                                ip6_address_t * address,
296                                u32 address_length, u32 is_del)
297 {
298   vnet_main_t *vnm = vnet_get_main ();
299   ip6_main_t *im = &ip6_main;
300   ip_lookup_main_t *lm = &im->lookup_main;
301   clib_error_t *error = NULL;
302   u32 if_address_index;
303   ip6_address_fib_t ip6_af, *addr_fib = 0;
304   const ip6_address_t *ll_addr;
305
306   error = vnet_sw_interface_supports_addressing (vnm, sw_if_index);
307   if (error)
308     {
309       vnm->api_errno = VNET_API_ERROR_UNSUPPORTED;
310       return error;
311     }
312
313   if (ip6_address_is_link_local_unicast (address))
314     {
315       if (address_length != 128)
316         {
317           vnm->api_errno = VNET_API_ERROR_ADDRESS_LENGTH_MISMATCH;
318           return
319             clib_error_create
320             ("prefix length of link-local address must be 128");
321         }
322       if (!is_del)
323         {
324           int rv;
325
326           rv = ip6_link_set_local_address (sw_if_index, address);
327
328           if (rv)
329             {
330               vnm->api_errno = rv;
331               return clib_error_create ("address not assignable");
332             }
333         }
334       else
335         {
336           ll_addr = ip6_get_link_local_address (sw_if_index);
337           if (ip6_address_is_equal (ll_addr, address))
338             {
339               vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_DELETABLE;
340               return clib_error_create ("address not deletable");
341             }
342           else
343             {
344               vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
345               return clib_error_create ("address not found");
346             }
347         }
348
349       return (NULL);
350     }
351
352   ip6_addr_fib_init (&ip6_af, address,
353                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
354   vec_add1 (addr_fib, ip6_af);
355
356   if (!is_del)
357     {
358       /* When adding an address check that it does not conflict
359          with an existing address on any interface in this table. */
360       ip_interface_address_t *ia;
361       vnet_sw_interface_t *sif;
362
363       pool_foreach (sif, vnm->interface_main.sw_interfaces)
364        {
365           if (im->fib_index_by_sw_if_index[sw_if_index] ==
366               im->fib_index_by_sw_if_index[sif->sw_if_index])
367             {
368               foreach_ip_interface_address
369                 (&im->lookup_main, ia, sif->sw_if_index,
370                  0 /* honor unnumbered */ ,
371                  ({
372                    ip6_address_t * x =
373                      ip_interface_address_get_address
374                      (&im->lookup_main, ia);
375
376                    if (ip6_destination_matches_route
377                        (im, address, x, ia->address_length) ||
378                        ip6_destination_matches_route (im,
379                                                       x,
380                                                       address,
381                                                       address_length))
382                      {
383                        /* an intf may have >1 addr from the same prefix */
384                        if ((sw_if_index == sif->sw_if_index) &&
385                            (ia->address_length == address_length) &&
386                            !ip6_address_is_equal (x, address))
387                          continue;
388
389                        if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
390                          /* if the address we're comparing against is stale
391                           * then the CP has not added this one back yet, maybe
392                           * it never will, so we have to assume it won't and
393                           * ignore it. if it does add it back, then it will fail
394                           * because this one is now present */
395                          continue;
396
397                        /* error if the length or intf was different */
398                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
399                        error =  clib_error_create
400                          ("failed to add %U which conflicts with %U for interface %U",
401                           format_ip6_address_and_length, address,
402                           address_length,
403                           format_ip6_address_and_length, x,
404                           ia->address_length,
405                           format_vnet_sw_if_index_name, vnm,
406                           sif->sw_if_index);
407                        goto done;
408                      }
409                  }));
410             }
411       }
412     }
413
414   if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
415
416   if (is_del)
417     {
418       if (~0 == if_address_index)
419         {
420           vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
421           error = clib_error_create ("%U not found for interface %U",
422                                      lm->format_address_and_length,
423                                      addr_fib, address_length,
424                                      format_vnet_sw_if_index_name, vnm,
425                                      sw_if_index);
426           goto done;
427         }
428
429       error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib,
430                                         address_length, sw_if_index);
431       if (error)
432         goto done;
433     }
434   else
435     {
436       if (~0 != if_address_index)
437         {
438           ip_interface_address_t *ia;
439
440           ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
441
442           if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
443             {
444               if (ia->sw_if_index == sw_if_index)
445                 {
446                   /* re-adding an address during the replace action.
447                    * consdier this the update. clear the flag and
448                    * we're done */
449                   ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
450                   goto done;
451                 }
452               else
453                 {
454                   /* The prefix is moving from one interface to another.
455                    * delete the stale and add the new */
456                   ip6_add_del_interface_address (vm,
457                                                  ia->sw_if_index,
458                                                  address, address_length, 1);
459                   ia = NULL;
460                   error = ip_interface_address_add (lm, sw_if_index,
461                                                     addr_fib, address_length,
462                                                     &if_address_index);
463                 }
464             }
465           else
466             {
467               vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
468               error = clib_error_create
469                 ("Prefix %U already found on interface %U",
470                  lm->format_address_and_length, addr_fib, address_length,
471                  format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
472             }
473         }
474       else
475         error = ip_interface_address_add (lm, sw_if_index,
476                                           addr_fib, address_length,
477                                           &if_address_index);
478     }
479
480   if (error)
481     goto done;
482
483   ip6_sw_interface_enable_disable (sw_if_index, !is_del);
484   if (!is_del)
485     ip6_link_enable (sw_if_index, NULL);
486
487   /* intf addr routes are added/deleted on admin up/down */
488   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
489     {
490       if (is_del)
491         ip6_del_interface_routes (sw_if_index,
492                                   im, ip6_af.fib_index, address,
493                                   address_length);
494       else
495         ip6_add_interface_routes (vnm, sw_if_index,
496                                   im, ip6_af.fib_index,
497                                   pool_elt_at_index (lm->if_address_pool,
498                                                      if_address_index));
499     }
500
501   ip6_add_del_interface_address_callback_t *cb;
502   vec_foreach (cb, im->add_del_interface_address_callbacks)
503     cb->function (im, cb->function_opaque, sw_if_index,
504                   address, address_length, if_address_index, is_del);
505
506   if (is_del)
507     ip6_link_disable (sw_if_index);
508
509 done:
510   vec_free (addr_fib);
511   return error;
512 }
513
514 #endif
515
516 static clib_error_t *
517 ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
518 {
519   ip6_main_t *im = &ip6_main;
520   ip_interface_address_t *ia;
521   ip6_address_t *a;
522   u32 is_admin_up, fib_index;
523
524   vec_validate_init_empty (im->
525                            lookup_main.if_address_pool_index_by_sw_if_index,
526                            sw_if_index, ~0);
527
528   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
529
530   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
531
532   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
533                                 0 /* honor unnumbered */,
534   ({
535     a = ip_interface_address_get_address (&im->lookup_main, ia);
536     if (is_admin_up)
537       ip6_add_interface_routes (vnm, sw_if_index,
538                                 im, fib_index,
539                                 ia);
540     else
541       ip6_del_interface_routes (sw_if_index, im, fib_index,
542                                 a, ia->address_length);
543   }));
544
545   return 0;
546 }
547
548 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
549
550 /* Built-in ip6 unicast rx feature path definition */
551 VNET_FEATURE_ARC_INIT (ip6_unicast, static) =
552 {
553   .arc_name  = "ip6-unicast",
554   .start_nodes = VNET_FEATURES ("ip6-input"),
555   .last_in_arc = "ip6-lookup",
556   .arc_index_ptr = &ip6_main.lookup_main.ucast_feature_arc_index,
557 };
558
559 VNET_FEATURE_INIT (ip6_flow_classify, static) =
560 {
561   .arc_name = "ip6-unicast",
562   .node_name = "ip6-flow-classify",
563   .runs_before = VNET_FEATURES ("ip6-inacl"),
564 };
565
566 VNET_FEATURE_INIT (ip6_inacl, static) =
567 {
568   .arc_name = "ip6-unicast",
569   .node_name = "ip6-inacl",
570   .runs_before = VNET_FEATURES ("ip6-policer-classify"),
571 };
572
573 VNET_FEATURE_INIT (ip6_policer_classify, static) =
574 {
575   .arc_name = "ip6-unicast",
576   .node_name = "ip6-policer-classify",
577   .runs_before = VNET_FEATURES ("ipsec6-input-feature"),
578 };
579
580 VNET_FEATURE_INIT (ip6_ipsec, static) =
581 {
582   .arc_name = "ip6-unicast",
583   .node_name = "ipsec6-input-feature",
584   .runs_before = VNET_FEATURES ("l2tp-decap"),
585 };
586
587 VNET_FEATURE_INIT (ip6_l2tp, static) =
588 {
589   .arc_name = "ip6-unicast",
590   .node_name = "l2tp-decap",
591   .runs_before = VNET_FEATURES ("vpath-input-ip6"),
592 };
593
594 VNET_FEATURE_INIT (ip6_vpath, static) =
595 {
596   .arc_name = "ip6-unicast",
597   .node_name = "vpath-input-ip6",
598   .runs_before = VNET_FEATURES ("ip6-vxlan-bypass"),
599 };
600
601 VNET_FEATURE_INIT (ip6_vxlan_bypass, static) =
602 {
603   .arc_name = "ip6-unicast",
604   .node_name = "ip6-vxlan-bypass",
605   .runs_before = VNET_FEATURES ("ip6-lookup"),
606 };
607
608 VNET_FEATURE_INIT (ip6_not_enabled, static) =
609 {
610   .arc_name = "ip6-unicast",
611   .node_name = "ip6-not-enabled",
612   .runs_before = VNET_FEATURES ("ip6-lookup"),
613 };
614
615 VNET_FEATURE_INIT (ip6_lookup, static) =
616 {
617   .arc_name = "ip6-unicast",
618   .node_name = "ip6-lookup",
619   .runs_before = 0,  /*last feature*/
620 };
621
622 /* Built-in ip6 multicast rx feature path definition (none now) */
623 VNET_FEATURE_ARC_INIT (ip6_multicast, static) =
624 {
625   .arc_name  = "ip6-multicast",
626   .start_nodes = VNET_FEATURES ("ip6-input"),
627   .last_in_arc = "ip6-mfib-forward-lookup",
628   .arc_index_ptr = &ip6_main.lookup_main.mcast_feature_arc_index,
629 };
630
631 VNET_FEATURE_INIT (ip6_vpath_mc, static) = {
632   .arc_name = "ip6-multicast",
633   .node_name = "vpath-input-ip6",
634   .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
635 };
636
637 VNET_FEATURE_INIT (ip6_not_enabled_mc, static) = {
638   .arc_name = "ip6-multicast",
639   .node_name = "ip6-not-enabled",
640   .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
641 };
642
643 VNET_FEATURE_INIT (ip6_mc_lookup, static) = {
644   .arc_name = "ip6-multicast",
645   .node_name = "ip6-mfib-forward-lookup",
646   .runs_before = 0, /* last feature */
647 };
648
649 /* Built-in ip4 tx feature path definition */
650 VNET_FEATURE_ARC_INIT (ip6_output, static) =
651 {
652   .arc_name  = "ip6-output",
653   .start_nodes = VNET_FEATURES ("ip6-rewrite", "ip6-midchain", "ip6-dvr-dpo"),
654   .last_in_arc = "interface-output",
655   .arc_index_ptr = &ip6_main.lookup_main.output_feature_arc_index,
656 };
657
658 VNET_FEATURE_INIT (ip6_outacl, static) = {
659   .arc_name = "ip6-output",
660   .node_name = "ip6-outacl",
661   .runs_before = VNET_FEATURES ("ipsec6-output-feature"),
662 };
663
664 VNET_FEATURE_INIT (ip6_ipsec_output, static) = {
665   .arc_name = "ip6-output",
666   .node_name = "ipsec6-output-feature",
667   .runs_before = VNET_FEATURES ("interface-output"),
668 };
669
670 VNET_FEATURE_INIT (ip6_interface_output, static) = {
671   .arc_name = "ip6-output",
672   .node_name = "interface-output",
673   .runs_before = 0, /* not before any other features */
674 };
675
676 static clib_error_t *
677 ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
678 {
679   ip6_main_t *im = &ip6_main;
680
681   vec_validate_init_empty (im->fib_index_by_sw_if_index, sw_if_index, ~0);
682   vec_validate_init_empty (im->mfib_index_by_sw_if_index, sw_if_index, ~0);
683
684   if (is_add)
685     {
686       /* Fill in lookup tables with default table (0). */
687       im->fib_index_by_sw_if_index[sw_if_index] = 0;
688       im->mfib_index_by_sw_if_index[sw_if_index] = 0;
689     }
690   else
691     {
692       /* Ensure that IPv6 is disabled */
693       ip6_main_t *im6 = &ip6_main;
694       ip_lookup_main_t *lm6 = &im6->lookup_main;
695       ip_interface_address_t *ia = 0;
696       ip6_address_t *address;
697       vlib_main_t *vm = vlib_get_main ();
698
699       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
700       foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
701       ({
702         address = ip_interface_address_get_address (lm6, ia);
703         ip6_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
704       }));
705       ip6_mfib_interface_enable_disable (sw_if_index, 0);
706
707       if (0 != im6->fib_index_by_sw_if_index[sw_if_index])
708         fib_table_bind (FIB_PROTOCOL_IP6, sw_if_index, 0);
709       if (0 != im6->mfib_index_by_sw_if_index[sw_if_index])
710         mfib_table_bind (FIB_PROTOCOL_IP6, sw_if_index, 0);
711
712       /* Erase the lookup tables just in case */
713       im6->fib_index_by_sw_if_index[sw_if_index] = ~0;
714       im6->mfib_index_by_sw_if_index[sw_if_index] = ~0;
715     }
716
717   vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index,
718                                is_add, 0, 0);
719
720   vnet_feature_enable_disable ("ip6-multicast", "ip6-not-enabled",
721                                sw_if_index, is_add, 0, 0);
722
723   return /* no error */ 0;
724 }
725
726 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del);
727
728 VLIB_NODE_FN (ip6_lookup_node) (vlib_main_t * vm,
729                                 vlib_node_runtime_t * node,
730                                 vlib_frame_t * frame)
731 {
732   return ip6_lookup_inline (vm, node, frame);
733 }
734
735 static u8 *format_ip6_lookup_trace (u8 * s, va_list * args);
736
737 VLIB_REGISTER_NODE (ip6_lookup_node) =
738 {
739   .name = "ip6-lookup",
740   .vector_size = sizeof (u32),
741   .format_trace = format_ip6_lookup_trace,
742   .n_next_nodes = IP6_LOOKUP_N_NEXT,
743   .next_nodes = IP6_LOOKUP_NEXT_NODES,
744 };
745
746 VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm,
747                                       vlib_node_runtime_t * node,
748                                       vlib_frame_t * frame)
749 {
750   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
751   u32 n_left, *from;
752   u32 thread_index = vm->thread_index;
753   ip6_main_t *im = &ip6_main;
754   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
755   u16 nexts[VLIB_FRAME_SIZE], *next;
756
757   from = vlib_frame_vector_args (frame);
758   n_left = frame->n_vectors;
759   next = nexts;
760
761   vlib_get_buffers (vm, from, bufs, n_left);
762
763   while (n_left >= 4)
764     {
765       const load_balance_t *lb0, *lb1;
766       const ip6_header_t *ip0, *ip1;
767       u32 lbi0, hc0, lbi1, hc1;
768       const dpo_id_t *dpo0, *dpo1;
769
770       /* Prefetch next iteration. */
771       {
772         vlib_prefetch_buffer_header (b[2], STORE);
773         vlib_prefetch_buffer_header (b[3], STORE);
774
775         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), STORE);
776         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), STORE);
777       }
778
779       ip0 = vlib_buffer_get_current (b[0]);
780       ip1 = vlib_buffer_get_current (b[1]);
781       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
782       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
783
784       lb0 = load_balance_get (lbi0);
785       lb1 = load_balance_get (lbi1);
786
787       /*
788        * this node is for via FIBs we can re-use the hash value from the
789        * to node if present.
790        * We don't want to use the same hash value at each level in the recursion
791        * graph as that would lead to polarisation
792        */
793       hc0 = hc1 = 0;
794
795       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
796         {
797           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
798             {
799               hc0 = vnet_buffer (b[0])->ip.flow_hash =
800                 vnet_buffer (b[0])->ip.flow_hash >> 1;
801             }
802           else
803             {
804               hc0 = vnet_buffer (b[0])->ip.flow_hash =
805                 ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
806             }
807           dpo0 = load_balance_get_fwd_bucket
808             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
809         }
810       else
811         {
812           dpo0 = load_balance_get_bucket_i (lb0, 0);
813         }
814       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
815         {
816           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
817             {
818               hc1 = vnet_buffer (b[1])->ip.flow_hash =
819                 vnet_buffer (b[1])->ip.flow_hash >> 1;
820             }
821           else
822             {
823               hc1 = vnet_buffer (b[1])->ip.flow_hash =
824                 ip6_compute_flow_hash (ip1, lb1->lb_hash_config);
825             }
826           dpo1 = load_balance_get_fwd_bucket
827             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
828         }
829       else
830         {
831           dpo1 = load_balance_get_bucket_i (lb1, 0);
832         }
833
834       next[0] = dpo0->dpoi_next_node;
835       next[1] = dpo1->dpoi_next_node;
836
837       /* Only process the HBH Option Header if explicitly configured to do so */
838       if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
839         {
840           next[0] = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
841             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[0];
842         }
843       /* Only process the HBH Option Header if explicitly configured to do so */
844       if (PREDICT_FALSE (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
845         {
846           next[1] = (dpo_is_adj (dpo1) && im->hbh_enabled) ?
847             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[1];
848         }
849
850       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
851       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
852
853       vlib_increment_combined_counter
854         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
855       vlib_increment_combined_counter
856         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
857
858       b += 2;
859       next += 2;
860       n_left -= 2;
861     }
862
863   while (n_left > 0)
864     {
865       const load_balance_t *lb0;
866       const ip6_header_t *ip0;
867       const dpo_id_t *dpo0;
868       u32 lbi0, hc0;
869
870       ip0 = vlib_buffer_get_current (b[0]);
871       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
872
873       lb0 = load_balance_get (lbi0);
874
875       hc0 = 0;
876       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
877         {
878           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
879             {
880               hc0 = vnet_buffer (b[0])->ip.flow_hash =
881                 vnet_buffer (b[0])->ip.flow_hash >> 1;
882             }
883           else
884             {
885               hc0 = vnet_buffer (b[0])->ip.flow_hash =
886                 ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
887             }
888           dpo0 = load_balance_get_fwd_bucket
889             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
890         }
891       else
892         {
893           dpo0 = load_balance_get_bucket_i (lb0, 0);
894         }
895
896       next[0] = dpo0->dpoi_next_node;
897       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
898
899       /* Only process the HBH Option Header if explicitly configured to do so */
900       if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
901         {
902           next[0] = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
903             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[0];
904         }
905
906       vlib_increment_combined_counter
907         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
908
909       b += 1;
910       next += 1;
911       n_left -= 1;
912     }
913
914   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
915
916   if (node->flags & VLIB_NODE_FLAG_TRACE)
917     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
918
919   return frame->n_vectors;
920 }
921
922 VLIB_REGISTER_NODE (ip6_load_balance_node) =
923 {
924   .name = "ip6-load-balance",
925   .vector_size = sizeof (u32),
926   .sibling_of = "ip6-lookup",
927   .format_trace = format_ip6_lookup_trace,
928 };
929
/* Trace record filled in by ip6_forward_next_trace () and rendered by the
 * format_ip6_*_trace () functions in this file. */
typedef struct
{
  /* Adjacency taken: copied from the buffer's ip.adj_index[] slot selected
   * by the caller of ip6_forward_next_trace (). */
  u32 adj_index;
  /* Flow hash copied from the buffer's ip.flow_hash metadata. */
  u32 flow_hash;
  /* The buffer's sw_if_index[VLIB_TX] when that is not ~0, otherwise the
   * FIB index bound to the RX interface.  format_ip6_rewrite_trace ()
   * prints this same field under the label "tx_sw_if_index". */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. */
  u8 packet_data[128 - 1 * sizeof (u32)];
}
ip6_forward_next_trace_t;
941
942 #ifndef CLIB_MARCH_VARIANT
943 u8 *
944 format_ip6_forward_next_trace (u8 * s, va_list * args)
945 {
946   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
947   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
948   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
949   u32 indent = format_get_indent (s);
950
951   s = format (s, "%Ufib:%d adj:%d flow:0x%08x", format_white_space, indent,
952               t->fib_index, t->adj_index, t->flow_hash);
953   s = format (s, "\n%U%U",
954               format_white_space, indent,
955               format_ip6_header, t->packet_data, sizeof (t->packet_data));
956   return s;
957 }
958 #endif
959
960 static u8 *
961 format_ip6_lookup_trace (u8 * s, va_list * args)
962 {
963   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
964   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
965   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
966   u32 indent = format_get_indent (s);
967
968   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
969               t->fib_index, t->adj_index, t->flow_hash);
970   s = format (s, "\n%U%U",
971               format_white_space, indent,
972               format_ip6_header, t->packet_data, sizeof (t->packet_data));
973   return s;
974 }
975
976
977 static u8 *
978 format_ip6_rewrite_trace (u8 * s, va_list * args)
979 {
980   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
981   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
982   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
983   u32 indent = format_get_indent (s);
984
985   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
986               t->fib_index, t->adj_index, format_ip_adjacency,
987               t->adj_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
988   s = format (s, "\n%U%U",
989               format_white_space, indent,
990               format_ip_adjacency_packet_data,
991               t->packet_data, sizeof (t->packet_data));
992   return s;
993 }
994
995 /* Common trace function for all ip6-forward next nodes. */
996 #ifndef CLIB_MARCH_VARIANT
997 void
998 ip6_forward_next_trace (vlib_main_t * vm,
999                         vlib_node_runtime_t * node,
1000                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1001 {
1002   u32 *from, n_left;
1003   ip6_main_t *im = &ip6_main;
1004
1005   n_left = frame->n_vectors;
1006   from = vlib_frame_vector_args (frame);
1007
1008   while (n_left >= 4)
1009     {
1010       u32 bi0, bi1;
1011       vlib_buffer_t *b0, *b1;
1012       ip6_forward_next_trace_t *t0, *t1;
1013
1014       /* Prefetch next iteration. */
1015       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1016       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1017
1018       bi0 = from[0];
1019       bi1 = from[1];
1020
1021       b0 = vlib_get_buffer (vm, bi0);
1022       b1 = vlib_get_buffer (vm, bi1);
1023
1024       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1025         {
1026           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1027           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1028           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1029           t0->fib_index =
1030             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1031              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1032             vec_elt (im->fib_index_by_sw_if_index,
1033                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1034
1035           clib_memcpy_fast (t0->packet_data,
1036                             vlib_buffer_get_current (b0),
1037                             sizeof (t0->packet_data));
1038         }
1039       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1040         {
1041           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1042           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1043           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1044           t1->fib_index =
1045             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1046              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1047             vec_elt (im->fib_index_by_sw_if_index,
1048                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1049
1050           clib_memcpy_fast (t1->packet_data,
1051                             vlib_buffer_get_current (b1),
1052                             sizeof (t1->packet_data));
1053         }
1054       from += 2;
1055       n_left -= 2;
1056     }
1057
1058   while (n_left >= 1)
1059     {
1060       u32 bi0;
1061       vlib_buffer_t *b0;
1062       ip6_forward_next_trace_t *t0;
1063
1064       bi0 = from[0];
1065
1066       b0 = vlib_get_buffer (vm, bi0);
1067
1068       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1069         {
1070           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1071           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1072           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1073           t0->fib_index =
1074             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1075              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1076             vec_elt (im->fib_index_by_sw_if_index,
1077                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1078
1079           clib_memcpy_fast (t0->packet_data,
1080                             vlib_buffer_get_current (b0),
1081                             sizeof (t0->packet_data));
1082         }
1083       from += 1;
1084       n_left -= 1;
1085     }
1086 }
1087
1088 /* Compute TCP/UDP/ICMP6 checksum in software. */
1089 u16
1090 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1091                                    ip6_header_t * ip0, int *bogus_lengthp)
1092 {
1093   ip_csum_t sum0 = 0;
1094   u16 payload_length, payload_length_host_byte_order;
1095   u32 i;
1096   u32 headers_size = sizeof (ip0[0]);
1097   u8 *data_this_buffer;
1098   u8 next_hdr = ip0->protocol;
1099
1100   ASSERT (bogus_lengthp);
1101   *bogus_lengthp = 0;
1102
1103   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
1104   data_this_buffer = (u8 *) (ip0 + 1);
1105   payload_length = ip0->payload_length;
1106
1107   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets)
1108    * or UDP-Ping packets */
1109   if (PREDICT_FALSE (next_hdr == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
1110     {
1111       u32 skip_bytes;
1112       ip6_hop_by_hop_ext_t *ext_hdr =
1113         (ip6_hop_by_hop_ext_t *) data_this_buffer;
1114
1115       /* validate really icmp6 next */
1116       ASSERT ((ext_hdr->next_hdr == IP_PROTOCOL_ICMP6)
1117               || (ext_hdr->next_hdr == IP_PROTOCOL_UDP));
1118
1119       skip_bytes = 8 * (1 + ext_hdr->n_data_u64s);
1120       data_this_buffer = (void *) ((u8 *) data_this_buffer + skip_bytes);
1121
1122       payload_length_host_byte_order -= skip_bytes;
1123       headers_size += skip_bytes;
1124
1125       /* pseudo-header adjustments:
1126        *   exclude ext header bytes from payload length
1127        *   use payload IP proto rather than ext header IP proto
1128        */
1129       payload_length = clib_host_to_net_u16 (payload_length_host_byte_order);
1130       next_hdr = ext_hdr->next_hdr;
1131     }
1132
1133   /* Initialize checksum with ip pseudo-header. */
1134   sum0 = payload_length + clib_host_to_net_u16 (next_hdr);
1135
1136   for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++)
1137     {
1138       sum0 = ip_csum_with_carry
1139         (sum0, clib_mem_unaligned (&ip0->src_address.as_uword[i], uword));
1140       sum0 = ip_csum_with_carry
1141         (sum0, clib_mem_unaligned (&ip0->dst_address.as_uword[i], uword));
1142     }
1143
1144   if (p0)
1145     return ip_calculate_l4_checksum (vm, p0, sum0,
1146                                      payload_length_host_byte_order,
1147                                      (u8 *) ip0, headers_size, NULL);
1148   else
1149     return ip_calculate_l4_checksum (vm, 0, sum0,
1150                                      payload_length_host_byte_order, NULL, 0,
1151                                      data_this_buffer);
1152 }
1153
1154 u32
1155 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1156 {
1157   ip6_header_t *ip0 = vlib_buffer_get_current (p0);
1158   udp_header_t *udp0;
1159   u16 sum16;
1160   int bogus_length;
1161
1162   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
1163   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1164           || ip0->protocol == IP_PROTOCOL_ICMP6
1165           || ip0->protocol == IP_PROTOCOL_UDP
1166           || ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS);
1167
1168   udp0 = (void *) (ip0 + 1);
1169   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1170     {
1171       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1172                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1173       return p0->flags;
1174     }
1175
1176   sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, &bogus_length);
1177
1178   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1179                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1180
1181   return p0->flags;
1182 }
1183 #endif
1184
1185 /**
1186  * @brief returns number of links on which src is reachable.
1187  */
1188 always_inline int
1189 ip6_urpf_loose_check (ip6_main_t * im, vlib_buffer_t * b, ip6_header_t * i)
1190 {
1191   const load_balance_t *lb0;
1192   index_t lbi;
1193   u32 fib_index;
1194
1195   fib_index = vec_elt (im->fib_index_by_sw_if_index,
1196                        vnet_buffer (b)->sw_if_index[VLIB_RX]);
1197   fib_index =
1198     (vnet_buffer (b)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
1199     fib_index : vnet_buffer (b)->sw_if_index[VLIB_TX];
1200
1201   lbi = ip6_fib_table_fwding_lookup (fib_index, &i->src_address);
1202   lb0 = load_balance_get (lbi);
1203
1204   return (fib_urpf_check_size (lb0->lb_urpf));
1205 }
1206
1207 always_inline u8
1208 ip6_next_proto_is_tcp_udp (vlib_buffer_t * p0, ip6_header_t * ip0,
1209                            u32 * udp_offset0)
1210 {
1211   int nh = ip6_locate_header (p0, ip0, -1, udp_offset0);
1212   if (nh > 0)
1213     if (nh == IP_PROTOCOL_UDP || nh == IP_PROTOCOL_TCP)
1214       return nh;
1215   return 0;
1216 }
1217
1218 VNET_FEATURE_ARC_INIT (ip6_local) = {
1219   .arc_name = "ip6-local",
1220   .start_nodes = VNET_FEATURES ("ip6-local", "ip6-receive"),
1221 };
1222
1223 static_always_inline u8
1224 ip6_tcp_udp_icmp_bad_length (vlib_main_t * vm, vlib_buffer_t * p0)
1225 {
1226
1227   u16 payload_length_host_byte_order;
1228   u32 n_this_buffer, n_bytes_left;
1229   ip6_header_t *ip0 = vlib_buffer_get_current (p0);
1230   u32 headers_size = sizeof (ip0[0]);
1231   u8 *data_this_buffer;
1232
1233
1234   data_this_buffer = (u8 *) (ip0 + 1);
1235
1236   ip6_hop_by_hop_ext_t *ext_hdr = (ip6_hop_by_hop_ext_t *) data_this_buffer;
1237
1238   /* validate really icmp6 next */
1239
1240   if (!(ext_hdr->next_hdr == IP_PROTOCOL_ICMP6)
1241       || (ext_hdr->next_hdr == IP_PROTOCOL_UDP))
1242     return 0;
1243
1244
1245   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
1246   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1247
1248
1249   u32 n_ip_bytes_this_buffer =
1250     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1251   if (n_this_buffer + headers_size > n_ip_bytes_this_buffer)
1252     {
1253       n_this_buffer = p0->current_length > headers_size ?
1254         n_ip_bytes_this_buffer - headers_size : 0;
1255     }
1256
1257   n_bytes_left -= n_this_buffer;
1258   n_bytes_left -= vlib_buffer_length_in_chain (vm, p0) - p0->current_length;
1259
1260   if (n_bytes_left == 0)
1261     return 0;
1262   else
1263     return 1;
1264 }
1265
/**
 * @brief Shared worker for the "ip6-local", "ip6-receive" and
 *        "ip6-local-end-of-arc" node functions.
 *
 * For every buffer of the frame the next node is taken from
 * lm->local_next_by_ip_protocol[], indexed by the IPv6 header's protocol
 * field, unless an error was recorded, in which case the buffer goes to
 * IP_LOCAL_NEXT_DROP.  IP6_ERROR_UNKNOWN_PROTOCOL doubles as the
 * "no error so far" value: only a different error code forces the drop.
 *
 * When @c head_of_feature_arc is non-zero the function additionally
 * validates the L4 checksum and length, applies a loose uRPF check on the
 * source address, fills ip.fib_index / ip.rx_sw_if_index in the buffer
 * metadata and calls vnet_feature_arc_start () for buffers without error.
 *
 * @param vm                   vlib main
 * @param node                 runtime of the calling node
 * @param frame                frame of buffer indices to process
 * @param head_of_feature_arc  non-zero to run the validation / arc-start
 *                             steps described above
 * @param is_receive_dpo       non-zero to let the receive DPO referenced by
 *                             ip.adj_index[VLIB_TX] override ip.rx_sw_if_index
 *                             (only consulted when head_of_feature_arc is set)
 * @return frame->n_vectors
 */
always_inline uword
ip6_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
                  vlib_frame_t *frame, int head_of_feature_arc,
                  int is_receive_dpo)
{
  ip6_main_t *im = &ip6_main;
  ip_lookup_main_t *lm = &im->lookup_main;
  u32 *from, n_left_from;
  /* Error codes are resolved against the ip6-input node's runtime. */
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip6_input_node.index);
  u8 arc_index = vnet_feat_arc_ip6_local.feature_arc_index;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  u16 nexts[VLIB_FRAME_SIZE], *next;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;

  /* Traces are captured up front, before any buffer is modified below. */
  if (node->flags & VLIB_NODE_FLAG_TRACE)
    ip6_forward_next_trace (vm, node, frame, VLIB_TX);

  vlib_get_buffers (vm, from, bufs, n_left_from);
  b = bufs;
  next = nexts;

  /* Dual loop: two buffers per iteration while more than two remain. */
  while (n_left_from > 2)
    {
      /* Prefetch next iteration. */
      if (n_left_from >= 6)
        {
          vlib_prefetch_buffer_header (b[4], STORE);
          vlib_prefetch_buffer_header (b[5], STORE);
          vlib_prefetch_buffer_data (b[2], LOAD);
          vlib_prefetch_buffer_data (b[3], LOAD);
        }

      /* UNKNOWN_PROTOCOL means "no error recorded yet" (see below). */
      vl_counter_ip6_enum_t error[2];
      error[0] = IP6_ERROR_UNKNOWN_PROTOCOL;
      error[1] = IP6_ERROR_UNKNOWN_PROTOCOL;

      ip6_header_t *ip[2];
      ip[0] = vlib_buffer_get_current (b[0]);
      ip[1] = vlib_buffer_get_current (b[1]);

      if (head_of_feature_arc)
        {
          vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
          vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;

          /* Builtin protocol class (UDP / ICMP / unknown / ...) of the
           * protocol named in the IPv6 header. */
          u8 type[2];
          type[0] = lm->builtin_protocol_by_ip_protocol[ip[0]->protocol];
          type[1] = lm->builtin_protocol_by_ip_protocol[ip[1]->protocol];

          u32 flags[2];
          flags[0] = b[0]->flags;
          flags[1] = b[1]->flags;

          vnet_buffer_oflags_t oflags[2];
          oflags[0] = vnet_buffer (b[0])->oflags;
          oflags[1] = vnet_buffer (b[1])->oflags;

          /* Non-zero when the buffer is marked for TCP or UDP checksum
           * offload; such buffers are treated as having a good checksum. */
          u32 l4_offload[2];
          l4_offload[0] = (flags[0] & VNET_BUFFER_F_OFFLOAD) &&
                          (oflags[0] & (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM |
                                        VNET_BUFFER_OFFLOAD_F_UDP_CKSUM));
          l4_offload[1] = (flags[1] & VNET_BUFFER_F_OFFLOAD) &&
                          (oflags[1] & (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM |
                                        VNET_BUFFER_OFFLOAD_F_UDP_CKSUM));

          u32 good_l4_csum[2];
          good_l4_csum[0] =
            (flags[0] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) | l4_offload[0];
          good_l4_csum[1] =
            (flags[1] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) | l4_offload[1];

          u32 udp_offset[2] = { };
          u8 is_tcp_udp[2];
          is_tcp_udp[0] =
            ip6_next_proto_is_tcp_udp (b[0], ip[0], &udp_offset[0]);
          is_tcp_udp[1] =
            ip6_next_proto_is_tcp_udp (b[1], ip[1], &udp_offset[1]);
          i16 len_diff[2] = { 0 };
          if (PREDICT_TRUE (is_tcp_udp[0]))
            {
              udp_header_t *udp =
                (udp_header_t *) ((u8 *) ip[0] + udp_offset[0]);
              /* A UDP packet with a zero checksum field counts as good. */
              good_l4_csum[0] |= type[0] == IP_BUILTIN_PROTOCOL_UDP
                && udp->checksum == 0;
              /* optimistically verify UDP length. */
              u16 ip_len, udp_len;
              ip_len = clib_net_to_host_u16 (ip[0]->payload_length);
              udp_len = clib_net_to_host_u16 (udp->length);
              len_diff[0] = ip_len - udp_len;
            }
          if (PREDICT_TRUE (is_tcp_udp[1]))
            {
              udp_header_t *udp =
                (udp_header_t *) ((u8 *) ip[1] + udp_offset[1]);
              /* A UDP packet with a zero checksum field counts as good. */
              good_l4_csum[1] |= type[1] == IP_BUILTIN_PROTOCOL_UDP
                && udp->checksum == 0;
              /* optimistically verify UDP length. */
              u16 ip_len, udp_len;
              ip_len = clib_net_to_host_u16 (ip[1]->payload_length);
              udp_len = clib_net_to_host_u16 (udp->length);
              len_diff[1] = ip_len - udp_len;
            }

          /* Protocols without a builtin class are never checksummed here. */
          good_l4_csum[0] |= type[0] == IP_BUILTIN_PROTOCOL_UNKNOWN;
          good_l4_csum[1] |= type[1] == IP_BUILTIN_PROTOCOL_UNKNOWN;

          /* The length difference computed above only applies to UDP. */
          len_diff[0] = type[0] == IP_BUILTIN_PROTOCOL_UDP ? len_diff[0] : 0;
          len_diff[1] = type[1] == IP_BUILTIN_PROTOCOL_UDP ? len_diff[1] : 0;

          /* Compute the checksum in software only when nothing vouches for
           * it yet and it has not already been computed. */
          u8 need_csum[2];
          need_csum[0] = type[0] != IP_BUILTIN_PROTOCOL_UNKNOWN
            && !good_l4_csum[0]
            && !(flags[0] & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
          need_csum[1] = type[1] != IP_BUILTIN_PROTOCOL_UNKNOWN
            && !good_l4_csum[1]
            && !(flags[1] & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
          if (PREDICT_FALSE (need_csum[0]))
            {
              flags[0] = ip6_tcp_udp_icmp_validate_checksum (vm, b[0]);
              good_l4_csum[0] = flags[0] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
              /* Re-assigns the value error[0] already holds. */
              error[0] = IP6_ERROR_UNKNOWN_PROTOCOL;
            }
          else
            {
              /* NOTE(review): the length check runs for every packet that
               * skipped software checksumming, whatever its protocol -
               * confirm against ip6_tcp_udp_icmp_bad_length (). */
              if (ip6_tcp_udp_icmp_bad_length (vm, b[0]))
                error[0] = IP6_ERROR_BAD_LENGTH;
            }
          if (PREDICT_FALSE (need_csum[1]))
            {
              flags[1] = ip6_tcp_udp_icmp_validate_checksum (vm, b[1]);
              good_l4_csum[1] = flags[1] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
              /* Re-assigns the value error[1] already holds. */
              error[1] = IP6_ERROR_UNKNOWN_PROTOCOL;
            }
          else
            {
              if (ip6_tcp_udp_icmp_bad_length (vm, b[1]))
                error[1] = IP6_ERROR_BAD_LENGTH;
            }


          /* UDP length larger than the IPv6 payload length is an error. */
          error[0] = len_diff[0] < 0 ? IP6_ERROR_UDP_LENGTH : error[0];

          error[1] = len_diff[1] < 0 ? IP6_ERROR_UDP_LENGTH : error[1];

          /* The checksum error code below is derived by adding the builtin
           * protocol class to IP6_ERROR_UDP_CHECKSUM; these asserts pin the
           * enum layout that this arithmetic relies on. */
          STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
                         IP6_ERROR_UDP_CHECKSUM,
                         "Wrong IP6 errors constants");
          STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
                         IP6_ERROR_ICMP_CHECKSUM,
                         "Wrong IP6 errors constants");

          /* A bad checksum overrides any error recorded so far. */
          error[0] =
            !good_l4_csum[0] ? IP6_ERROR_UDP_CHECKSUM + type[0] : error[0];
          error[1] =
            !good_l4_csum[1] ? IP6_ERROR_UDP_CHECKSUM + type[1] : error[1];

          /* Drop packets from unroutable hosts. */
          /* If this is a neighbor solicitation (ICMP), skip source RPF check */
          /* The check is also skipped for link-local unicast sources and
           * for packets that already carry an error. */
          u8 unroutable[2];
          unroutable[0] = error[0] == IP6_ERROR_UNKNOWN_PROTOCOL
            && type[0] != IP_BUILTIN_PROTOCOL_ICMP
            && !ip6_address_is_link_local_unicast (&ip[0]->src_address);
          unroutable[1] = error[1] == IP6_ERROR_UNKNOWN_PROTOCOL
            && type[1] != IP_BUILTIN_PROTOCOL_ICMP
            && !ip6_address_is_link_local_unicast (&ip[1]->src_address);
          if (PREDICT_FALSE (unroutable[0]))
            {
              error[0] =
                !ip6_urpf_loose_check (im, b[0],
                                       ip[0]) ? IP6_ERROR_SRC_LOOKUP_MISS
                : error[0];
            }
          if (PREDICT_FALSE (unroutable[1]))
            {
              error[1] =
                !ip6_urpf_loose_check (im, b[1],
                                       ip[1]) ? IP6_ERROR_SRC_LOOKUP_MISS
                : error[1];
            }

          /* An explicit sw_if_index[VLIB_TX] replaces ip.fib_index. */
          vnet_buffer (b[0])->ip.fib_index =
            vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
            vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
            vnet_buffer (b[0])->ip.fib_index;
          vnet_buffer (b[1])->ip.fib_index =
            vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
            vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
            vnet_buffer (b[1])->ip.fib_index;

          /* Default RX interface for the feature arc is the real one; a
           * receive DPO with a valid rd_sw_if_index overrides it. */
          vnet_buffer (b[0])->ip.rx_sw_if_index =
            vnet_buffer (b[0])->sw_if_index[VLIB_RX];
          vnet_buffer (b[1])->ip.rx_sw_if_index =
            vnet_buffer (b[1])->sw_if_index[VLIB_RX];
          if (is_receive_dpo)
            {
              const receive_dpo_t *rd0, *rd1;
              rd0 =
                receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
              rd1 =
                receive_dpo_get (vnet_buffer (b[1])->ip.adj_index[VLIB_TX]);
              if (rd0->rd_sw_if_index != ~0)
                vnet_buffer (b[0])->ip.rx_sw_if_index = rd0->rd_sw_if_index;
              if (rd1->rd_sw_if_index != ~0)
                vnet_buffer (b[1])->ip.rx_sw_if_index = rd1->rd_sw_if_index;
            }
        }                       /* head_of_feature_arc */

      /* Per-protocol next node, replaced by drop if any error was set. */
      next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
      next[0] =
        error[0] != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[0];
      next[1] = lm->local_next_by_ip_protocol[ip[1]->protocol];
      next[1] =
        error[1] != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[1];

      b[0]->error = error_node->errors[error[0]];
      b[1]->error = error_node->errors[error[1]];

      if (head_of_feature_arc)
        {
          /* Only error-free buffers enter the feature arc; next32 is passed
           * by pointer, so vnet_feature_arc_start () may change the next
           * node. */
          u8 ip6_unknown[2];
          ip6_unknown[0] = error[0] == (u8) IP6_ERROR_UNKNOWN_PROTOCOL;
          ip6_unknown[1] = error[1] == (u8) IP6_ERROR_UNKNOWN_PROTOCOL;
          if (PREDICT_TRUE (ip6_unknown[0]))
            {
              u32 next32 = next[0];
              vnet_feature_arc_start (arc_index,
                                      vnet_buffer (b[0])->ip.rx_sw_if_index,
                                      &next32, b[0]);
              next[0] = next32;
            }
          if (PREDICT_TRUE (ip6_unknown[1]))
            {
              u32 next32 = next[1];
              vnet_feature_arc_start (arc_index,
                                      vnet_buffer (b[1])->ip.rx_sw_if_index,
                                      &next32, b[1]);
              next[1] = next32;
            }
        }

      /* next */
      b += 2;
      next += 2;
      n_left_from -= 2;
    }

  /* Single loop: same processing as above, one buffer at a time. */
  while (n_left_from)
    {
      /* UNKNOWN_PROTOCOL means "no error recorded yet". */
      u8 error;
      error = IP6_ERROR_UNKNOWN_PROTOCOL;

      ip6_header_t *ip;
      ip = vlib_buffer_get_current (b[0]);

      if (head_of_feature_arc)
        {
          vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
          u8 type = lm->builtin_protocol_by_ip_protocol[ip->protocol];

          u32 flags = b[0]->flags;

          vnet_buffer_oflags_t oflags = vnet_buffer (b[0])->oflags;

          /* Non-zero when the buffer is marked for TCP or UDP checksum
           * offload; such buffers are treated as having a good checksum. */
          u32 l4_offload = (flags & VNET_BUFFER_F_OFFLOAD) &&
                           (oflags & (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM |
                                      VNET_BUFFER_OFFLOAD_F_UDP_CKSUM));

          u32 good_l4_csum =
            (flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) | l4_offload;
          /* Only read below when is_tcp_udp is non-zero. */
          u32 udp_offset;
          i16 len_diff = 0;
          u8 is_tcp_udp = ip6_next_proto_is_tcp_udp (b[0], ip, &udp_offset);
          if (PREDICT_TRUE (is_tcp_udp))
            {
              udp_header_t *udp = (udp_header_t *) ((u8 *) ip + udp_offset);
              /* Don't verify UDP checksum for packets with explicit zero checksum. */
              good_l4_csum |= type == IP_BUILTIN_PROTOCOL_UDP
                && udp->checksum == 0;
              /* optimistically verify UDP length. */
              u16 ip_len, udp_len;
              ip_len = clib_net_to_host_u16 (ip->payload_length);
              udp_len = clib_net_to_host_u16 (udp->length);
              len_diff = ip_len - udp_len;
            }

          /* Protocols without a builtin class are never checksummed here,
           * and the length difference only applies to UDP. */
          good_l4_csum |= type == IP_BUILTIN_PROTOCOL_UNKNOWN;
          len_diff = type == IP_BUILTIN_PROTOCOL_UDP ? len_diff : 0;

          /* Compute the checksum in software only when nothing vouches for
           * it yet and it has not already been computed. */
          u8 need_csum = type != IP_BUILTIN_PROTOCOL_UNKNOWN &&
                         !good_l4_csum &&
                         !(flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
          if (PREDICT_FALSE (need_csum))
            {
              flags = ip6_tcp_udp_icmp_validate_checksum (vm, b[0]);
              good_l4_csum = flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
              /* Re-assigns the value error already holds. */
              error = IP6_ERROR_UNKNOWN_PROTOCOL;
            }
          else
            {
              /* NOTE(review): the length check runs for every packet that
               * skipped software checksumming, whatever its protocol -
               * confirm against ip6_tcp_udp_icmp_bad_length (). */
              if (ip6_tcp_udp_icmp_bad_length (vm, b[0]))
                error = IP6_ERROR_BAD_LENGTH;
            }



          /* UDP length larger than the IPv6 payload length is an error. */
          error = len_diff < 0 ? IP6_ERROR_UDP_LENGTH : error;
          /* The checksum error code below is derived by adding the builtin
           * protocol class to IP6_ERROR_UDP_CHECKSUM; these asserts pin the
           * enum layout that this arithmetic relies on. */
          STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
                         IP6_ERROR_UDP_CHECKSUM,
                         "Wrong IP6 errors constants");
          STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
                         IP6_ERROR_ICMP_CHECKSUM,
                         "Wrong IP6 errors constants");

          /* A bad checksum overrides any error recorded so far. */
          error = !good_l4_csum ? IP6_ERROR_UDP_CHECKSUM + type : error;

          /* Drop packets from unroutable hosts. */
          /* If this is a neighbor solicitation (ICMP), skip source RPF check */
          /* The check is also skipped for link-local unicast sources and
           * for packets that already carry an error. */
          u8 unroutable = error == IP6_ERROR_UNKNOWN_PROTOCOL
            && type != IP_BUILTIN_PROTOCOL_ICMP
            && !ip6_address_is_link_local_unicast (&ip->src_address);
          if (PREDICT_FALSE (unroutable))
            {
              error =
                !ip6_urpf_loose_check (im, b[0],
                                       ip) ? IP6_ERROR_SRC_LOOKUP_MISS :
                error;
            }

          /* An explicit sw_if_index[VLIB_TX] replaces ip.fib_index. */
          vnet_buffer (b[0])->ip.fib_index =
            vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
            vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
            vnet_buffer (b[0])->ip.fib_index;

          /* Default RX interface for the feature arc is the real one; a
           * receive DPO with a valid rd_sw_if_index overrides it. */
          vnet_buffer (b[0])->ip.rx_sw_if_index =
            vnet_buffer (b[0])->sw_if_index[VLIB_RX];
          if (is_receive_dpo)
            {
              receive_dpo_t *rd;
              rd = receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
              if (rd->rd_sw_if_index != ~0)
                vnet_buffer (b[0])->ip.rx_sw_if_index = rd->rd_sw_if_index;
            }
        }                       /* head_of_feature_arc */

      /* Per-protocol next node, replaced by drop if any error was set. */
      next[0] = lm->local_next_by_ip_protocol[ip->protocol];
      next[0] =
        error != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[0];

      b[0]->error = error_node->errors[error];

      if (head_of_feature_arc)
        {
          /* Only error-free buffers enter the feature arc; next32 is passed
           * by pointer, so vnet_feature_arc_start () may change the next
           * node. */
          if (PREDICT_TRUE (error == (u8) IP6_ERROR_UNKNOWN_PROTOCOL))
            {
              u32 next32 = next[0];
              vnet_feature_arc_start (arc_index,
                                      vnet_buffer (b[0])->ip.rx_sw_if_index,
                                      &next32, b[0]);
              next[0] = next32;
            }
        }

      /* next */
      b += 1;
      next += 1;
      n_left_from -= 1;
    }

  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
  return frame->n_vectors;
}
1640
1641 VLIB_NODE_FN (ip6_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1642                                vlib_frame_t * frame)
1643 {
1644   return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */,
1645                            0 /* ip6_local_inline */);
1646 }
1647
1648 VLIB_REGISTER_NODE (ip6_local_node) =
1649 {
1650   .name = "ip6-local",
1651   .vector_size = sizeof (u32),
1652   .format_trace = format_ip6_forward_next_trace,
1653   .n_errors = IP6_N_ERROR,
1654   .error_counters = ip6_error_counters,
1655   .n_next_nodes = IP_LOCAL_N_NEXT,
1656   .next_nodes =
1657   {
1658     [IP_LOCAL_NEXT_DROP] = "ip6-drop",
1659     [IP_LOCAL_NEXT_PUNT] = "ip6-punt",
1660     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
1661     [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input",
1662     [IP_LOCAL_NEXT_REASSEMBLY] = "ip6-local-full-reassembly",
1663   },
1664 };
1665
/*
 * Node function of the graph node registered below as "ip6-receive".
 * Same worker as "ip6-local" (ip6_local_inline, head of feature arc),
 * but with the is_receive_dpo flag set.
 */
1666 VLIB_NODE_FN (ip6_receive_local_node)
1667 (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
1668 {
1669   return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */,
1670                            1 /* is_receive_dpo */);
1671 }
1672
/*
 * Registration of the "ip6-receive" graph node. It is declared as a
 * sibling of "ip6-local" and lists no next nodes or error counters of
 * its own.
 */
1673 VLIB_REGISTER_NODE (ip6_receive_local_node) = {
1674   .name = "ip6-receive",
1675   .vector_size = sizeof (u32),
1676   .format_trace = format_ip6_forward_next_trace,
1677   .sibling_of = "ip6-local"
1678 };
1679
/*
 * Node function of the graph node registered below as
 * "ip6-local-end-of-arc". Runs ip6_local_inline with
 * head_of_feature_arc = 0 and the is_receive_dpo flag cleared.
 */
1680 VLIB_NODE_FN (ip6_local_end_of_arc_node) (vlib_main_t * vm,
1681                                           vlib_node_runtime_t * node,
1682                                           vlib_frame_t * frame)
1683 {
1684   return ip6_local_inline (vm, node, frame, 0 /* head of feature arc */,
1685                            0 /* is_receive_dpo */);
1686 }
1687
/*
 * Registration of the "ip6-local-end-of-arc" graph node, declared as a
 * sibling of "ip6-local".
 */
1688 VLIB_REGISTER_NODE (ip6_local_end_of_arc_node) = {
1689   .name = "ip6-local-end-of-arc",
1690   .vector_size = sizeof (u32),
1691
1692   .format_trace = format_ip6_forward_next_trace,
1693   .sibling_of = "ip6-local",
1694 };
1695
/*
 * Feature registration that places the "ip6-local-end-of-arc" node on the
 * "ip6-local" feature arc with no runs_before constraint.
 */
1696 VNET_FEATURE_INIT (ip6_local_end_of_arc, static) = {
1697   .arc_name = "ip6-local",
1698   .node_name = "ip6-local-end-of-arc",
1699   .runs_before = 0, /* not before any other features */
1700 };
1701
1702 #ifdef CLIB_MARCH_VARIANT
1703 extern vlib_node_registration_t ip6_local_node;
1704 #else
/*
 * Register node_index as the local handler for an IP protocol number.
 * The node is added as a next node of ip6_local_node and the next index
 * returned by vlib_node_add_next() is stored in the per-protocol table.
 * protocol must be a valid index into that table (checked with ASSERT).
 */
1705 void
1706 ip6_register_protocol (u32 protocol, u32 node_index)
1707 {
1708   vlib_main_t *vm = vlib_get_main ();
1709   ip_lookup_main_t *lookup = &ip6_main.lookup_main;
1710
1711   /* The protocol number indexes the table directly. */
1712   ASSERT (protocol < ARRAY_LEN (lookup->local_next_by_ip_protocol));
1713   lookup->local_next_by_ip_protocol[protocol] =
1714     vlib_node_add_next (vm, ip6_local_node.index, node_index);
1715 }
1716
/*
 * Drop the local handler for an IP protocol number by pointing its table
 * entry back at IP_LOCAL_NEXT_PUNT. protocol must be a valid table index
 * (checked with ASSERT).
 */
1717 void
1718 ip6_unregister_protocol (u32 protocol)
1719 {
1720   ip_lookup_main_t *lookup = &ip6_main.lookup_main;
1721
1722   ASSERT (protocol < ARRAY_LEN (lookup->local_next_by_ip_protocol));
1723   /* Packets of this protocol go to the punt next again. */
1724   lookup->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1725 }
1726 #endif
1727
/*
 * Next-node indices of the "ip6-rewrite" node; the order matches the
 * next_nodes table in that node's registration.
 */
1728 typedef enum
1729 {
1730   IP6_REWRITE_NEXT_DROP,
1731   IP6_REWRITE_NEXT_ICMP_ERROR,
1732   IP6_REWRITE_NEXT_FRAGMENT,
1733   IP6_REWRITE_N_NEXT            /* Last */
1734 } ip6_rewrite_next_t;
1735
1736 /**
1737  * Mask selecting the bits of an IPv6 address that are used to
1738  * construct a multicast MAC address
1739  */
1740 #define IP6_MCAST_ADDR_MASK 0xffffffff
1741
/*
 * Compare a packet's size with the adjacency's L3 MTU and, if it is too
 * big, pick the disposition.
 *
 * Nothing happens unless adj_packet_bytes is at least 1280 and
 * packet_bytes exceeds it. Otherwise *error becomes
 * IP6_ERROR_MTU_EXCEEDED and:
 *  - locally generated packets are prepared for fragmentation and *next
 *    is set to IP6_REWRITE_NEXT_FRAGMENT;
 *  - all other packets get a packet-too-big ICMP6 error carrying
 *    adj_packet_bytes and *next is set to IP6_REWRITE_NEXT_ICMP_ERROR.
 */
1742 always_inline void
1743 ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes,
1744                u16 adj_packet_bytes, bool is_locally_generated,
1745                u32 * next, u8 is_midchain, u32 * error)
1746 {
1747   /* Packet fits, or the adjacency MTU is below 1280: leave as is. */
1748   if (adj_packet_bytes < 1280 || packet_bytes <= adj_packet_bytes)
1749     return;
1750
1751   *error = IP6_ERROR_MTU_EXCEEDED;
1752
1753   if (!is_locally_generated)
1754     {
1755       icmp6_error_set_vnet_buffer (b, ICMP6_packet_too_big, 0,
1756                                    adj_packet_bytes);
1757       *next = IP6_REWRITE_NEXT_ICMP_ERROR;
1758       return;
1759     }
1760
1761   /* IP fragmentation; the frag next hint depends on is_midchain. */
1762   ip_frag_set_vnet_buffer (b, adj_packet_bytes,
1763                            (is_midchain ?
1764                             IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
1765                             IP_FRAG_NEXT_IP_REWRITE), 0);
1766   *next = IP6_REWRITE_NEXT_FRAGMENT;
1767 }
1768
/**
 * Common worker of the IPv6 rewrite-family nodes in this file.
 *
 * For every buffer in the frame (two at a time while at least four are
 * left, then one at a time):
 *  - unless VNET_BUFFER_F_LOCALLY_ORIGINATED is set, decrement the hop
 *    limit; when it reaches zero the buffer is steered to
 *    IP6_REWRITE_NEXT_ICMP_ERROR with a time-exceeded ICMP6 error;
 *  - fetch the adjacency named by ip.adj_index[VLIB_TX], store its rewrite
 *    length in ip.save_rewrite_length and, if do_counters, bump the
 *    adjacency counter;
 *  - check the L3 length (gso_mtu_sz() for GSO buffers) against the
 *    adjacency's max_l3_packet_bytes with ip6_mtu_check();
 *  - if no error was raised, grow the buffer backwards by the rewrite
 *    length, take the TX sw_if_index and next index from the adjacency and
 *    start the output feature arc when the adjacency has features;
 *    otherwise attach the error (looked up in ip6_input_node's runtime)
 *    to the buffer and leave the buffer pointing at the IPv6 header;
 *  - apply the adjacency rewrite with vnet_rewrite_*_header(), run the
 *    midchain fixup callback (is_midchain, error-free buffers only) and
 *    the multicast fixup (is_mcast).
 *
 * Tracing is done after the rewrite so it picks up the new packet data.
 *
 * @param vm           vlib main of the calling thread
 * @param node         runtime of the calling node
 * @param frame        frame holding the buffer indices to process
 * @param do_counters  non-zero to update adjacency_counters
 * @param is_midchain  non-zero to use an ip6-header-sized rewrite guess
 *                     and to call the adjacency's midchain fixup_func
 * @param is_mcast     non-zero to call vnet_ip_mcast_fixup_header()
 * @return frame->n_vectors
 */
1769 always_inline uword
1770 ip6_rewrite_inline_with_gso (vlib_main_t * vm,
1771                              vlib_node_runtime_t * node,
1772                              vlib_frame_t * frame,
1773                              int do_counters, int is_midchain, int is_mcast)
1774 {
1775   ip_lookup_main_t *lm = &ip6_main.lookup_main;
1776   u32 *from = vlib_frame_vector_args (frame);
1777   u32 n_left_from, n_left_to_next, *to_next, next_index;
1778   vlib_node_runtime_t *error_node =
1779     vlib_node_get_runtime (vm, ip6_input_node.index);
1780
1781   n_left_from = frame->n_vectors;
1782   next_index = node->cached_next_index;
1783   u32 thread_index = vm->thread_index;
1784
1785   while (n_left_from > 0)
1786     {
1787       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1788
1789       while (n_left_from >= 4 && n_left_to_next >= 2)
1790         {
1791           const ip_adjacency_t *adj0, *adj1;
1792           vlib_buffer_t *p0, *p1;
1793           ip6_header_t *ip0, *ip1;
1794           u32 pi0, rw_len0, next0, error0, adj_index0;
1795           u32 pi1, rw_len1, next1, error1, adj_index1;
1796           u32 tx_sw_if_index0, tx_sw_if_index1;
1797           bool is_locally_originated0, is_locally_originated1;
1798
1799           /* Prefetch next iteration. */
1800           {
1801             vlib_buffer_t *p2, *p3;
1802
1803             p2 = vlib_get_buffer (vm, from[2]);
1804             p3 = vlib_get_buffer (vm, from[3]);
1805
1806             vlib_prefetch_buffer_header (p2, LOAD);
1807             vlib_prefetch_buffer_header (p3, LOAD);
1808
1809             clib_prefetch_store (p2->pre_data);
1810             clib_prefetch_store (p3->pre_data);
1811
1812             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
1813             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
1814           }
1815
1816           pi0 = to_next[0] = from[0];
1817           pi1 = to_next[1] = from[1];
1818
1819           from += 2;
1820           n_left_from -= 2;
1821           to_next += 2;
1822           n_left_to_next -= 2;
1823
1824           p0 = vlib_get_buffer (vm, pi0);
1825           p1 = vlib_get_buffer (vm, pi1);
1826
1827           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1828           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
1829
1830           ip0 = vlib_buffer_get_current (p0);
1831           ip1 = vlib_buffer_get_current (p1);
1832
1833           error0 = error1 = IP6_ERROR_NONE;
1834           next0 = next1 = IP6_REWRITE_NEXT_DROP;
1835
1836           is_locally_originated0 =
1837             p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
1838           if (PREDICT_TRUE (!is_locally_originated0))
1839             {
1840               i32 hop_limit0 = ip0->hop_limit;
1841
1842               /* Input node should have rejected packets with hop limit 0. */
1843               ASSERT (ip0->hop_limit > 0);
1844
1845               hop_limit0 -= 1;
1846
1847               ip0->hop_limit = hop_limit0;
1848
1849               /*
1850                * If the hop count drops below 1 when forwarding, generate
1851                * an ICMP response.
1852                */
1853               if (PREDICT_FALSE (hop_limit0 <= 0))
1854                 {
1855                   error0 = IP6_ERROR_TIME_EXPIRED;
1856                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
1857                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1858                   icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
1859                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
1860                                                0);
1861                 }
1862             }
1863
1864           is_locally_originated1 =
1865             p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
1866           if (PREDICT_TRUE (!is_locally_originated1))
1867             {
1868               i32 hop_limit1 = ip1->hop_limit;
1869
1870               /* Input node should have rejected packets with hop limit 0. */
1871               ASSERT (ip1->hop_limit > 0);
1872
1873               hop_limit1 -= 1;
1874
1875               ip1->hop_limit = hop_limit1;
1876
1877               /*
1878                * If the hop count drops below 1 when forwarding, generate
1879                * an ICMP response.
1880                */
1881               if (PREDICT_FALSE (hop_limit1 <= 0))
1882                 {
1883                   error1 = IP6_ERROR_TIME_EXPIRED;
1884                   next1 = IP6_REWRITE_NEXT_ICMP_ERROR;
1885                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1886                   icmp6_error_set_vnet_buffer (p1, ICMP6_time_exceeded,
1887                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
1888                                                0);
1889                 }
1890             }
1891
1892           adj0 = adj_get (adj_index0);
1893           adj1 = adj_get (adj_index1);
1894
1895           rw_len0 = adj0[0].rewrite_header.data_bytes;
1896           rw_len1 = adj1[0].rewrite_header.data_bytes;
1897           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
1898           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
1899
1900           if (do_counters)
1901             {
1902               vlib_increment_combined_counter
1903                 (&adjacency_counters,
1904                  thread_index, adj_index0, 1,
1905                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
1906               vlib_increment_combined_counter
1907                 (&adjacency_counters,
1908                  thread_index, adj_index1, 1,
1909                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
1910             }
1911
1912           /* Check MTU of outgoing interface. */
1913           u16 ip0_len =
1914             clib_net_to_host_u16 (ip0->payload_length) +
1915             sizeof (ip6_header_t);
1916           u16 ip1_len =
1917             clib_net_to_host_u16 (ip1->payload_length) +
1918             sizeof (ip6_header_t);
1919           if (p0->flags & VNET_BUFFER_F_GSO)
1920             ip0_len = gso_mtu_sz (p0);
1921           if (p1->flags & VNET_BUFFER_F_GSO)
1922             ip1_len = gso_mtu_sz (p1);
1923
1924           ip6_mtu_check (p0, ip0_len,
1925                          adj0[0].rewrite_header.max_l3_packet_bytes,
1926                          is_locally_originated0, &next0, is_midchain,
1927                          &error0);
1928           ip6_mtu_check (p1, ip1_len,
1929                          adj1[0].rewrite_header.max_l3_packet_bytes,
1930                          is_locally_originated1, &next1, is_midchain,
1931                          &error1);
1932           /* Don't adjust the buffer for hop count issue; icmp-error node
1933            * wants to see the IP header */
1934           if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
1935             {
1936               p0->current_data -= rw_len0;
1937               p0->current_length += rw_len0;
1938               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
1939               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
1940               next0 = adj0[0].rewrite_header.next_index;
1941               if (PREDICT_FALSE
1942                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
1943                 vnet_feature_arc_start_w_cfg_index
1944                   (lm->output_feature_arc_index, tx_sw_if_index0, &next0, p0,
1945                    adj0->ia_cfg_index);
1946             }
1947           else
1948             {
1949               p0->error = error_node->errors[error0];
1950             }
1951           if (PREDICT_TRUE (error1 == IP6_ERROR_NONE))
1952             {
1953               p1->current_data -= rw_len1;
1954               p1->current_length += rw_len1;
1955
1956               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
1957               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
1958               next1 = adj1[0].rewrite_header.next_index;
1959
1960               if (PREDICT_FALSE
1961                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
1962                 vnet_feature_arc_start_w_cfg_index
1963                   (lm->output_feature_arc_index, tx_sw_if_index1, &next1, p1,
1964                    adj1->ia_cfg_index);
1965             }
1966           else
1967             {
1968               p1->error = error_node->errors[error1];
1969             }
1970
1971           if (is_midchain)
1972             {
1973               /* Guess we are only writing on ipv6 header. */
1974               vnet_rewrite_two_headers (adj0[0], adj1[0],
1975                                         ip0, ip1, sizeof (ip6_header_t));
1976             }
1977           else
1978             /* Guess we are only writing on simple Ethernet header. */
1979             vnet_rewrite_two_headers (adj0[0], adj1[0],
1980                                       ip0, ip1, sizeof (ethernet_header_t));
1981
1982           if (is_midchain)
1983             {
              /* Only fix up buffers that were extended by the rewrite above.
               * On the error path current_data still points at the original
               * IPv6 header, which the ICMP/fragment path needs untouched. */
1984               if (error0 == IP6_ERROR_NONE && adj0->sub_type.midchain.fixup_func)
1985                 adj0->sub_type.midchain.fixup_func
1986                   (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
1987               if (error1 == IP6_ERROR_NONE && adj1->sub_type.midchain.fixup_func)
1988                 adj1->sub_type.midchain.fixup_func
1989                   (vm, adj1, p1, adj1->sub_type.midchain.fixup_data);
1990             }
1991           if (is_mcast)
1992             {
1993               /*
1994                * copy bytes from the IP address into the MAC rewrite
1995                */
1996               vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
1997                                           adj0->
1998                                           rewrite_header.dst_mcast_offset,
1999                                           &ip0->dst_address.as_u32[3],
2000                                           (u8 *) ip0);
2001               vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
2002                                           adj1->
2003                                           rewrite_header.dst_mcast_offset,
2004                                           &ip1->dst_address.as_u32[3],
2005                                           (u8 *) ip1);
2006             }
2007
2008           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2009                                            to_next, n_left_to_next,
2010                                            pi0, pi1, next0, next1);
2011         }
2012
2013       while (n_left_from > 0 && n_left_to_next > 0)
2014         {
2015           ip_adjacency_t *adj0;
2016           vlib_buffer_t *p0;
2017           ip6_header_t *ip0;
2018           u32 pi0, rw_len0;
2019           u32 adj_index0, next0, error0;
2020           u32 tx_sw_if_index0;
2021           bool is_locally_originated0;
2022
2023           pi0 = to_next[0] = from[0];
2024
2025           p0 = vlib_get_buffer (vm, pi0);
2026
2027           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2028
2029           adj0 = adj_get (adj_index0);
2030
2031           ip0 = vlib_buffer_get_current (p0);
2032
2033           error0 = IP6_ERROR_NONE;
2034           next0 = IP6_REWRITE_NEXT_DROP;
2035
2036           /* Check hop limit */
2037           is_locally_originated0 =
2038             p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
2039           if (PREDICT_TRUE (!is_locally_originated0))
2040             {
2041               i32 hop_limit0 = ip0->hop_limit;
2042
2043               ASSERT (ip0->hop_limit > 0);
2044
2045               hop_limit0 -= 1;
2046
2047               ip0->hop_limit = hop_limit0;
2048
2049               if (PREDICT_FALSE (hop_limit0 <= 0))
2050                 {
2051                   /*
2052                    * If the hop count drops below 1 when forwarding, generate
2053                    * an ICMP response.
2054                    */
2055                   error0 = IP6_ERROR_TIME_EXPIRED;
2056                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
2057                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2058                   icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
2059                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
2060                                                0);
2061                 }
2062             }
2063
2064           if (is_midchain)
2065             {
2066               /* Guess we are only writing on ip6 header. */
2067               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip6_header_t));
2068             }
2069           else
2070             /* Guess we are only writing on simple Ethernet header. */
2071             vnet_rewrite_one_header (adj0[0], ip0,
2072                                      sizeof (ethernet_header_t));
2073
2074           /* Update packet buffer attributes/set output interface. */
2075           rw_len0 = adj0[0].rewrite_header.data_bytes;
2076           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2077
2078           if (do_counters)
2079             {
2080               vlib_increment_combined_counter
2081                 (&adjacency_counters,
2082                  thread_index, adj_index0, 1,
2083                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2084             }
2085
2086           /* Check MTU of outgoing interface. */
2087           u16 ip0_len =
2088             clib_net_to_host_u16 (ip0->payload_length) +
2089             sizeof (ip6_header_t);
2090           if (p0->flags & VNET_BUFFER_F_GSO)
2091             ip0_len = gso_mtu_sz (p0);
2092
2093           ip6_mtu_check (p0, ip0_len,
2094                          adj0[0].rewrite_header.max_l3_packet_bytes,
2095                          is_locally_originated0, &next0, is_midchain,
2096                          &error0);
2097           /* Don't adjust the buffer for hop count issue; icmp-error node
2098            * wants to see the IP header */
2099           if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
2100             {
2101               p0->current_data -= rw_len0;
2102               p0->current_length += rw_len0;
2103
2104               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2105
2106               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2107               next0 = adj0[0].rewrite_header.next_index;
2108
2109               if (PREDICT_FALSE
2110                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2111                 vnet_feature_arc_start_w_cfg_index
2112                   (lm->output_feature_arc_index, tx_sw_if_index0, &next0, p0,
2113                    adj0->ia_cfg_index);
2114             }
2115           else
2116             {
2117               p0->error = error_node->errors[error0];
2118             }
2119
2120           if (is_midchain)
2121             {
              /* Same rule as in the dual loop: no fixup for a buffer that
               * was not extended by the rewrite. */
2122               if (error0 == IP6_ERROR_NONE && adj0->sub_type.midchain.fixup_func)
2123                 adj0->sub_type.midchain.fixup_func
2124                   (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2125             }
2126           if (is_mcast)
2127             {
2128               vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
2129                                           adj0->
2130                                           rewrite_header.dst_mcast_offset,
2131                                           &ip0->dst_address.as_u32[3],
2132                                           (u8 *) ip0);
2133             }
2134
2135           from += 1;
2136           n_left_from -= 1;
2137           to_next += 1;
2138           n_left_to_next -= 1;
2139
2140           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2141                                            to_next, n_left_to_next,
2142                                            pi0, next0);
2143         }
2144
2145       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2146     }
2147
2148   /* Need to do trace after rewrites to pick up new packet data. */
2149   if (node->flags & VLIB_NODE_FLAG_TRACE)
2150     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
2151
2152   return frame->n_vectors;
2153 }
2154
/*
 * Thin forwarder: passes every argument unchanged to
 * ip6_rewrite_inline_with_gso() and returns its result.
 */
2155 always_inline uword
2156 ip6_rewrite_inline (vlib_main_t * vm,
2157                     vlib_node_runtime_t * node,
2158                     vlib_frame_t * frame,
2159                     int do_counters, int is_midchain, int is_mcast)
2160 {
2161   return ip6_rewrite_inline_with_gso (vm, node, frame, do_counters,
2162                                       is_midchain, is_mcast);
2163 }
2164
/*
 * Node function of "ip6-rewrite": plain rewrite (not midchain, not
 * multicast). Adjacency counters are updated only when
 * adj_are_counters_enabled() says so.
 */
2165 VLIB_NODE_FN (ip6_rewrite_node) (vlib_main_t * vm,
2166                                  vlib_node_runtime_t * node,
2167                                  vlib_frame_t * frame)
2168 {
2169   /* Flag order: do_counters, is_midchain, is_mcast (kept as literals). */
2170   if (!adj_are_counters_enabled ())
2171     return ip6_rewrite_inline (vm, node, frame, 0, 0, 0);
2172   return ip6_rewrite_inline (vm, node, frame, 1, 0, 0);
2173 }
2174
/*
 * Node function of "ip6-rewrite-bcast". It passes exactly the same flags
 * as the "ip6-rewrite" node function: not midchain, not multicast, and
 * counters only when adj_are_counters_enabled().
 */
2175 VLIB_NODE_FN (ip6_rewrite_bcast_node) (vlib_main_t * vm,
2176                                        vlib_node_runtime_t * node,
2177                                        vlib_frame_t * frame)
2178 {
2179   /* Flag order: do_counters, is_midchain, is_mcast (kept as literals). */
2180   if (!adj_are_counters_enabled ())
2181     return ip6_rewrite_inline (vm, node, frame, 0, 0, 0);
2182   return ip6_rewrite_inline (vm, node, frame, 1, 0, 0);
2183 }
2184
/*
 * Node function of "ip6-rewrite-mcast": rewrite with the multicast
 * fixup enabled (is_mcast = 1), not midchain. Counters only when
 * adj_are_counters_enabled().
 */
2185 VLIB_NODE_FN (ip6_rewrite_mcast_node) (vlib_main_t * vm,
2186                                        vlib_node_runtime_t * node,
2187                                        vlib_frame_t * frame)
2188 {
2189   /* Flag order: do_counters, is_midchain, is_mcast (kept as literals). */
2190   if (!adj_are_counters_enabled ())
2191     return ip6_rewrite_inline (vm, node, frame, 0, 0, 1);
2192   return ip6_rewrite_inline (vm, node, frame, 1, 0, 1);
2193 }
2194
/*
 * Node function of "ip6-midchain": rewrite with is_midchain = 1 and no
 * multicast fixup. Counters only when adj_are_counters_enabled().
 */
2195 VLIB_NODE_FN (ip6_midchain_node) (vlib_main_t * vm,
2196                                   vlib_node_runtime_t * node,
2197                                   vlib_frame_t * frame)
2198 {
2199   /* Flag order: do_counters, is_midchain, is_mcast (kept as literals). */
2200   if (!adj_are_counters_enabled ())
2201     return ip6_rewrite_inline (vm, node, frame, 0, 1, 0);
2202   return ip6_rewrite_inline (vm, node, frame, 1, 1, 0);
2203 }
2204
/*
 * Node function of "ip6-mcast-midchain": rewrite with both is_midchain
 * and is_mcast set. Counters only when adj_are_counters_enabled().
 */
2205 VLIB_NODE_FN (ip6_mcast_midchain_node) (vlib_main_t * vm,
2206                                         vlib_node_runtime_t * node,
2207                                         vlib_frame_t * frame)
2208 {
2209   /* Flag order: do_counters, is_midchain, is_mcast (kept as literals). */
2210   if (!adj_are_counters_enabled ())
2211     return ip6_rewrite_inline (vm, node, frame, 0, 1, 1);
2212   return ip6_rewrite_inline (vm, node, frame, 1, 1, 1);
2213 }
2214
/*
 * Registration of the "ip6-midchain" graph node, a sibling of
 * "ip6-rewrite".
 * NOTE(review): this is the only rewrite-family node here that uses
 * format_ip6_forward_next_trace rather than format_ip6_rewrite_trace;
 * confirm that is intended.
 */
2215 VLIB_REGISTER_NODE (ip6_midchain_node) = {
2216   .name = "ip6-midchain",
2217   .vector_size = sizeof (u32),
2218   .format_trace = format_ip6_forward_next_trace,
2219   .sibling_of = "ip6-rewrite",
2220 };
2221
/*
 * Registration of the "ip6-rewrite" graph node. Its next-node table is
 * indexed by ip6_rewrite_next_t; the other rewrite-family nodes in this
 * file are declared as siblings of this node.
 */
2222 VLIB_REGISTER_NODE (ip6_rewrite_node) =
2223 {
2224   .name = "ip6-rewrite",
2225   .vector_size = sizeof (u32),
2226   .format_trace = format_ip6_rewrite_trace,
2227   .n_next_nodes = IP6_REWRITE_N_NEXT,
2228   .next_nodes =
2229   {
2230     [IP6_REWRITE_NEXT_DROP] = "ip6-drop",
2231     [IP6_REWRITE_NEXT_ICMP_ERROR] = "ip6-icmp-error",
2232     [IP6_REWRITE_NEXT_FRAGMENT] = "ip6-frag",
2233   },
2234 };
2235
/*
 * Registration of the "ip6-rewrite-bcast" graph node, a sibling of
 * "ip6-rewrite".
 */
2236 VLIB_REGISTER_NODE (ip6_rewrite_bcast_node) = {
2237   .name = "ip6-rewrite-bcast",
2238   .vector_size = sizeof (u32),
2239
2240   .format_trace = format_ip6_rewrite_trace,
2241   .sibling_of = "ip6-rewrite",
2242 };
2243
/*
 * Registration of the "ip6-rewrite-mcast" graph node, a sibling of
 * "ip6-rewrite".
 */
2244 VLIB_REGISTER_NODE (ip6_rewrite_mcast_node) =
2245 {
2246   .name = "ip6-rewrite-mcast",
2247   .vector_size = sizeof (u32),
2248   .format_trace = format_ip6_rewrite_trace,
2249   .sibling_of = "ip6-rewrite",
2250 };
2251
2252
/*
 * Registration of the "ip6-mcast-midchain" graph node, a sibling of
 * "ip6-rewrite".
 */
2253 VLIB_REGISTER_NODE (ip6_mcast_midchain_node) =
2254 {
2255   .name = "ip6-mcast-midchain",
2256   .vector_size = sizeof (u32),
2257   .format_trace = format_ip6_rewrite_trace,
2258   .sibling_of = "ip6-rewrite",
2259 };
2260
2261
2262 /*
2263  * Hop-by-Hop handling
2264  *
2265  * The global hop-by-hop state is defined once, in the build that does
2266  * not define CLIB_MARCH_VARIANT.
2267  */
2265 #ifndef CLIB_MARCH_VARIANT
2266 ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
2267 #endif /* CLIB_MARCH_VARIANT */
2268
/*
 * X-macro list of hop-by-hop node errors (symbol, description). It is
 * expanded into the enum below and into the matching string table, so
 * both stay in the same order. PROCESSED expands to value 0.
 */
2269 #define foreach_ip6_hop_by_hop_error \
2270 _(PROCESSED, "pkts with ip6 hop-by-hop options") \
2271 _(FORMAT, "incorrectly formatted hop-by-hop options") \
2272 _(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options")
2273
2274 typedef enum
2275 {
2276 #define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
2277   foreach_ip6_hop_by_hop_error
2278 #undef _
2279   IP6_HOP_BY_HOP_N_ERROR,
2280 } ip6_hop_by_hop_error_t;
2281
2282 /*
2283  * Primary h-b-h handler trace support.
2284  * Trace record consumed by format_ip6_hop_by_hop_trace().
2285  */
2286 typedef struct
2287 {
2288   u32 next_index;               /* printed as "next index" */
2289   u32 trace_len;                /* printed as "traced"; bounds the option walk */
2290   u8 option_data[256];          /* read back as an ip6_hop_by_hop_header_t */
2291 } ip6_hop_by_hop_trace_t;
2292
2293 extern vlib_node_registration_t ip6_hop_by_hop_node;
2294
/*
 * Human-readable error descriptions, generated from
 * foreach_ip6_hop_by_hop_error in the same order as
 * ip6_hop_by_hop_error_t.
 */
2295 static char *ip6_hop_by_hop_error_strings[] = {
2296 #define _(sym,string) string,
2297   foreach_ip6_hop_by_hop_error
2298 #undef _
2299 };
2300
2301 #ifndef CLIB_MARCH_VARIANT
/*
 * format() callback for a hop-by-hop extension header.
 *
 * va_args: ip6_hop_by_hop_header_t *hbh0, int total_len.
 * Prints a summary line, then walks the options from just after the
 * header up to hbh0 + total_len bytes. Each option is printed by the
 * trace callback registered for its type in ip6_hop_by_hop_main, or as
 * "unrecognized option" when there is none. Returns the extended vector.
 */
2302 u8 *
2303 format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args)
2304 {
2305   ip6_hop_by_hop_header_t *hbh0 = va_arg (*args, ip6_hop_by_hop_header_t *);
2306   int total_len = va_arg (*args, int);
2307   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2308   ip6_hop_by_hop_option_t *opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2309   ip6_hop_by_hop_option_t *limit0 =
2310     (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 + total_len);
2311
2312   s = format (s, "IP6_HOP_BY_HOP: next protocol %d len %d total %d",
2313               hbh0->protocol, (hbh0->length + 1) << 3, total_len);
2314
2315   while (opt0 < limit0)
2316     {
2317       u8 type0 = opt0->type;
2318
2319       if (type0 == 0)
2320         {
2321           /* Type 0 is padding: step over this single byte. */
2322           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1);
2323           continue;
2324         }
2325
2326       if (hm->trace[type0])
2327         s = (*hm->trace[type0]) (s, opt0);
2328       else
2329         s = format (s, "\n    unrecognized option %d length %d", type0,
2330                     opt0->length);
2331
2332       /* Advance past the option header and its opt0->length data bytes. */
2333       opt0 = (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2334                                           sizeof (ip6_hop_by_hop_option_t));
2335     }
2336   return s;
2337 }
2343 #endif
2344
/*
 * Trace formatter for ip6_hop_by_hop_trace_t records.
 *
 * va_args: vlib_main_t * (unused), vlib_node_t * (unused),
 * ip6_hop_by_hop_trace_t *t.
 * Prints a summary line, then walks the options stored in
 * t->option_data up to t->trace_len bytes from its start. Each option is
 * printed by the trace callback registered for its type, or as
 * "unrecognized option" when there is none. Returns the extended vector.
 *
 * All pointer stepping is done in bytes: the (u8 *) cast must cover the
 * whole sum, otherwise the offset is scaled by the option struct size
 * and the walk runs past the traced data.
 */
2345 static u8 *
2346 format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
2347 {
2348   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
2349   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
2350   ip6_hop_by_hop_trace_t *t = va_arg (*args, ip6_hop_by_hop_trace_t *);
2351   ip6_hop_by_hop_header_t *hbh0;
2352   ip6_hop_by_hop_option_t *opt0, *limit0;
2353   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2354
2355   u8 type0;
2356
2357   hbh0 = (ip6_hop_by_hop_header_t *) t->option_data;
2358
2359   s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d",
2360               t->next_index, (hbh0->length + 1) << 3, t->trace_len);
2361
2362   opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2363   limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 + t->trace_len);
2364
2365   while (opt0 < limit0)
2366     {
2367       type0 = opt0->type;
2368       switch (type0)
2369         {
2370         case 0:         /* Pad: step over this single byte */
2371           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1);
2372           break;
2373
2374         default:
2375           if (hm->trace[type0])
2376             {
2377               s = (*hm->trace[type0]) (s, opt0);
2378             }
2379           else
2380             {
2381               s = format (s, "\n    unrecognized option %d length %d", type0,
2382                           opt0->length);
2383             }
2384           opt0 =
2385             (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2386                                          sizeof (ip6_hop_by_hop_option_t));
2387           break;
2388         }
2389     }
2390   return s;
2391 }
2392
/*
 * Walk the options of a hop-by-hop header from opt0 up to limit0.
 *
 * Option type 0 (Pad1) is a single byte; type 1 (PadN) is skipped by its
 * length. Any other type is handed to the handler registered in
 * ip6_hop_by_hop_main.options[]; a negative handler result ends the scan
 * with IP6_HOP_BY_HOP_ERROR_FORMAT. For a type without a handler the two
 * high-order bits of the type decide: skip, drop, or drop with an ICMP6
 * parameter-problem error (the last variant only for non-multicast
 * destinations). In the drop and ICMP cases *next0 is overwritten.
 *
 * @param b0      buffer being processed
 * @param ip0     IPv6 header of the packet
 * @param hbh0    hop-by-hop header (not read here)
 * @param opt0    first option to examine
 * @param limit0  one past the last option byte
 * @param next0   in/out next index, changed only on drop / ICMP error
 * @return 0 when nothing went wrong, otherwise an IP6_HOP_BY_HOP_ERROR_*
 *
 * NOTE(review): an unknown option of the "skip" class also ends the scan
 * (with result 0), so options following it are never passed to their
 * handlers. RFC 8200 section 4.2 says to skip it and continue; confirm
 * whether stopping here is intended.
 */
2393 always_inline u8
2394 ip6_scan_hbh_options (vlib_buffer_t * b0,
2395                       ip6_header_t * ip0,
2396                       ip6_hop_by_hop_header_t * hbh0,
2397                       ip6_hop_by_hop_option_t * opt0,
2398                       ip6_hop_by_hop_option_t * limit0, u32 * next0)
2399 {
2400   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2401   u8 type0;
2402   u8 error0 = 0;
2403
2404   while (opt0 < limit0)
2405     {
2406       type0 = opt0->type;
2407       switch (type0)
2408         {
2409         case 0:         /* Pad1: exactly one byte, so step in bytes */
2410           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1);
2411           continue;
2412         case 1:         /* PadN */
2413           break;
2414         default:
2415           if (hm->options[type0])
2416             {
2417               if ((*hm->options[type0]) (b0, ip0, opt0) < 0)
2418                 {
2419                   error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2420                   return (error0);
2421                 }
2422             }
2423           else
2424             {
2425               /* Unrecognized mandatory option, check the two high order bits */
2426               switch (opt0->type & HBH_OPTION_TYPE_HIGH_ORDER_BITS)
2427                 {
2428                 case HBH_OPTION_TYPE_SKIP_UNKNOWN:
2429                   break;
2430                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN:
2431                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2432                   *next0 = IP_LOOKUP_NEXT_DROP;
2433                   break;
2434                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP:
2435                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2436                   *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
2437                   icmp6_error_set_vnet_buffer (b0, ICMP6_parameter_problem,
2438                                                ICMP6_parameter_problem_unrecognized_option,
2439                                                (u8 *) opt0 - (u8 *) ip0);
2440                   break;
2441                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST:
2442                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2443                   if (!ip6_address_is_multicast (&ip0->dst_address))
2444                     {
2445                       *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
2446                       icmp6_error_set_vnet_buffer (b0,
2447                                                    ICMP6_parameter_problem,
2448                                                    ICMP6_parameter_problem_unrecognized_option,
2449                                                    (u8 *) opt0 - (u8 *) ip0);
2450                     }
2451                   else
2452                     {
2453                       *next0 = IP_LOOKUP_NEXT_DROP;
2454                     }
2455                   break;
2456                 }
2457               return (error0);
2458             }
2459         }
2460       opt0 =
2461         (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2462                                      sizeof (ip6_hop_by_hop_option_t));
2463     }
2464   return (error0);
2465 }
2466
2467 /*
2468  * Process the Hop-by-Hop Options header
2469  */
2470 VLIB_NODE_FN (ip6_hop_by_hop_node) (vlib_main_t * vm,
2471                                     vlib_node_runtime_t * node,
2472                                     vlib_frame_t * frame)
2473 {
2474   vlib_node_runtime_t *error_node =
2475     vlib_node_get_runtime (vm, ip6_hop_by_hop_node.index);
2476   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2477   u32 n_left_from, *from, *to_next;
2478   ip_lookup_next_t next_index;
2479
2480   from = vlib_frame_vector_args (frame);
2481   n_left_from = frame->n_vectors;
2482   next_index = node->cached_next_index;
2483
2484   while (n_left_from > 0)
2485     {
2486       u32 n_left_to_next;
2487
2488       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2489
2490       while (n_left_from >= 4 && n_left_to_next >= 2)
2491         {
2492           u32 bi0, bi1;
2493           vlib_buffer_t *b0, *b1;
2494           u32 next0, next1;
2495           ip6_header_t *ip0, *ip1;
2496           ip6_hop_by_hop_header_t *hbh0, *hbh1;
2497           ip6_hop_by_hop_option_t *opt0, *limit0, *opt1, *limit1;
2498           u8 error0 = 0, error1 = 0;
2499
2500           /* Prefetch next iteration. */
2501           {
2502             vlib_buffer_t *p2, *p3;
2503
2504             p2 = vlib_get_buffer (vm, from[2]);
2505             p3 = vlib_get_buffer (vm, from[3]);
2506
2507             vlib_prefetch_buffer_header (p2, LOAD);
2508             vlib_prefetch_buffer_header (p3, LOAD);
2509
2510             CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
2511             CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
2512           }
2513
2514           /* Speculatively enqueue b0, b1 to the current next frame */
2515           to_next[0] = bi0 = from[0];
2516           to_next[1] = bi1 = from[1];
2517           from += 2;
2518           to_next += 2;
2519           n_left_from -= 2;
2520           n_left_to_next -= 2;
2521
2522           b0 = vlib_get_buffer (vm, bi0);
2523           b1 = vlib_get_buffer (vm, bi1);
2524
2525           /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2526           u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
2527           ip_adjacency_t *adj0 = adj_get (adj_index0);
2528           u32 adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
2529           ip_adjacency_t *adj1 = adj_get (adj_index1);
2530
2531           /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2532           next0 = adj0->lookup_next_index;
2533           next1 = adj1->lookup_next_index;
2534
2535           ip0 = vlib_buffer_get_current (b0);
2536           ip1 = vlib_buffer_get_current (b1);
2537           hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
2538           hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
2539           opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2540           opt1 = (ip6_hop_by_hop_option_t *) (hbh1 + 1);
2541           limit0 =
2542             (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
2543                                          ((hbh0->length + 1) << 3));
2544           limit1 =
2545             (ip6_hop_by_hop_option_t *) ((u8 *) hbh1 +
2546                                          ((hbh1->length + 1) << 3));
2547
2548           /*
2549            * Basic validity checks
2550            */
2551           if ((hbh0->length + 1) << 3 >
2552               clib_net_to_host_u16 (ip0->payload_length))
2553             {
2554               error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2555               next0 = IP_LOOKUP_NEXT_DROP;
2556               goto outdual;
2557             }
2558           /* Scan the set of h-b-h options, process ones that we understand */
2559           error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
2560
2561           if ((hbh1->length + 1) << 3 >
2562               clib_net_to_host_u16 (ip1->payload_length))
2563             {
2564               error1 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2565               next1 = IP_LOOKUP_NEXT_DROP;
2566               goto outdual;
2567             }
2568           /* Scan the set of h-b-h options, process ones that we understand */
2569           error1 = ip6_scan_hbh_options (b1, ip1, hbh1, opt1, limit1, &next1);
2570
2571         outdual:
2572           /* Has the classifier flagged this buffer for special treatment? */
2573           if (PREDICT_FALSE
2574               ((error0 == 0)
2575                && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
2576             next0 = hm->next_override;
2577
2578           /* Has the classifier flagged this buffer for special treatment? */
2579           if (PREDICT_FALSE
2580               ((error1 == 0)
2581                && (vnet_buffer (b1)->l2_classify.opaque_index & OI_DECAP)))
2582             next1 = hm->next_override;
2583
2584           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
2585             {
2586               if (b0->flags & VLIB_BUFFER_IS_TRACED)
2587                 {
2588                   ip6_hop_by_hop_trace_t *t =
2589                     vlib_add_trace (vm, node, b0, sizeof (*t));
2590                   u32 trace_len = (hbh0->length + 1) << 3;
2591                   t->next_index = next0;
2592                   /* Capture the h-b-h option verbatim */
2593                   trace_len =
2594                     trace_len <
2595                     ARRAY_LEN (t->option_data) ? trace_len :
2596                     ARRAY_LEN (t->option_data);
2597                   t->trace_len = trace_len;
2598                   clib_memcpy_fast (t->option_data, hbh0, trace_len);
2599                 }
2600               if (b1->flags & VLIB_BUFFER_IS_TRACED)
2601                 {
2602                   ip6_hop_by_hop_trace_t *t =
2603                     vlib_add_trace (vm, node, b1, sizeof (*t));
2604                   u32 trace_len = (hbh1->length + 1) << 3;
2605                   t->next_index = next1;
2606                   /* Capture the h-b-h option verbatim */
2607                   trace_len =
2608                     trace_len <
2609                     ARRAY_LEN (t->option_data) ? trace_len :
2610                     ARRAY_LEN (t->option_data);
2611                   t->trace_len = trace_len;
2612                   clib_memcpy_fast (t->option_data, hbh1, trace_len);
2613                 }
2614
2615             }
2616
2617           b0->error = error_node->errors[error0];
2618           b1->error = error_node->errors[error1];
2619
2620           /* verify speculative enqueue, maybe switch current next frame */
2621           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
2622                                            n_left_to_next, bi0, bi1, next0,
2623                                            next1);
2624         }
2625
2626       while (n_left_from > 0 && n_left_to_next > 0)
2627         {
2628           u32 bi0;
2629           vlib_buffer_t *b0;
2630           u32 next0;
2631           ip6_header_t *ip0;
2632           ip6_hop_by_hop_header_t *hbh0;
2633           ip6_hop_by_hop_option_t *opt0, *limit0;
2634           u8 error0 = 0;
2635
2636           /* Speculatively enqueue b0 to the current next frame */
2637           bi0 = from[0];
2638           to_next[0] = bi0;
2639           from += 1;
2640           to_next += 1;
2641           n_left_from -= 1;
2642           n_left_to_next -= 1;
2643
2644           b0 = vlib_get_buffer (vm, bi0);
2645           /*
2646            * Default use the next_index from the adjacency.
2647            * A HBH option rarely redirects to a different node
2648            */
2649           u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
2650           ip_adjacency_t *adj0 = adj_get (adj_index0);
2651           next0 = adj0->lookup_next_index;
2652
2653           ip0 = vlib_buffer_get_current (b0);
2654           hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
2655           opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2656           limit0 =
2657             (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
2658                                          ((hbh0->length + 1) << 3));
2659
2660           /*
2661            * Basic validity checks
2662            */
2663           if ((hbh0->length + 1) << 3 >
2664               clib_net_to_host_u16 (ip0->payload_length))
2665             {
2666               error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2667               next0 = IP_LOOKUP_NEXT_DROP;
2668               goto out0;
2669             }
2670
2671           /* Scan the set of h-b-h options, process ones that we understand */
2672           error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
2673
2674         out0:
2675           /* Has the classifier flagged this buffer for special treatment? */
2676           if (PREDICT_FALSE
2677               ((error0 == 0)
2678                && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
2679             next0 = hm->next_override;
2680
2681           if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
2682             {
2683               ip6_hop_by_hop_trace_t *t =
2684                 vlib_add_trace (vm, node, b0, sizeof (*t));
2685               u32 trace_len = (hbh0->length + 1) << 3;
2686               t->next_index = next0;
2687               /* Capture the h-b-h option verbatim */
2688               trace_len =
2689                 trace_len <
2690                 ARRAY_LEN (t->option_data) ? trace_len :
2691                 ARRAY_LEN (t->option_data);
2692               t->trace_len = trace_len;
2693               clib_memcpy_fast (t->option_data, hbh0, trace_len);
2694             }
2695
2696           b0->error = error_node->errors[error0];
2697
2698           /* verify speculative enqueue, maybe switch current next frame */
2699           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
2700                                            n_left_to_next, bi0, next0);
2701         }
2702       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2703     }
2704   return frame->n_vectors;
2705 }
2706
2707 VLIB_REGISTER_NODE (ip6_hop_by_hop_node) =
2708 {
2709   .name = "ip6-hop-by-hop",
2710   .sibling_of = "ip6-lookup",
2711   .vector_size = sizeof (u32),
2712   .format_trace = format_ip6_hop_by_hop_trace,
2713   .type = VLIB_NODE_TYPE_INTERNAL,
2714   .n_errors = ARRAY_LEN (ip6_hop_by_hop_error_strings),
2715   .error_strings = ip6_hop_by_hop_error_strings,
2716   .n_next_nodes = 0,
2717 };
2718
2719 static clib_error_t *
2720 ip6_hop_by_hop_init (vlib_main_t * vm)
2721 {
2722   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2723   clib_memset (hm->options, 0, sizeof (hm->options));
2724   clib_memset (hm->trace, 0, sizeof (hm->trace));
2725   hm->next_override = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP;
2726   return (0);
2727 }
2728
2729 VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
2730
2731 #ifndef CLIB_MARCH_VARIANT
2732 void
2733 ip6_hbh_set_next_override (uword next)
2734 {
2735   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2736
2737   hm->next_override = next;
2738 }
2739
2740 int
2741 ip6_hbh_register_option (u8 option,
2742                          int options (vlib_buffer_t * b, ip6_header_t * ip,
2743                                       ip6_hop_by_hop_option_t * opt),
2744                          u8 * trace (u8 * s, ip6_hop_by_hop_option_t * opt))
2745 {
2746   ip6_main_t *im = &ip6_main;
2747   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2748
2749   ASSERT ((u32) option < ARRAY_LEN (hm->options));
2750
2751   /* Already registered */
2752   if (hm->options[option])
2753     return (-1);
2754
2755   hm->options[option] = options;
2756   hm->trace[option] = trace;
2757
2758   /* Set global variable */
2759   im->hbh_enabled = 1;
2760
2761   return (0);
2762 }
2763
2764 int
2765 ip6_hbh_unregister_option (u8 option)
2766 {
2767   ip6_main_t *im = &ip6_main;
2768   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2769
2770   ASSERT ((u32) option < ARRAY_LEN (hm->options));
2771
2772   /* Not registered */
2773   if (!hm->options[option])
2774     return (-1);
2775
2776   hm->options[option] = NULL;
2777   hm->trace[option] = NULL;
2778
2779   /* Disable global knob if this was the last option configured */
2780   int i;
2781   bool found = false;
2782   for (i = 0; i < 256; i++)
2783     {
2784       if (hm->options[option])
2785         {
2786           found = true;
2787           break;
2788         }
2789     }
2790   if (!found)
2791     im->hbh_enabled = 0;
2792
2793   return (0);
2794 }
2795
2796 /* Global IP6 main. */
2797 ip6_main_t ip6_main;
2798 #endif
2799
2800 static clib_error_t *
2801 ip6_lookup_init (vlib_main_t * vm)
2802 {
2803   ip6_main_t *im = &ip6_main;
2804   clib_error_t *error;
2805   uword i;
2806
2807   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
2808     return error;
2809
2810   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
2811     {
2812       u32 j, i0, i1;
2813
2814       i0 = i / 32;
2815       i1 = i % 32;
2816
2817       for (j = 0; j < i0; j++)
2818         im->fib_masks[i].as_u32[j] = ~0;
2819
2820       if (i1)
2821         im->fib_masks[i].as_u32[i0] =
2822           clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
2823     }
2824
2825   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 1);
2826
2827   /* Create FIB with index 0 and table id of 0. */
2828   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
2829                                      FIB_SOURCE_DEFAULT_ROUTE);
2830   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
2831                                       MFIB_SOURCE_DEFAULT_ROUTE);
2832
2833   {
2834     pg_node_t *pn;
2835     pn = pg_get_node (ip6_lookup_node.index);
2836     pn->unformat_edit = unformat_pg_ip6_header;
2837   }
2838
2839   /* Unless explicitly configured, don't process HBH options */
2840   im->hbh_enabled = 0;
2841
2842   return error;
2843 }
2844
2845 VLIB_INIT_FUNCTION (ip6_lookup_init);
2846
2847 static clib_error_t *
2848 set_ip6_flow_hash_command_fn (vlib_main_t * vm,
2849                               unformat_input_t * input,
2850                               vlib_cli_command_t * cmd)
2851 {
2852   int matched = 0;
2853   u32 table_id = 0;
2854   u32 flow_hash_config = 0;
2855   int rv;
2856
2857   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2858     {
2859       if (unformat (input, "table %d", &table_id))
2860         matched = 1;
2861 #define _(a, b, v)                                                            \
2862   else if (unformat (input, #a))                                              \
2863   {                                                                           \
2864     flow_hash_config |= v;                                                    \
2865     matched = 1;                                                              \
2866   }
2867       foreach_flow_hash_bit
2868 #undef _
2869         else
2870         break;
2871     }
2872
2873   if (matched == 0)
2874     return clib_error_return (0, "unknown input `%U'",
2875                               format_unformat_error, input);
2876
2877   rv = ip_flow_hash_set (AF_IP6, table_id, flow_hash_config);
2878   switch (rv)
2879     {
2880     case 0:
2881       break;
2882
2883     case -1:
2884       return clib_error_return (0, "no such FIB table %d", table_id);
2885
2886     default:
2887       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2888       break;
2889     }
2890
2891   return 0;
2892 }
2893
2894 /*?
2895  * Configure the set of IPv6 fields used by the flow hash.
2896  *
2897  * @cliexpar
2898  * @parblock
2899  * Example of how to set the flow hash on a given table:
2900  * @cliexcmd{set ip6 flow-hash table 8 dst sport dport proto}
2901  *
 * Example of how to display the configured flow hash:
2903  * @cliexstart{show ip6 fib}
2904  * ipv6-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2905  * @::/0
2906  *   unicast-ip6-chain
2907  *   [@0]: dpo-load-balance: [index:5 buckets:1 uRPF:5 to:[0:0]]
2908  *     [0] [@0]: dpo-drop ip6
2909  * fe80::/10
2910  *   unicast-ip6-chain
2911  *   [@0]: dpo-load-balance: [index:10 buckets:1 uRPF:10 to:[0:0]]
2912  *     [0] [@2]: dpo-receive
2913  * ff02::1/128
2914  *   unicast-ip6-chain
2915  *   [@0]: dpo-load-balance: [index:8 buckets:1 uRPF:8 to:[0:0]]
2916  *     [0] [@2]: dpo-receive
2917  * ff02::2/128
2918  *   unicast-ip6-chain
2919  *   [@0]: dpo-load-balance: [index:7 buckets:1 uRPF:7 to:[0:0]]
2920  *     [0] [@2]: dpo-receive
2921  * ff02::16/128
2922  *   unicast-ip6-chain
2923  *   [@0]: dpo-load-balance: [index:9 buckets:1 uRPF:9 to:[0:0]]
2924  *     [0] [@2]: dpo-receive
2925  * ff02::1:ff00:0/104
2926  *   unicast-ip6-chain
2927  *   [@0]: dpo-load-balance: [index:6 buckets:1 uRPF:6 to:[0:0]]
2928  *     [0] [@2]: dpo-receive
2929  * ipv6-VRF:8, fib_index 1, flow hash: dst sport dport proto
2930  * @::/0
2931  *   unicast-ip6-chain
2932  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2933  *     [0] [@0]: dpo-drop ip6
2934  * @::a:1:1:0:4/126
2935  *   unicast-ip6-chain
2936  *   [@0]: dpo-load-balance: [index:27 buckets:1 uRPF:26 to:[0:0]]
2937  *     [0] [@4]: ipv6-glean: af_packet0
2938  * @::a:1:1:0:7/128
2939  *   unicast-ip6-chain
2940  *   [@0]: dpo-load-balance: [index:28 buckets:1 uRPF:27 to:[0:0]]
2941  *     [0] [@2]: dpo-receive: @::a:1:1:0:7 on af_packet0
2942  * fe80::/10
2943  *   unicast-ip6-chain
2944  *   [@0]: dpo-load-balance: [index:26 buckets:1 uRPF:25 to:[0:0]]
2945  *     [0] [@2]: dpo-receive
2946  * fe80::fe:3eff:fe3e:9222/128
2947  *   unicast-ip6-chain
2948  *   [@0]: dpo-load-balance: [index:29 buckets:1 uRPF:28 to:[0:0]]
2949  *     [0] [@2]: dpo-receive: fe80::fe:3eff:fe3e:9222 on af_packet0
2950  * ff02::1/128
2951  *   unicast-ip6-chain
2952  *   [@0]: dpo-load-balance: [index:24 buckets:1 uRPF:23 to:[0:0]]
2953  *     [0] [@2]: dpo-receive
2954  * ff02::2/128
2955  *   unicast-ip6-chain
2956  *   [@0]: dpo-load-balance: [index:23 buckets:1 uRPF:22 to:[0:0]]
2957  *     [0] [@2]: dpo-receive
2958  * ff02::16/128
2959  *   unicast-ip6-chain
2960  *   [@0]: dpo-load-balance: [index:25 buckets:1 uRPF:24 to:[0:0]]
2961  *     [0] [@2]: dpo-receive
2962  * ff02::1:ff00:0/104
2963  *   unicast-ip6-chain
2964  *   [@0]: dpo-load-balance: [index:22 buckets:1 uRPF:21 to:[0:0]]
2965  *     [0] [@2]: dpo-receive
2966  * @cliexend
2967  * @endparblock
2968 ?*/
2969 VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) = {
2970   .path = "set ip6 flow-hash",
2971   .short_help = "set ip6 flow-hash table <table-id> [src] [dst] [sport] "
2972                 "[dport] [proto] [reverse] [flowlabel]",
2973   .function = set_ip6_flow_hash_command_fn,
2974 };
2975
2976 static clib_error_t *
2977 show_ip6_local_command_fn (vlib_main_t * vm,
2978                            unformat_input_t * input, vlib_cli_command_t * cmd)
2979 {
2980   ip6_main_t *im = &ip6_main;
2981   ip_lookup_main_t *lm = &im->lookup_main;
2982   int i;
2983
2984   vlib_cli_output (vm, "Protocols handled by ip6_local");
2985   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
2986     {
2987       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2988         {
2989
2990           u32 node_index = vlib_get_node (vm,
2991                                           ip6_local_node.index)->
2992             next_nodes[lm->local_next_by_ip_protocol[i]];
2993           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
2994                            node_index);
2995         }
2996     }
2997   return 0;
2998 }
2999
3000
3001
3002 /*?
3003  * Display the set of protocols handled by the local IPv6 stack.
3004  *
3005  * @cliexpar
3006  * Example of how to display local protocol table:
3007  * @cliexstart{show ip6 local}
3008  * Protocols handled by ip6_local
3009  * 17
3010  * 43
3011  * 58
3012  * 115
3013  * @cliexend
3014 ?*/
3015 VLIB_CLI_COMMAND (show_ip6_local, static) =
3016 {
3017   .path = "show ip6 local",
3018   .function = show_ip6_local_command_fn,
3019   .short_help = "show ip6 local",
3020 };
3021
3022 #ifndef CLIB_MARCH_VARIANT
3023 int
3024 vnet_set_ip6_classify_intfc (vlib_main_t *vm, u32 sw_if_index, u32 table_index)
3025 {
3026   vnet_main_t *vnm = vnet_get_main ();
3027   vnet_interface_main_t *im = &vnm->interface_main;
3028   ip6_main_t *ipm = &ip6_main;
3029   ip_lookup_main_t *lm = &ipm->lookup_main;
3030   vnet_classify_main_t *cm = &vnet_classify_main;
3031   ip6_address_t *if_addr;
3032
3033   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3034     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3035
3036   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3037     return VNET_API_ERROR_NO_SUCH_ENTRY;
3038
3039   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3040   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3041
3042   if_addr = ip6_interface_first_address (ipm, sw_if_index);
3043
3044   if (NULL != if_addr)
3045     {
3046       fib_prefix_t pfx = {
3047         .fp_len = 128,
3048         .fp_proto = FIB_PROTOCOL_IP6,
3049         .fp_addr.ip6 = *if_addr,
3050       };
3051       u32 fib_index;
3052
3053       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3054                                                        sw_if_index);
3055       if (table_index != (u32) ~ 0)
3056         {
3057           dpo_id_t dpo = DPO_INVALID;
3058           dpo_set (&dpo,
3059                    DPO_CLASSIFY,
3060                    DPO_PROTO_IP6,
3061                    classify_dpo_create (DPO_PROTO_IP6, table_index));
3062           fib_table_entry_special_dpo_add (fib_index,
3063                                            &pfx,
3064                                            FIB_SOURCE_CLASSIFY,
3065                                            FIB_ENTRY_FLAG_NONE, &dpo);
3066           dpo_reset (&dpo);
3067         }
3068       else
3069         {
3070           fib_table_entry_special_remove (fib_index,
3071                                           &pfx, FIB_SOURCE_CLASSIFY);
3072         }
3073     }
3074
3075   return 0;
3076 }
3077 #endif
3078
3079 static clib_error_t *
3080 set_ip6_classify_command_fn (vlib_main_t * vm,
3081                              unformat_input_t * input,
3082                              vlib_cli_command_t * cmd)
3083 {
3084   u32 table_index = ~0;
3085   int table_index_set = 0;
3086   u32 sw_if_index = ~0;
3087   int rv;
3088
3089   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3090     {
3091       if (unformat (input, "table-index %d", &table_index))
3092         table_index_set = 1;
3093       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3094                          vnet_get_main (), &sw_if_index))
3095         ;
3096       else
3097         break;
3098     }
3099
3100   if (table_index_set == 0)
3101     return clib_error_return (0, "classify table-index must be specified");
3102
3103   if (sw_if_index == ~0)
3104     return clib_error_return (0, "interface / subif must be specified");
3105
3106   rv = vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index);
3107
3108   switch (rv)
3109     {
3110     case 0:
3111       break;
3112
3113     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3114       return clib_error_return (0, "No such interface");
3115
3116     case VNET_API_ERROR_NO_SUCH_ENTRY:
3117       return clib_error_return (0, "No such classifier table");
3118     }
3119   return 0;
3120 }
3121
3122 /*?
3123  * Assign a classification table to an interface. The classification
3124  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
3126  * on an interface.
3127  *
3128  * @cliexpar
3129  * Example of how to assign a classification table to an interface:
3130  * @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1}
3131 ?*/
3132 VLIB_CLI_COMMAND (set_ip6_classify_command, static) =
3133 {
3134   .path = "set ip6 classify",
3135   .short_help =
3136   "set ip6 classify intfc <interface> table-index <classify-idx>",
3137   .function = set_ip6_classify_command_fn,
3138 };
3139
3140 /*
3141  * fd.io coding-style-patch-verification: ON
3142  *
3143  * Local Variables:
3144  * eval: (c-set-style "gnu")
3145  * End:
3146  */