ip: fix interface ip address del sw_if_index check
[vpp.git] / src / vnet / ip / ip6_forward.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip6_forward.c: IP v6 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ip/ip6_link.h>
44 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vppinfra/cache.h>
47 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
48 #include <vnet/fib/ip6_fib.h>
49 #include <vnet/mfib/ip6_mfib.h>
50 #include <vnet/dpo/load_balance_map.h>
51 #include <vnet/dpo/classify_dpo.h>
52 #include <vnet/classify/vnet_classify.h>
53
54 #ifndef CLIB_MARCH_VARIANT
55 #include <vppinfra/bihash_template.c>
56 #endif
57 #include <vnet/ip/ip6_forward.h>
58 #include <vnet/interface_output.h>
59
60 /* Flag used by IOAM code: the classifier sets it, pop-hop-by-hop checks it */
61 #define OI_DECAP   0x80000000
62
63 static void
64 ip6_add_interface_prefix_routes (ip6_main_t * im,
65                                  u32 sw_if_index,
66                                  u32 fib_index,
67                                  ip6_address_t * address, u32 address_length)
68 {
69   ip_lookup_main_t *lm = &im->lookup_main;
70   ip_interface_prefix_t *if_prefix;
71
72   /* *INDENT-OFF* */
73   ip_interface_prefix_key_t key = {
74     .prefix = {
75       .fp_len = address_length,
76       .fp_proto = FIB_PROTOCOL_IP6,
77       .fp_addr.ip6 = {
78         .as_u64 = {
79           address->as_u64[0] & im->fib_masks[address_length].as_u64[0],
80           address->as_u64[1] & im->fib_masks[address_length].as_u64[1],
81         },
82       },
83     },
84     .sw_if_index = sw_if_index,
85   };
86   /* *INDENT-ON* */
87
88   /* If prefix already set on interface, just increment ref count & return */
89   if_prefix = ip_get_interface_prefix (lm, &key);
90   if (if_prefix)
91     {
92       if_prefix->ref_count += 1;
93       return;
94     }
95
96   /* New prefix - allocate a pool entry, initialize it, add to the hash */
97   pool_get (lm->if_prefix_pool, if_prefix);
98   if_prefix->ref_count = 1;
99   clib_memcpy (&if_prefix->key, &key, sizeof (key));
100   mhash_set (&lm->prefix_to_if_prefix_index, &key,
101              if_prefix - lm->if_prefix_pool, 0 /* old value */ );
102
103   /* length < 128 - add glean */
104   if (address_length < 128)
105     {
106       /* set the glean route for the prefix */
107       fib_table_entry_update_one_path (fib_index, &key.prefix,
108                                        FIB_SOURCE_INTERFACE,
109                                        (FIB_ENTRY_FLAG_CONNECTED |
110                                         FIB_ENTRY_FLAG_ATTACHED),
111                                        DPO_PROTO_IP6,
112                                        /* No next-hop address */
113                                        NULL, sw_if_index,
114                                        /* invalid FIB index */
115                                        ~0, 1,
116                                        /* no out-label stack */
117                                        NULL, FIB_ROUTE_PATH_FLAG_NONE);
118     }
119 }
120
121 static void
122 ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
123                           ip6_main_t * im, u32 fib_index,
124                           ip_interface_address_t * a)
125 {
126   ip_lookup_main_t *lm = &im->lookup_main;
127   ip6_address_t *address = ip_interface_address_get_address (lm, a);
128   fib_prefix_t pfx = {
129     .fp_len = a->address_length,
130     .fp_proto = FIB_PROTOCOL_IP6,
131     .fp_addr.ip6 = *address,
132   };
133
134   /* set special routes for the prefix if needed */
135   ip6_add_interface_prefix_routes (im, sw_if_index, fib_index,
136                                    address, a->address_length);
137
138   pfx.fp_len = 128;
139   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
140     {
141       u32 classify_table_index =
142         lm->classify_table_index_by_sw_if_index[sw_if_index];
143       if (classify_table_index != (u32) ~ 0)
144         {
145           dpo_id_t dpo = DPO_INVALID;
146
147           dpo_set (&dpo,
148                    DPO_CLASSIFY,
149                    DPO_PROTO_IP6,
150                    classify_dpo_create (DPO_PROTO_IP6, classify_table_index));
151
152           fib_table_entry_special_dpo_add (fib_index,
153                                            &pfx,
154                                            FIB_SOURCE_CLASSIFY,
155                                            FIB_ENTRY_FLAG_NONE, &dpo);
156           dpo_reset (&dpo);
157         }
158     }
159
160   fib_table_entry_update_one_path (fib_index, &pfx,
161                                    FIB_SOURCE_INTERFACE,
162                                    (FIB_ENTRY_FLAG_CONNECTED |
163                                     FIB_ENTRY_FLAG_LOCAL),
164                                    DPO_PROTO_IP6,
165                                    &pfx.fp_addr,
166                                    sw_if_index, ~0,
167                                    1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
168 }
169
170 static void
171 ip6_del_interface_prefix_routes (ip6_main_t * im,
172                                  u32 sw_if_index,
173                                  u32 fib_index,
174                                  ip6_address_t * address, u32 address_length)
175 {
176   ip_lookup_main_t *lm = &im->lookup_main;
177   ip_interface_prefix_t *if_prefix;
178
179   /* *INDENT-OFF* */
180   ip_interface_prefix_key_t key = {
181     .prefix = {
182       .fp_len = address_length,
183       .fp_proto = FIB_PROTOCOL_IP6,
184       .fp_addr.ip6 = {
185         .as_u64 = {
186           address->as_u64[0] & im->fib_masks[address_length].as_u64[0],
187           address->as_u64[1] & im->fib_masks[address_length].as_u64[1],
188         },
189       },
190     },
191     .sw_if_index = sw_if_index,
192   };
193   /* *INDENT-ON* */
194
195   if_prefix = ip_get_interface_prefix (lm, &key);
196   if (!if_prefix)
197     {
198       clib_warning ("Prefix not found while deleting %U",
199                     format_ip4_address_and_length, address, address_length);
200       return;
201     }
202
203   /* If not deleting last intf addr in prefix, decrement ref count & return */
204   if_prefix->ref_count -= 1;
205   if (if_prefix->ref_count > 0)
206     return;
207
208   /* length <= 128, delete glean route */
209   if (address_length <= 128)
210     {
211       /* remove glean route for prefix */
212       fib_table_entry_delete (fib_index, &key.prefix, FIB_SOURCE_INTERFACE);
213     }
214
215   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */ );
216   pool_put (lm->if_prefix_pool, if_prefix);
217 }
218
219 static void
220 ip6_del_interface_routes (u32 sw_if_index, ip6_main_t * im,
221                           u32 fib_index,
222                           ip6_address_t * address, u32 address_length)
223 {
224   fib_prefix_t pfx = {
225     .fp_len = 128,
226     .fp_proto = FIB_PROTOCOL_IP6,
227     .fp_addr.ip6 = *address,
228   };
229
230   /* delete special routes for the prefix if needed */
231   ip6_del_interface_prefix_routes (im, sw_if_index, fib_index,
232                                    address, address_length);
233
234   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
235 }
236
237 #ifndef CLIB_MARCH_VARIANT
238 void
239 ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
240 {
241   ip6_main_t *im = &ip6_main;
242
243   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
244
245   /*
246    * enable/disable only on the 1<->0 transition
247    */
248   if (is_enable)
249     {
250       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
251         return;
252     }
253   else
254     {
255       /* The ref count is 0 when an address is removed from an interface that has
256        * no address - this is not a ciritical error */
257       if (0 == im->ip_enabled_by_sw_if_index[sw_if_index] ||
258           0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
259         return;
260     }
261
262   vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index,
263                                !is_enable, 0, 0);
264
265   vnet_feature_enable_disable ("ip6-multicast", "ip6-not-enabled",
266                                sw_if_index, !is_enable, 0, 0);
267 }
268
269 /* get first interface address */
270 ip6_address_t *
271 ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index)
272 {
273   ip_lookup_main_t *lm = &im->lookup_main;
274   ip_interface_address_t *ia = 0;
275   ip6_address_t *result = 0;
276
277   /* *INDENT-OFF* */
278   foreach_ip_interface_address (lm, ia, sw_if_index,
279                                 1 /* honor unnumbered */,
280   ({
281     ip6_address_t * a = ip_interface_address_get_address (lm, ia);
282     result = a;
283     break;
284   }));
285   /* *INDENT-ON* */
286   return result;
287 }
288
289 clib_error_t *
290 ip6_add_del_interface_address (vlib_main_t * vm,
291                                u32 sw_if_index,
292                                ip6_address_t * address,
293                                u32 address_length, u32 is_del)
294 {
295   vnet_main_t *vnm = vnet_get_main ();
296   ip6_main_t *im = &ip6_main;
297   ip_lookup_main_t *lm = &im->lookup_main;
298   clib_error_t *error = NULL;
299   u32 if_address_index;
300   ip6_address_fib_t ip6_af, *addr_fib = 0;
301   const ip6_address_t *ll_addr;
302
303   /* local0 interface doesn't support IP addressing */
304   if (sw_if_index == 0)
305     {
306       return
307         clib_error_create ("local0 interface doesn't support IP addressing");
308     }
309
310   if (ip6_address_is_link_local_unicast (address))
311     {
312       if (address_length != 128)
313         {
314           vnm->api_errno = VNET_API_ERROR_ADDRESS_LENGTH_MISMATCH;
315           return
316             clib_error_create
317             ("prefix length of link-local address must be 128");
318         }
319       if (!is_del)
320         {
321           int rv;
322
323           rv = ip6_link_set_local_address (sw_if_index, address);
324
325           if (rv)
326             {
327               vnm->api_errno = rv;
328               return clib_error_create ("address not assignable");
329             }
330         }
331       else
332         {
333           ll_addr = ip6_get_link_local_address (sw_if_index);
334           if (ip6_address_is_equal (ll_addr, address))
335             {
336               vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_DELETABLE;
337               return clib_error_create ("address not deletable");
338             }
339           else
340             {
341               vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
342               return clib_error_create ("address not found");
343             }
344         }
345
346       return (NULL);
347     }
348
349   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
350   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
351
352   ip6_addr_fib_init (&ip6_af, address,
353                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
354   vec_add1 (addr_fib, ip6_af);
355
356   /* *INDENT-OFF* */
357   if (!is_del)
358     {
359       /* When adding an address check that it does not conflict
360          with an existing address on any interface in this table. */
361       ip_interface_address_t *ia;
362       vnet_sw_interface_t *sif;
363
364       pool_foreach(sif, vnm->interface_main.sw_interfaces,
365       ({
366           if (im->fib_index_by_sw_if_index[sw_if_index] ==
367               im->fib_index_by_sw_if_index[sif->sw_if_index])
368             {
369               foreach_ip_interface_address
370                 (&im->lookup_main, ia, sif->sw_if_index,
371                  0 /* honor unnumbered */ ,
372                  ({
373                    ip6_address_t * x =
374                      ip_interface_address_get_address
375                      (&im->lookup_main, ia);
376
377                    if (ip6_destination_matches_route
378                        (im, address, x, ia->address_length) ||
379                        ip6_destination_matches_route (im,
380                                                       x,
381                                                       address,
382                                                       address_length))
383                      {
384                        /* an intf may have >1 addr from the same prefix */
385                        if ((sw_if_index == sif->sw_if_index) &&
386                            (ia->address_length == address_length) &&
387                            !ip6_address_is_equal (x, address))
388                          continue;
389
390                        if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
391                          /* if the address we're comparing against is stale
392                           * then the CP has not added this one back yet, maybe
393                           * it never will, so we have to assume it won't and
394                           * ignore it. if it does add it back, then it will fail
395                           * because this one is now present */
396                          continue;
397
398                        /* error if the length or intf was different */
399                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
400                        error =  clib_error_create
401                          ("failed to add %U which conflicts with %U for interface %U",
402                           format_ip6_address_and_length, address,
403                           address_length,
404                           format_ip6_address_and_length, x,
405                           ia->address_length,
406                           format_vnet_sw_if_index_name, vnm,
407                           sif->sw_if_index);
408                        goto done;
409                      }
410                  }));
411             }
412       }));
413     }
414   /* *INDENT-ON* */
415
416   if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
417
418   if (is_del)
419     {
420       if (~0 == if_address_index)
421         {
422           vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
423           error = clib_error_create ("%U not found for interface %U",
424                                      lm->format_address_and_length,
425                                      addr_fib, address_length,
426                                      format_vnet_sw_if_index_name, vnm,
427                                      sw_if_index);
428           goto done;
429         }
430
431       error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib,
432                                         address_length, sw_if_index);
433       if (error)
434         goto done;
435     }
436   else
437     {
438       if (~0 != if_address_index)
439         {
440           ip_interface_address_t *ia;
441
442           ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
443
444           if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
445             {
446               if (ia->sw_if_index == sw_if_index)
447                 {
448                   /* re-adding an address during the replace action.
449                    * consdier this the update. clear the flag and
450                    * we're done */
451                   ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
452                   goto done;
453                 }
454               else
455                 {
456                   /* The prefix is moving from one interface to another.
457                    * delete the stale and add the new */
458                   ip6_add_del_interface_address (vm,
459                                                  ia->sw_if_index,
460                                                  address, address_length, 1);
461                   ia = NULL;
462                   error = ip_interface_address_add (lm, sw_if_index,
463                                                     addr_fib, address_length,
464                                                     &if_address_index);
465                 }
466             }
467           else
468             {
469               vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
470               error = clib_error_create
471                 ("Prefix %U already found on interface %U",
472                  lm->format_address_and_length, addr_fib, address_length,
473                  format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
474             }
475         }
476       else
477         error = ip_interface_address_add (lm, sw_if_index,
478                                           addr_fib, address_length,
479                                           &if_address_index);
480     }
481
482   if (error)
483     goto done;
484
485   ip6_sw_interface_enable_disable (sw_if_index, !is_del);
486   if (!is_del)
487     ip6_link_enable (sw_if_index, NULL);
488
489   /* intf addr routes are added/deleted on admin up/down */
490   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
491     {
492       if (is_del)
493         ip6_del_interface_routes (sw_if_index,
494                                   im, ip6_af.fib_index, address,
495                                   address_length);
496       else
497         ip6_add_interface_routes (vnm, sw_if_index,
498                                   im, ip6_af.fib_index,
499                                   pool_elt_at_index (lm->if_address_pool,
500                                                      if_address_index));
501     }
502
503   ip6_add_del_interface_address_callback_t *cb;
504   vec_foreach (cb, im->add_del_interface_address_callbacks)
505     cb->function (im, cb->function_opaque, sw_if_index,
506                   address, address_length, if_address_index, is_del);
507
508   if (is_del)
509     ip6_link_disable (sw_if_index);
510
511 done:
512   vec_free (addr_fib);
513   return error;
514 }
515
516 #endif
517
518 static clib_error_t *
519 ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
520 {
521   ip6_main_t *im = &ip6_main;
522   ip_interface_address_t *ia;
523   ip6_address_t *a;
524   u32 is_admin_up, fib_index;
525
526   /* Fill in lookup tables with default table (0). */
527   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
528
529   vec_validate_init_empty (im->
530                            lookup_main.if_address_pool_index_by_sw_if_index,
531                            sw_if_index, ~0);
532
533   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
534
535   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
536
537   /* *INDENT-OFF* */
538   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
539                                 0 /* honor unnumbered */,
540   ({
541     a = ip_interface_address_get_address (&im->lookup_main, ia);
542     if (is_admin_up)
543       ip6_add_interface_routes (vnm, sw_if_index,
544                                 im, fib_index,
545                                 ia);
546     else
547       ip6_del_interface_routes (sw_if_index, im, fib_index,
548                                 a, ia->address_length);
549   }));
550   /* *INDENT-ON* */
551
552   return 0;
553 }
554
555 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
556
557 /* Built-in ip6 unicast rx feature arc: starts at "ip6-input", ends at "ip6-lookup"; each feature below is ordered through its runs_before list */
558 /* *INDENT-OFF* */
559 VNET_FEATURE_ARC_INIT (ip6_unicast, static) =
560 {
561   .arc_name  = "ip6-unicast",
562   .start_nodes = VNET_FEATURES ("ip6-input"),
563   .last_in_arc = "ip6-lookup",
564   .arc_index_ptr = &ip6_main.lookup_main.ucast_feature_arc_index,
565 };
566
567 VNET_FEATURE_INIT (ip6_flow_classify, static) =
568 {
569   .arc_name = "ip6-unicast",
570   .node_name = "ip6-flow-classify",
571   .runs_before = VNET_FEATURES ("ip6-inacl"),
572 };
573
574 VNET_FEATURE_INIT (ip6_inacl, static) =
575 {
576   .arc_name = "ip6-unicast",
577   .node_name = "ip6-inacl",
578   .runs_before = VNET_FEATURES ("ip6-policer-classify"),
579 };
580
581 VNET_FEATURE_INIT (ip6_policer_classify, static) =
582 {
583   .arc_name = "ip6-unicast",
584   .node_name = "ip6-policer-classify",
585   .runs_before = VNET_FEATURES ("ipsec6-input-feature"),
586 };
587
588 VNET_FEATURE_INIT (ip6_ipsec, static) =
589 {
590   .arc_name = "ip6-unicast",
591   .node_name = "ipsec6-input-feature",
592   .runs_before = VNET_FEATURES ("l2tp-decap"),
593 };
594
595 VNET_FEATURE_INIT (ip6_l2tp, static) =
596 {
597   .arc_name = "ip6-unicast",
598   .node_name = "l2tp-decap",
599   .runs_before = VNET_FEATURES ("vpath-input-ip6"),
600 };
601
602 VNET_FEATURE_INIT (ip6_vpath, static) =
603 {
604   .arc_name = "ip6-unicast",
605   .node_name = "vpath-input-ip6",
606   .runs_before = VNET_FEATURES ("ip6-vxlan-bypass"),
607 };
608
609 VNET_FEATURE_INIT (ip6_vxlan_bypass, static) =
610 {
611   .arc_name = "ip6-unicast",
612   .node_name = "ip6-vxlan-bypass",
613   .runs_before = VNET_FEATURES ("ip6-lookup"),
614 };
615
616 VNET_FEATURE_INIT (ip6_not_enabled, static) =   /* toggled per interface by ip6_sw_interface_enable_disable() and ip6_sw_interface_add_del() */
617 {
618   .arc_name = "ip6-unicast",
619   .node_name = "ip6-not-enabled",
620   .runs_before = VNET_FEATURES ("ip6-lookup"),
621 };
622
623 VNET_FEATURE_INIT (ip6_lookup, static) =
624 {
625   .arc_name = "ip6-unicast",
626   .node_name = "ip6-lookup",
627   .runs_before = 0,  /* last feature in the arc */
628 };
629
630 /* Built-in ip6 multicast rx feature arc: starts at "ip6-input", ends at "ip6-mfib-forward-lookup" */
631 VNET_FEATURE_ARC_INIT (ip6_multicast, static) =
632 {
633   .arc_name  = "ip6-multicast",
634   .start_nodes = VNET_FEATURES ("ip6-input"),
635   .last_in_arc = "ip6-mfib-forward-lookup",
636   .arc_index_ptr = &ip6_main.lookup_main.mcast_feature_arc_index,
637 };
638
639 VNET_FEATURE_INIT (ip6_vpath_mc, static) = {
640   .arc_name = "ip6-multicast",
641   .node_name = "vpath-input-ip6",
642   .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
643 };
644
645 VNET_FEATURE_INIT (ip6_not_enabled_mc, static) = {   /* toggled together with its ip6-unicast counterpart */
646   .arc_name = "ip6-multicast",
647   .node_name = "ip6-not-enabled",
648   .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
649 };
650
651 VNET_FEATURE_INIT (ip6_mc_lookup, static) = {
652   .arc_name = "ip6-multicast",
653   .node_name = "ip6-mfib-forward-lookup",
654   .runs_before = 0, /* last feature */
655 };
656
657 /* Built-in ip6 tx feature arc: entered from the rewrite, midchain and dvr-dpo nodes, ends at "interface-output" */
658 VNET_FEATURE_ARC_INIT (ip6_output, static) =
659 {
660   .arc_name  = "ip6-output",
661   .start_nodes = VNET_FEATURES ("ip6-rewrite", "ip6-midchain", "ip6-dvr-dpo"),
662   .last_in_arc = "interface-output",
663   .arc_index_ptr = &ip6_main.lookup_main.output_feature_arc_index,
664 };
665
666 VNET_FEATURE_INIT (ip6_outacl, static) = {
667   .arc_name = "ip6-output",
668   .node_name = "ip6-outacl",
669   .runs_before = VNET_FEATURES ("ipsec6-output-feature"),
670 };
671
672 VNET_FEATURE_INIT (ip6_ipsec_output, static) = {
673   .arc_name = "ip6-output",
674   .node_name = "ipsec6-output-feature",
675   .runs_before = VNET_FEATURES ("interface-output"),
676 };
677
678 VNET_FEATURE_INIT (ip6_interface_output, static) = {
679   .arc_name = "ip6-output",
680   .node_name = "interface-output",
681   .runs_before = 0, /* not before any other features */
682 };
683 /* *INDENT-ON* */
684
685 static clib_error_t *
686 ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
687 {
688   ip6_main_t *im = &ip6_main;
689
690   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
691   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
692
693   if (!is_add)
694     {
695       /* Ensure that IPv6 is disabled */
696       ip6_main_t *im6 = &ip6_main;
697       ip_lookup_main_t *lm6 = &im6->lookup_main;
698       ip_interface_address_t *ia = 0;
699       ip6_address_t *address;
700       vlib_main_t *vm = vlib_get_main ();
701
702       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
703       /* *INDENT-OFF* */
704       foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
705       ({
706         address = ip_interface_address_get_address (lm6, ia);
707         ip6_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
708       }));
709       /* *INDENT-ON* */
710       ip6_mfib_interface_enable_disable (sw_if_index, 0);
711     }
712
713   vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index,
714                                is_add, 0, 0);
715
716   vnet_feature_enable_disable ("ip6-multicast", "ip6-not-enabled",
717                                sw_if_index, is_add, 0, 0);
718
719   return /* no error */ 0;
720 }
721
722 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del);
723
/* Node function of "ip6-lookup": hands the whole frame to
 * ip6_lookup_inline() and returns its result unchanged. */
724 VLIB_NODE_FN (ip6_lookup_node) (vlib_main_t * vm,
725                                 vlib_node_runtime_t * node,
726                                 vlib_frame_t * frame)
727 {
728   return ip6_lookup_inline (vm, node, frame);
729 }
730
/* Trace formatter, declared here because the node registrations in this
 * part of the file reference it before any definition is visible. */
731 static u8 *format_ip6_lookup_trace (u8 * s, va_list * args);
732
/* Registration of the "ip6-lookup" graph node; its next nodes come from
 * IP6_LOOKUP_NEXT_NODES. */
733 /* *INDENT-OFF* */
734 VLIB_REGISTER_NODE (ip6_lookup_node) =
735 {
736   .name = "ip6-lookup",
737   .vector_size = sizeof (u32),
738   .format_trace = format_ip6_lookup_trace,
739   .n_next_nodes = IP6_LOOKUP_N_NEXT,
740   .next_nodes = IP6_LOOKUP_NEXT_NODES,
741 };
742 /* *INDENT-ON* */
743
744 VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm,
745                                       vlib_node_runtime_t * node,
746                                       vlib_frame_t * frame)
747 {
748   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
749   u32 n_left, *from;
750   u32 thread_index = vm->thread_index;
751   ip6_main_t *im = &ip6_main;
752   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
753   u16 nexts[VLIB_FRAME_SIZE], *next;
754
755   from = vlib_frame_vector_args (frame);
756   n_left = frame->n_vectors;
757   next = nexts;
758
759   vlib_get_buffers (vm, from, bufs, n_left);
760
761   while (n_left >= 4)
762     {
763       const load_balance_t *lb0, *lb1;
764       const ip6_header_t *ip0, *ip1;
765       u32 lbi0, hc0, lbi1, hc1;
766       const dpo_id_t *dpo0, *dpo1;
767
768       /* Prefetch next iteration. */
769       {
770         vlib_prefetch_buffer_header (b[2], STORE);
771         vlib_prefetch_buffer_header (b[3], STORE);
772
773         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), STORE);
774         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), STORE);
775       }
776
777       ip0 = vlib_buffer_get_current (b[0]);
778       ip1 = vlib_buffer_get_current (b[1]);
779       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
780       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
781
782       lb0 = load_balance_get (lbi0);
783       lb1 = load_balance_get (lbi1);
784
785       /*
786        * this node is for via FIBs we can re-use the hash value from the
787        * to node if present.
788        * We don't want to use the same hash value at each level in the recursion
789        * graph as that would lead to polarisation
790        */
791       hc0 = hc1 = 0;
792
793       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
794         {
795           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
796             {
797               hc0 = vnet_buffer (b[0])->ip.flow_hash =
798                 vnet_buffer (b[0])->ip.flow_hash >> 1;
799             }
800           else
801             {
802               hc0 = vnet_buffer (b[0])->ip.flow_hash =
803                 ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
804             }
805           dpo0 = load_balance_get_fwd_bucket
806             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
807         }
808       else
809         {
810           dpo0 = load_balance_get_bucket_i (lb0, 0);
811         }
812       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
813         {
814           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
815             {
816               hc1 = vnet_buffer (b[1])->ip.flow_hash =
817                 vnet_buffer (b[1])->ip.flow_hash >> 1;
818             }
819           else
820             {
821               hc1 = vnet_buffer (b[1])->ip.flow_hash =
822                 ip6_compute_flow_hash (ip1, lb1->lb_hash_config);
823             }
824           dpo1 = load_balance_get_fwd_bucket
825             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
826         }
827       else
828         {
829           dpo1 = load_balance_get_bucket_i (lb1, 0);
830         }
831
832       next[0] = dpo0->dpoi_next_node;
833       next[1] = dpo1->dpoi_next_node;
834
835       /* Only process the HBH Option Header if explicitly configured to do so */
836       if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
837         {
838           next[0] = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
839             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[0];
840         }
841       /* Only process the HBH Option Header if explicitly configured to do so */
842       if (PREDICT_FALSE (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
843         {
844           next[1] = (dpo_is_adj (dpo1) && im->hbh_enabled) ?
845             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[1];
846         }
847
848       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
849       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
850
851       vlib_increment_combined_counter
852         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
853       vlib_increment_combined_counter
854         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
855
856       b += 2;
857       next += 2;
858       n_left -= 2;
859     }
860
861   while (n_left > 0)
862     {
863       const load_balance_t *lb0;
864       const ip6_header_t *ip0;
865       const dpo_id_t *dpo0;
866       u32 lbi0, hc0;
867
868       ip0 = vlib_buffer_get_current (b[0]);
869       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
870
871       lb0 = load_balance_get (lbi0);
872
873       hc0 = 0;
874       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
875         {
876           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
877             {
878               hc0 = vnet_buffer (b[0])->ip.flow_hash =
879                 vnet_buffer (b[0])->ip.flow_hash >> 1;
880             }
881           else
882             {
883               hc0 = vnet_buffer (b[0])->ip.flow_hash =
884                 ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
885             }
886           dpo0 = load_balance_get_fwd_bucket
887             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
888         }
889       else
890         {
891           dpo0 = load_balance_get_bucket_i (lb0, 0);
892         }
893
894       next[0] = dpo0->dpoi_next_node;
895       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
896
897       /* Only process the HBH Option Header if explicitly configured to do so */
898       if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
899         {
900           next[0] = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
901             (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[0];
902         }
903
904       vlib_increment_combined_counter
905         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
906
907       b += 1;
908       next += 1;
909       n_left -= 1;
910     }
911
912   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
913
914   if (node->flags & VLIB_NODE_FLAG_TRACE)
915     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
916
917   return frame->n_vectors;
918 }
919
920 /* *INDENT-OFF* */
921 VLIB_REGISTER_NODE (ip6_load_balance_node) =
922 {
923   .name = "ip6-load-balance",
924   .vector_size = sizeof (u32),
925   .sibling_of = "ip6-lookup",
926   .format_trace = format_ip6_lookup_trace,
927 };
928 /* *INDENT-ON* */
929
930 typedef struct
931 {
932   /* Adjacency taken. */
933   u32 adj_index;
934   u32 flow_hash;
935   u32 fib_index;
936
937   /* Packet data, possibly *after* rewrite. */
938   u8 packet_data[128 - 1 * sizeof (u32)];
939 }
940 ip6_forward_next_trace_t;
941
942 #ifndef CLIB_MARCH_VARIANT
943 u8 *
944 format_ip6_forward_next_trace (u8 * s, va_list * args)
945 {
946   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
947   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
948   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
949   u32 indent = format_get_indent (s);
950
951   s = format (s, "%U%U",
952               format_white_space, indent,
953               format_ip6_header, t->packet_data, sizeof (t->packet_data));
954   return s;
955 }
956 #endif
957
958 static u8 *
959 format_ip6_lookup_trace (u8 * s, va_list * args)
960 {
961   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
962   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
963   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
964   u32 indent = format_get_indent (s);
965
966   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
967               t->fib_index, t->adj_index, t->flow_hash);
968   s = format (s, "\n%U%U",
969               format_white_space, indent,
970               format_ip6_header, t->packet_data, sizeof (t->packet_data));
971   return s;
972 }
973
974
975 static u8 *
976 format_ip6_rewrite_trace (u8 * s, va_list * args)
977 {
978   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
979   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
980   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
981   u32 indent = format_get_indent (s);
982
983   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
984               t->fib_index, t->adj_index, format_ip_adjacency,
985               t->adj_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
986   s = format (s, "\n%U%U",
987               format_white_space, indent,
988               format_ip_adjacency_packet_data,
989               t->packet_data, sizeof (t->packet_data));
990   return s;
991 }
992
993 /* Common trace function for all ip6-forward next nodes. */
994 #ifndef CLIB_MARCH_VARIANT
995 void
996 ip6_forward_next_trace (vlib_main_t * vm,
997                         vlib_node_runtime_t * node,
998                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
999 {
1000   u32 *from, n_left;
1001   ip6_main_t *im = &ip6_main;
1002
1003   n_left = frame->n_vectors;
1004   from = vlib_frame_vector_args (frame);
1005
1006   while (n_left >= 4)
1007     {
1008       u32 bi0, bi1;
1009       vlib_buffer_t *b0, *b1;
1010       ip6_forward_next_trace_t *t0, *t1;
1011
1012       /* Prefetch next iteration. */
1013       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1014       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1015
1016       bi0 = from[0];
1017       bi1 = from[1];
1018
1019       b0 = vlib_get_buffer (vm, bi0);
1020       b1 = vlib_get_buffer (vm, bi1);
1021
1022       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1023         {
1024           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1025           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1026           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1027           t0->fib_index =
1028             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1029              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1030             vec_elt (im->fib_index_by_sw_if_index,
1031                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1032
1033           clib_memcpy_fast (t0->packet_data,
1034                             vlib_buffer_get_current (b0),
1035                             sizeof (t0->packet_data));
1036         }
1037       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1038         {
1039           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1040           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1041           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1042           t1->fib_index =
1043             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1044              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1045             vec_elt (im->fib_index_by_sw_if_index,
1046                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1047
1048           clib_memcpy_fast (t1->packet_data,
1049                             vlib_buffer_get_current (b1),
1050                             sizeof (t1->packet_data));
1051         }
1052       from += 2;
1053       n_left -= 2;
1054     }
1055
1056   while (n_left >= 1)
1057     {
1058       u32 bi0;
1059       vlib_buffer_t *b0;
1060       ip6_forward_next_trace_t *t0;
1061
1062       bi0 = from[0];
1063
1064       b0 = vlib_get_buffer (vm, bi0);
1065
1066       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1067         {
1068           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1069           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1070           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1071           t0->fib_index =
1072             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1073              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1074             vec_elt (im->fib_index_by_sw_if_index,
1075                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1076
1077           clib_memcpy_fast (t0->packet_data,
1078                             vlib_buffer_get_current (b0),
1079                             sizeof (t0->packet_data));
1080         }
1081       from += 1;
1082       n_left -= 1;
1083     }
1084 }
1085
1086 /* Compute TCP/UDP/ICMP6 checksum in software. */
1087 u16
1088 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1089                                    ip6_header_t * ip0, int *bogus_lengthp)
1090 {
1091   ip_csum_t sum0 = 0;
1092   u16 payload_length, payload_length_host_byte_order;
1093   u32 i;
1094   u32 headers_size = sizeof (ip0[0]);
1095   u8 *data_this_buffer;
1096   u8 next_hdr = ip0->protocol;
1097
1098   ASSERT (bogus_lengthp);
1099   *bogus_lengthp = 0;
1100
1101   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
1102   data_this_buffer = (u8 *) (ip0 + 1);
1103   payload_length = ip0->payload_length;
1104
1105   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets)
1106    * or UDP-Ping packets */
1107   if (PREDICT_FALSE (next_hdr == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
1108     {
1109       u32 skip_bytes;
1110       ip6_hop_by_hop_ext_t *ext_hdr =
1111         (ip6_hop_by_hop_ext_t *) data_this_buffer;
1112
1113       /* validate really icmp6 next */
1114       ASSERT ((ext_hdr->next_hdr == IP_PROTOCOL_ICMP6)
1115               || (ext_hdr->next_hdr == IP_PROTOCOL_UDP));
1116
1117       skip_bytes = 8 * (1 + ext_hdr->n_data_u64s);
1118       data_this_buffer = (void *) ((u8 *) data_this_buffer + skip_bytes);
1119
1120       payload_length_host_byte_order -= skip_bytes;
1121       headers_size += skip_bytes;
1122
1123       /* pseudo-header adjustments:
1124        *   exclude ext header bytes from payload length
1125        *   use payload IP proto rather than ext header IP proto
1126        */
1127       payload_length = clib_host_to_net_u16 (payload_length_host_byte_order);
1128       next_hdr = ext_hdr->next_hdr;
1129     }
1130
1131   /* Initialize checksum with ip pseudo-header. */
1132   sum0 = payload_length + clib_host_to_net_u16 (next_hdr);
1133
1134   for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++)
1135     {
1136       sum0 = ip_csum_with_carry
1137         (sum0, clib_mem_unaligned (&ip0->src_address.as_uword[i], uword));
1138       sum0 = ip_csum_with_carry
1139         (sum0, clib_mem_unaligned (&ip0->dst_address.as_uword[i], uword));
1140     }
1141
1142   if (p0)
1143     return ip_calculate_l4_checksum (vm, p0, sum0,
1144                                      payload_length_host_byte_order,
1145                                      (u8 *) ip0, headers_size, NULL);
1146   else
1147     return ip_calculate_l4_checksum (vm, 0, sum0,
1148                                      payload_length_host_byte_order, NULL, 0,
1149                                      data_this_buffer);
1150 }
1151
1152 u32
1153 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1154 {
1155   ip6_header_t *ip0 = vlib_buffer_get_current (p0);
1156   udp_header_t *udp0;
1157   u16 sum16;
1158   int bogus_length;
1159
1160   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
1161   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1162           || ip0->protocol == IP_PROTOCOL_ICMP6
1163           || ip0->protocol == IP_PROTOCOL_UDP
1164           || ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS);
1165
1166   udp0 = (void *) (ip0 + 1);
1167   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1168     {
1169       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1170                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1171       return p0->flags;
1172     }
1173
1174   sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, &bogus_length);
1175
1176   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1177                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1178
1179   return p0->flags;
1180 }
1181 #endif
1182
1183 /**
1184  * @brief returns number of links on which src is reachable.
1185  */
1186 always_inline int
1187 ip6_urpf_loose_check (ip6_main_t * im, vlib_buffer_t * b, ip6_header_t * i)
1188 {
1189   const load_balance_t *lb0;
1190   index_t lbi;
1191   u32 fib_index;
1192
1193   fib_index = vec_elt (im->fib_index_by_sw_if_index,
1194                        vnet_buffer (b)->sw_if_index[VLIB_RX]);
1195   fib_index =
1196     (vnet_buffer (b)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
1197     fib_index : vnet_buffer (b)->sw_if_index[VLIB_TX];
1198
1199   lbi = ip6_fib_table_fwding_lookup (fib_index, &i->src_address);
1200   lb0 = load_balance_get (lbi);
1201
1202   return (fib_urpf_check_size (lb0->lb_urpf));
1203 }
1204
1205 always_inline u8
1206 ip6_next_proto_is_tcp_udp (vlib_buffer_t * p0, ip6_header_t * ip0,
1207                            u32 * udp_offset0)
1208 {
1209   u32 proto0;
1210   proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_UDP, udp_offset0);
1211   if (proto0 != IP_PROTOCOL_UDP)
1212     {
1213       proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_TCP, udp_offset0);
1214       proto0 = (proto0 == IP_PROTOCOL_TCP) ? proto0 : 0;
1215     }
1216   return proto0;
1217 }
1218
1219 /* *INDENT-OFF* */
1220 VNET_FEATURE_ARC_INIT (ip6_local) =
1221 {
1222   .arc_name  = "ip6-local",
1223   .start_nodes = VNET_FEATURES ("ip6-local"),
1224 };
1225 /* *INDENT-ON* */
1226
1227 static_always_inline u8
1228 ip6_tcp_udp_icmp_bad_length (vlib_main_t * vm, vlib_buffer_t * p0)
1229 {
1230
1231   u16 payload_length_host_byte_order;
1232   u32 n_this_buffer, n_bytes_left;
1233   ip6_header_t *ip0 = vlib_buffer_get_current (p0);
1234   u32 headers_size = sizeof (ip0[0]);
1235   u8 *data_this_buffer;
1236
1237
1238   data_this_buffer = (u8 *) (ip0 + 1);
1239
1240   ip6_hop_by_hop_ext_t *ext_hdr = (ip6_hop_by_hop_ext_t *) data_this_buffer;
1241
1242   /* validate really icmp6 next */
1243
1244   if (!(ext_hdr->next_hdr == IP_PROTOCOL_ICMP6)
1245       || (ext_hdr->next_hdr == IP_PROTOCOL_UDP))
1246     return 0;
1247
1248
1249   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
1250   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1251
1252
1253   u32 n_ip_bytes_this_buffer =
1254     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1255   if (n_this_buffer + headers_size > n_ip_bytes_this_buffer)
1256     {
1257       n_this_buffer = p0->current_length > headers_size ?
1258         n_ip_bytes_this_buffer - headers_size : 0;
1259     }
1260
1261   n_bytes_left -= n_this_buffer;
1262   n_bytes_left -= p0->total_length_not_including_first_buffer;
1263
1264   if (n_bytes_left == 0)
1265     return 0;
1266   else
1267     return 1;
1268 }
1269
1270
1271 always_inline uword
1272 ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
1273                   vlib_frame_t * frame, int head_of_feature_arc)
1274 {
1275   ip6_main_t *im = &ip6_main;
1276   ip_lookup_main_t *lm = &im->lookup_main;
1277   u32 *from, n_left_from;
1278   vlib_node_runtime_t *error_node =
1279     vlib_node_get_runtime (vm, ip6_input_node.index);
1280   u8 arc_index = vnet_feat_arc_ip6_local.feature_arc_index;
1281   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1282   u16 nexts[VLIB_FRAME_SIZE], *next;
1283
1284   from = vlib_frame_vector_args (frame);
1285   n_left_from = frame->n_vectors;
1286
1287   if (node->flags & VLIB_NODE_FLAG_TRACE)
1288     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1289
1290   vlib_get_buffers (vm, from, bufs, n_left_from);
1291   b = bufs;
1292   next = nexts;
1293
1294   while (n_left_from > 2)
1295     {
1296       /* Prefetch next iteration. */
1297       if (n_left_from >= 6)
1298         {
1299           vlib_prefetch_buffer_header (b[4], STORE);
1300           vlib_prefetch_buffer_header (b[5], STORE);
1301           vlib_prefetch_buffer_data (b[2], LOAD);
1302           vlib_prefetch_buffer_data (b[3], LOAD);
1303         }
1304
1305       u8 error[2];
1306       error[0] = IP6_ERROR_UNKNOWN_PROTOCOL;
1307       error[1] = IP6_ERROR_UNKNOWN_PROTOCOL;
1308
1309       ip6_header_t *ip[2];
1310       ip[0] = vlib_buffer_get_current (b[0]);
1311       ip[1] = vlib_buffer_get_current (b[1]);
1312
1313       if (head_of_feature_arc)
1314         {
1315           vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1316           vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1317
1318           u8 type[2];
1319           type[0] = lm->builtin_protocol_by_ip_protocol[ip[0]->protocol];
1320           type[1] = lm->builtin_protocol_by_ip_protocol[ip[1]->protocol];
1321
1322           u32 flags[2];
1323           flags[0] = b[0]->flags;
1324           flags[1] = b[1]->flags;
1325
1326           u32 good_l4_csum[2];
1327           good_l4_csum[0] =
1328             flags[0] & (VNET_BUFFER_F_L4_CHECKSUM_CORRECT |
1329                         VNET_BUFFER_F_OFFLOAD_TCP_CKSUM |
1330                         VNET_BUFFER_F_OFFLOAD_UDP_CKSUM);
1331           good_l4_csum[1] =
1332             flags[1] & (VNET_BUFFER_F_L4_CHECKSUM_CORRECT |
1333                         VNET_BUFFER_F_OFFLOAD_TCP_CKSUM |
1334                         VNET_BUFFER_F_OFFLOAD_UDP_CKSUM);
1335
1336           u32 udp_offset[2] = { };
1337           u8 is_tcp_udp[2];
1338           is_tcp_udp[0] =
1339             ip6_next_proto_is_tcp_udp (b[0], ip[0], &udp_offset[0]);
1340           is_tcp_udp[1] =
1341             ip6_next_proto_is_tcp_udp (b[1], ip[1], &udp_offset[1]);
1342           i16 len_diff[2] = { 0 };
1343           if (PREDICT_TRUE (is_tcp_udp[0]))
1344             {
1345               udp_header_t *udp =
1346                 (udp_header_t *) ((u8 *) ip[0] + udp_offset[0]);
1347               good_l4_csum[0] |= type[0] == IP_BUILTIN_PROTOCOL_UDP
1348                 && udp->checksum == 0;
1349               /* optimistically verify UDP length. */
1350               u16 ip_len, udp_len;
1351               ip_len = clib_net_to_host_u16 (ip[0]->payload_length);
1352               udp_len = clib_net_to_host_u16 (udp->length);
1353               len_diff[0] = ip_len - udp_len;
1354             }
1355           if (PREDICT_TRUE (is_tcp_udp[1]))
1356             {
1357               udp_header_t *udp =
1358                 (udp_header_t *) ((u8 *) ip[1] + udp_offset[1]);
1359               good_l4_csum[1] |= type[1] == IP_BUILTIN_PROTOCOL_UDP
1360                 && udp->checksum == 0;
1361               /* optimistically verify UDP length. */
1362               u16 ip_len, udp_len;
1363               ip_len = clib_net_to_host_u16 (ip[1]->payload_length);
1364               udp_len = clib_net_to_host_u16 (udp->length);
1365               len_diff[1] = ip_len - udp_len;
1366             }
1367
1368           good_l4_csum[0] |= type[0] == IP_BUILTIN_PROTOCOL_UNKNOWN;
1369           good_l4_csum[1] |= type[1] == IP_BUILTIN_PROTOCOL_UNKNOWN;
1370
1371           len_diff[0] = type[0] == IP_BUILTIN_PROTOCOL_UDP ? len_diff[0] : 0;
1372           len_diff[1] = type[1] == IP_BUILTIN_PROTOCOL_UDP ? len_diff[1] : 0;
1373
1374           u8 need_csum[2];
1375           need_csum[0] = type[0] != IP_BUILTIN_PROTOCOL_UNKNOWN
1376             && !good_l4_csum[0]
1377             && !(flags[0] & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
1378           need_csum[1] = type[1] != IP_BUILTIN_PROTOCOL_UNKNOWN
1379             && !good_l4_csum[1]
1380             && !(flags[1] & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
1381           if (PREDICT_FALSE (need_csum[0]))
1382             {
1383               flags[0] = ip6_tcp_udp_icmp_validate_checksum (vm, b[0]);
1384               good_l4_csum[0] = flags[0] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
1385               error[0] = IP6_ERROR_UNKNOWN_PROTOCOL;
1386             }
1387           else
1388             {
1389               if (ip6_tcp_udp_icmp_bad_length (vm, b[0]))
1390                 error[0] = IP6_ERROR_BAD_LENGTH;
1391             }
1392           if (PREDICT_FALSE (need_csum[1]))
1393             {
1394               flags[1] = ip6_tcp_udp_icmp_validate_checksum (vm, b[1]);
1395               good_l4_csum[1] = flags[1] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
1396               error[1] = IP6_ERROR_UNKNOWN_PROTOCOL;
1397             }
1398           else
1399             {
1400               if (ip6_tcp_udp_icmp_bad_length (vm, b[1]))
1401                 error[1] = IP6_ERROR_BAD_LENGTH;
1402             }
1403
1404
1405           error[0] = len_diff[0] < 0 ? IP6_ERROR_UDP_LENGTH : error[0];
1406
1407           error[1] = len_diff[1] < 0 ? IP6_ERROR_UDP_LENGTH : error[1];
1408
1409           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
1410                          IP6_ERROR_UDP_CHECKSUM,
1411                          "Wrong IP6 errors constants");
1412           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
1413                          IP6_ERROR_ICMP_CHECKSUM,
1414                          "Wrong IP6 errors constants");
1415
1416           error[0] =
1417             !good_l4_csum[0] ? IP6_ERROR_UDP_CHECKSUM + type[0] : error[0];
1418           error[1] =
1419             !good_l4_csum[1] ? IP6_ERROR_UDP_CHECKSUM + type[1] : error[1];
1420
1421           /* Drop packets from unroutable hosts. */
1422           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1423           u8 unroutable[2];
1424           unroutable[0] = error[0] == IP6_ERROR_UNKNOWN_PROTOCOL
1425             && type[0] != IP_BUILTIN_PROTOCOL_ICMP
1426             && !ip6_address_is_link_local_unicast (&ip[0]->src_address);
1427           unroutable[1] = error[1] == IP6_ERROR_UNKNOWN_PROTOCOL
1428             && type[1] != IP_BUILTIN_PROTOCOL_ICMP
1429             && !ip6_address_is_link_local_unicast (&ip[1]->src_address);
1430           if (PREDICT_FALSE (unroutable[0]))
1431             {
1432               error[0] =
1433                 !ip6_urpf_loose_check (im, b[0],
1434                                        ip[0]) ? IP6_ERROR_SRC_LOOKUP_MISS
1435                 : error[0];
1436             }
1437           if (PREDICT_FALSE (unroutable[1]))
1438             {
1439               error[1] =
1440                 !ip6_urpf_loose_check (im, b[1],
1441                                        ip[1]) ? IP6_ERROR_SRC_LOOKUP_MISS
1442                 : error[1];
1443             }
1444
1445           vnet_buffer (b[0])->ip.fib_index =
1446             vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1447             vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1448             vnet_buffer (b[0])->ip.fib_index;
1449           vnet_buffer (b[1])->ip.fib_index =
1450             vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1451             vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1452             vnet_buffer (b[1])->ip.fib_index;
1453         }                       /* head_of_feature_arc */
1454
1455       next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
1456       next[0] =
1457         error[0] != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[0];
1458       next[1] = lm->local_next_by_ip_protocol[ip[1]->protocol];
1459       next[1] =
1460         error[1] != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[1];
1461
1462       b[0]->error = error_node->errors[0];
1463       b[1]->error = error_node->errors[1];
1464
1465       if (head_of_feature_arc)
1466         {
1467           u8 ip6_unknown[2];
1468           ip6_unknown[0] = error[0] == (u8) IP6_ERROR_UNKNOWN_PROTOCOL;
1469           ip6_unknown[1] = error[1] == (u8) IP6_ERROR_UNKNOWN_PROTOCOL;
1470           if (PREDICT_TRUE (ip6_unknown[0]))
1471             {
1472               u32 next32 = next[0];
1473               vnet_feature_arc_start (arc_index,
1474                                       vnet_buffer (b[0])->sw_if_index
1475                                       [VLIB_RX], &next32, b[0]);
1476               next[0] = next32;
1477             }
1478           if (PREDICT_TRUE (ip6_unknown[1]))
1479             {
1480               u32 next32 = next[1];
1481               vnet_feature_arc_start (arc_index,
1482                                       vnet_buffer (b[1])->sw_if_index
1483                                       [VLIB_RX], &next32, b[1]);
1484               next[1] = next32;
1485             }
1486         }
1487
1488       /* next */
1489       b += 2;
1490       next += 2;
1491       n_left_from -= 2;
1492     }
1493
1494   while (n_left_from)
1495     {
1496       u8 error;
1497       error = IP6_ERROR_UNKNOWN_PROTOCOL;
1498
1499       ip6_header_t *ip;
1500       ip = vlib_buffer_get_current (b[0]);
1501
1502       if (head_of_feature_arc)
1503         {
1504           vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1505           u8 type = lm->builtin_protocol_by_ip_protocol[ip->protocol];
1506
1507           u32 flags = b[0]->flags;
1508           u32 good_l4_csum =
1509             flags & (VNET_BUFFER_F_L4_CHECKSUM_CORRECT |
1510                      VNET_BUFFER_F_OFFLOAD_TCP_CKSUM |
1511                      VNET_BUFFER_F_OFFLOAD_UDP_CKSUM);
1512
1513           u32 udp_offset;
1514           i16 len_diff = 0;
1515           u8 is_tcp_udp = ip6_next_proto_is_tcp_udp (b[0], ip, &udp_offset);
1516           if (PREDICT_TRUE (is_tcp_udp))
1517             {
1518               udp_header_t *udp = (udp_header_t *) ((u8 *) ip + udp_offset);
1519               /* Don't verify UDP checksum for packets with explicit zero checksum. */
1520               good_l4_csum |= type == IP_BUILTIN_PROTOCOL_UDP
1521                 && udp->checksum == 0;
1522               /* optimistically verify UDP length. */
1523               u16 ip_len, udp_len;
1524               ip_len = clib_net_to_host_u16 (ip->payload_length);
1525               udp_len = clib_net_to_host_u16 (udp->length);
1526               len_diff = ip_len - udp_len;
1527             }
1528
1529           good_l4_csum |= type == IP_BUILTIN_PROTOCOL_UNKNOWN;
1530           len_diff = type == IP_BUILTIN_PROTOCOL_UDP ? len_diff : 0;
1531
1532           u8 need_csum = type != IP_BUILTIN_PROTOCOL_UNKNOWN && !good_l4_csum
1533             && !(flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
1534           if (PREDICT_FALSE (need_csum))
1535             {
1536               flags = ip6_tcp_udp_icmp_validate_checksum (vm, b[0]);
1537               good_l4_csum = flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
1538               error = IP6_ERROR_UNKNOWN_PROTOCOL;
1539             }
1540           else
1541             {
1542               if (ip6_tcp_udp_icmp_bad_length (vm, b[0]))
1543                 error = IP6_ERROR_BAD_LENGTH;
1544             }
1545
1546
1547
1548           error = len_diff < 0 ? IP6_ERROR_UDP_LENGTH : error;
1549
1550           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
1551                          IP6_ERROR_UDP_CHECKSUM,
1552                          "Wrong IP6 errors constants");
1553           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
1554                          IP6_ERROR_ICMP_CHECKSUM,
1555                          "Wrong IP6 errors constants");
1556
1557           error = !good_l4_csum ? IP6_ERROR_UDP_CHECKSUM + type : error;
1558
1559           /* Drop packets from unroutable hosts. */
1560           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1561           u8 unroutable = error == IP6_ERROR_UNKNOWN_PROTOCOL
1562             && type != IP_BUILTIN_PROTOCOL_ICMP
1563             && !ip6_address_is_link_local_unicast (&ip->src_address);
1564           if (PREDICT_FALSE (unroutable))
1565             {
1566               error =
1567                 !ip6_urpf_loose_check (im, b[0],
1568                                        ip) ? IP6_ERROR_SRC_LOOKUP_MISS :
1569                 error;
1570             }
1571
1572           vnet_buffer (b[0])->ip.fib_index =
1573             vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1574             vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1575             vnet_buffer (b[0])->ip.fib_index;
1576         }                       /* head_of_feature_arc */
1577
1578       next[0] = lm->local_next_by_ip_protocol[ip->protocol];
1579       next[0] =
1580         error != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[0];
1581
1582       b[0]->error = error_node->errors[0];
1583
1584       if (head_of_feature_arc)
1585         {
1586           if (PREDICT_TRUE (error == (u8) IP6_ERROR_UNKNOWN_PROTOCOL))
1587             {
1588               u32 next32 = next[0];
1589               vnet_feature_arc_start (arc_index,
1590                                       vnet_buffer (b[0])->sw_if_index
1591                                       [VLIB_RX], &next32, b[0]);
1592               next[0] = next32;
1593             }
1594         }
1595
1596       /* next */
1597       b += 1;
1598       next += 1;
1599       n_left_from -= 1;
1600     }
1601
1602   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1603   return frame->n_vectors;
1604 }
1605
1606 VLIB_NODE_FN (ip6_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1607                                vlib_frame_t * frame)
1608 {
1609   return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1610 }
1611
1612 /* *INDENT-OFF* */
1613 VLIB_REGISTER_NODE (ip6_local_node) =
1614 {
1615   .name = "ip6-local",
1616   .vector_size = sizeof (u32),
1617   .format_trace = format_ip6_forward_next_trace,
1618   .n_next_nodes = IP_LOCAL_N_NEXT,
1619   .next_nodes =
1620   {
1621     [IP_LOCAL_NEXT_DROP] = "ip6-drop",
1622     [IP_LOCAL_NEXT_PUNT] = "ip6-punt",
1623     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
1624     [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input",
1625     [IP_LOCAL_NEXT_REASSEMBLY] = "ip6-full-reassembly",
1626   },
1627 };
1628 /* *INDENT-ON* */
1629
1630 VLIB_NODE_FN (ip6_local_end_of_arc_node) (vlib_main_t * vm,
1631                                           vlib_node_runtime_t * node,
1632                                           vlib_frame_t * frame)
1633 {
1634   return ip6_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1635 }
1636
1637 /* *INDENT-OFF* */
1638 VLIB_REGISTER_NODE (ip6_local_end_of_arc_node) = {
1639   .name = "ip6-local-end-of-arc",
1640   .vector_size = sizeof (u32),
1641
1642   .format_trace = format_ip6_forward_next_trace,
1643   .sibling_of = "ip6-local",
1644 };
1645
1646 VNET_FEATURE_INIT (ip6_local_end_of_arc, static) = {
1647   .arc_name = "ip6-local",
1648   .node_name = "ip6-local-end-of-arc",
1649   .runs_before = 0, /* not before any other features */
1650 };
1651 /* *INDENT-ON* */
1652
1653 #ifdef CLIB_MARCH_VARIANT
1654 extern vlib_node_registration_t ip6_local_node;
1655 #else
1656 void
1657 ip6_register_protocol (u32 protocol, u32 node_index)
1658 {
1659   vlib_main_t *vm = vlib_get_main ();
1660   ip6_main_t *im = &ip6_main;
1661   ip_lookup_main_t *lm = &im->lookup_main;
1662
1663   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1664   lm->local_next_by_ip_protocol[protocol] =
1665     vlib_node_add_next (vm, ip6_local_node.index, node_index);
1666 }
1667
1668 void
1669 ip6_unregister_protocol (u32 protocol)
1670 {
1671   ip6_main_t *im = &ip6_main;
1672   ip_lookup_main_t *lm = &im->lookup_main;
1673
1674   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1675   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1676 }
1677 #endif
1678
/* Next-node indices used on the IPv6 rewrite path; ip6_mtu_check ()
 * selects ICMP_ERROR or FRAGMENT for oversized packets. */
typedef enum
{
  IP6_REWRITE_NEXT_DROP = 0,
  IP6_REWRITE_NEXT_ICMP_ERROR,
  IP6_REWRITE_NEXT_FRAGMENT,
  /* Number of entries above; must stay last. */
  IP6_REWRITE_N_NEXT,
} ip6_rewrite_next_t;
1686
/**
 * The bits of an IPv6 address to mask when constructing a multicast
 * MAC address
 */
1691 #define IP6_MCAST_ADDR_MASK 0xffffffff
1692
1693 always_inline void
1694 ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes,
1695                u16 adj_packet_bytes, bool is_locally_generated,
1696                u32 * next, u8 is_midchain, u32 * error)
1697 {
1698   if (adj_packet_bytes >= 1280 && packet_bytes > adj_packet_bytes)
1699     {
1700       if (is_locally_generated)
1701         {
1702           /* IP fragmentation */
1703           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
1704                                    (is_midchain ?
1705                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
1706                                     IP_FRAG_NEXT_IP_REWRITE), 0);
1707           *next = IP6_REWRITE_NEXT_FRAGMENT;
1708           *error = IP6_ERROR_MTU_EXCEEDED;
1709         }
1710       else
1711         {
1712           *error = IP6_ERROR_MTU_EXCEEDED;
1713           icmp6_error_set_vnet_buffer (b, ICMP6_packet_too_big, 0,
1714                                        adj_packet_bytes);
1715           *next = IP6_REWRITE_NEXT_ICMP_ERROR;
1716         }
1717     }
1718 }
1719
/*
 * Shared worker behind the ip6 rewrite / midchain node functions.
 *
 * For every buffer in the frame:
 *  - unless the buffer carries VNET_BUFFER_F_LOCALLY_ORIGINATED, the hop
 *    limit is decremented; if it reaches zero the buffer is set up for an
 *    ICMP6 time-exceeded error;
 *  - the packet length (or gso_mtu_sz() for GSO buffers) is checked
 *    against the adjacency's max_l3_packet_bytes via ip6_mtu_check();
 *  - if no error was raised, the buffer is grown by the adjacency's
 *    rewrite length, its TX sw_if_index is set and it is sent to the
 *    adjacency's next_index (through the output feature arc when the
 *    adjacency has VNET_REWRITE_HAS_FEATURES set); otherwise the error is
 *    recorded on the buffer.
 *
 * do_counters  non-zero: update the per-adjacency combined counters.
 * is_midchain  non-zero: compute L4 checksums in software, pass
 *              sizeof (ip6_header_t) as the size argument to the rewrite
 *              helper and call the adjacency's midchain fixup_func, if set.
 * is_mcast     non-zero: call vnet_ip_mcast_fixup_header() with the last
 *              32-bit word of the destination address.
 *
 * Returns frame->n_vectors.
 */
always_inline uword
ip6_rewrite_inline_with_gso (vlib_main_t * vm,
                             vlib_node_runtime_t * node,
                             vlib_frame_t * frame,
                             int do_counters, int is_midchain, int is_mcast)
{
  ip_lookup_main_t *lm = &ip6_main.lookup_main;
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from, n_left_to_next, *to_next, next_index;
  /* errors are accounted against the ip6-input node's error table */
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip6_input_node.index);

  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  u32 thread_index = vm->thread_index;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Dual loop: two buffers per iteration, prefetching the next two. */
      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          const ip_adjacency_t *adj0, *adj1;
          vlib_buffer_t *p0, *p1;
          ip6_header_t *ip0, *ip1;
          u32 pi0, rw_len0, next0, error0, adj_index0;
          u32 pi1, rw_len1, next1, error1, adj_index1;
          u32 tx_sw_if_index0, tx_sw_if_index1;
          bool is_locally_originated0, is_locally_originated1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t *p2, *p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, LOAD);
            vlib_prefetch_buffer_header (p3, LOAD);

            CLIB_PREFETCH (p2->pre_data, 32, STORE);
            CLIB_PREFETCH (p3->pre_data, 32, STORE);

            CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
            CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
          }

          /* Speculatively enqueue both buffers to the current next frame. */
          pi0 = to_next[0] = from[0];
          pi1 = to_next[1] = from[1];

          from += 2;
          n_left_from -= 2;
          to_next += 2;
          n_left_to_next -= 2;

          p0 = vlib_get_buffer (vm, pi0);
          p1 = vlib_get_buffer (vm, pi1);

          adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
          adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];

          ip0 = vlib_buffer_get_current (p0);
          ip1 = vlib_buffer_get_current (p1);

          /* Default disposition is drop until the checks below pass. */
          error0 = error1 = IP6_ERROR_NONE;
          next0 = next1 = IP6_REWRITE_NEXT_DROP;

          /* Hop limit is only decremented for packets not flagged as
           * locally originated. */
          is_locally_originated0 =
            p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
          if (PREDICT_TRUE (!is_locally_originated0))
            {
              i32 hop_limit0 = ip0->hop_limit;

              /* Input node should have rejected packets with hop limit 0. */
              ASSERT (ip0->hop_limit > 0);

              hop_limit0 -= 1;

              ip0->hop_limit = hop_limit0;

              /*
               * If the hop count drops below 1 when forwarding, generate
               * an ICMP response.
               */
              if (PREDICT_FALSE (hop_limit0 <= 0))
                {
                  error0 = IP6_ERROR_TIME_EXPIRED;
                  next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
                  vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
                  icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
                                               ICMP6_time_exceeded_ttl_exceeded_in_transit,
                                               0);
                }
            }

          is_locally_originated1 =
            p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
          if (PREDICT_TRUE (!is_locally_originated1))
            {
              i32 hop_limit1 = ip1->hop_limit;

              /* Input node should have rejected packets with hop limit 0. */
              ASSERT (ip1->hop_limit > 0);

              hop_limit1 -= 1;

              ip1->hop_limit = hop_limit1;

              /*
               * If the hop count drops below 1 when forwarding, generate
               * an ICMP response.
               */
              if (PREDICT_FALSE (hop_limit1 <= 0))
                {
                  error1 = IP6_ERROR_TIME_EXPIRED;
                  next1 = IP6_REWRITE_NEXT_ICMP_ERROR;
                  vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
                  icmp6_error_set_vnet_buffer (p1, ICMP6_time_exceeded,
                                               ICMP6_time_exceeded_ttl_exceeded_in_transit,
                                               0);
                }
            }

          adj0 = adj_get (adj_index0);
          adj1 = adj_get (adj_index1);

          /* Remember the rewrite length in the buffer metadata. */
          rw_len0 = adj0[0].rewrite_header.data_bytes;
          rw_len1 = adj1[0].rewrite_header.data_bytes;
          vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
          vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;

          if (do_counters)
            {
              /* one packet; bytes = buffer chain length + rewrite length */
              vlib_increment_combined_counter
                (&adjacency_counters,
                 thread_index, adj_index0, 1,
                 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
              vlib_increment_combined_counter
                (&adjacency_counters,
                 thread_index, adj_index1, 1,
                 vlib_buffer_length_in_chain (vm, p1) + rw_len1);
            }

          /* Check MTU of outgoing interface. */
          u16 ip0_len =
            clib_net_to_host_u16 (ip0->payload_length) +
            sizeof (ip6_header_t);
          u16 ip1_len =
            clib_net_to_host_u16 (ip1->payload_length) +
            sizeof (ip6_header_t);
          /* GSO buffers are checked using gso_mtu_sz() instead of the
           * length taken from the IP header. */
          if (p0->flags & VNET_BUFFER_F_GSO)
            ip0_len = gso_mtu_sz (p0);
          if (p1->flags & VNET_BUFFER_F_GSO)
            ip1_len = gso_mtu_sz (p1);



          ip6_mtu_check (p0, ip0_len,
                         adj0[0].rewrite_header.max_l3_packet_bytes,
                         is_locally_originated0, &next0, is_midchain,
                         &error0);
          ip6_mtu_check (p1, ip1_len,
                         adj1[0].rewrite_header.max_l3_packet_bytes,
                         is_locally_originated1, &next1, is_midchain,
                         &error1);

          /* Don't adjust the buffer for hop count issue; icmp-error node
           * wants to see the IP header */
          if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
            {
              /* make room for the rewrite in front of the IP header */
              p0->current_data -= rw_len0;
              p0->current_length += rw_len0;

              tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
              vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
              next0 = adj0[0].rewrite_header.next_index;

              if (PREDICT_FALSE
                  (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
                vnet_feature_arc_start_w_cfg_index
                  (lm->output_feature_arc_index, tx_sw_if_index0, &next0, p0,
                   adj0->ia_cfg_index);
            }
          else
            {
              p0->error = error_node->errors[error0];
            }
          if (PREDICT_TRUE (error1 == IP6_ERROR_NONE))
            {
              /* make room for the rewrite in front of the IP header */
              p1->current_data -= rw_len1;
              p1->current_length += rw_len1;

              tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
              vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
              next1 = adj1[0].rewrite_header.next_index;

              if (PREDICT_FALSE
                  (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
                vnet_feature_arc_start_w_cfg_index
                  (lm->output_feature_arc_index, tx_sw_if_index1, &next1, p1,
                   adj1->ia_cfg_index);
            }
          else
            {
              p1->error = error_node->errors[error1];
            }

          /* NOTE(review): the checksum/rewrite/fixup steps below run
           * regardless of error0/error1. */
          if (is_midchain)
            {
              /* before we paint on the next header, update the L4
               * checksums if required, since there's no offload on a tunnel */
              vnet_calc_checksums_inline (vm, p0, 0 /* is_ip4 */ ,
                                          1 /* is_ip6 */ ,
                                          0 /* with gso */ );
              vnet_calc_checksums_inline (vm, p1, 0 /* is_ip4 */ ,
                                          1 /* is_ip6 */ ,
                                          0 /* with gso */ );

              /* Guess we are only writing on ipv6 header. */
              vnet_rewrite_two_headers (adj0[0], adj1[0],
                                        ip0, ip1, sizeof (ip6_header_t));
            }
          else
            /* Guess we are only writing on simple Ethernet header. */
            vnet_rewrite_two_headers (adj0[0], adj1[0],
                                      ip0, ip1, sizeof (ethernet_header_t));

          if (is_midchain)
            {
              /* per-adjacency fixup hook, called only when one is set */
              if (adj0->sub_type.midchain.fixup_func)
                adj0->sub_type.midchain.fixup_func
                  (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
              if (adj1->sub_type.midchain.fixup_func)
                adj1->sub_type.midchain.fixup_func
                  (vm, adj1, p1, adj1->sub_type.midchain.fixup_data);
            }
          if (is_mcast)
            {
              /*
               * copy bytes from the IP address into the MAC rewrite
               */
              vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
                                          adj0->
                                          rewrite_header.dst_mcast_offset,
                                          &ip0->dst_address.as_u32[3],
                                          (u8 *) ip0);
              vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
                                          adj1->
                                          rewrite_header.dst_mcast_offset,
                                          &ip1->dst_address.as_u32[3],
                                          (u8 *) ip1);
            }

          /* Fix up the speculative enqueue if next0/next1 differ from
           * next_index. */
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           pi0, pi1, next0, next1);
        }

      /* Single loop: handles whatever the dual loop left over. */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          ip_adjacency_t *adj0;
          vlib_buffer_t *p0;
          ip6_header_t *ip0;
          u32 pi0, rw_len0;
          u32 adj_index0, next0, error0;
          u32 tx_sw_if_index0;
          bool is_locally_originated0;

          pi0 = to_next[0] = from[0];

          p0 = vlib_get_buffer (vm, pi0);

          adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];

          adj0 = adj_get (adj_index0);

          ip0 = vlib_buffer_get_current (p0);

          /* Default disposition is drop until the checks below pass. */
          error0 = IP6_ERROR_NONE;
          next0 = IP6_REWRITE_NEXT_DROP;

          /* Check hop limit */
          is_locally_originated0 =
            p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
          if (PREDICT_TRUE (!is_locally_originated0))
            {
              i32 hop_limit0 = ip0->hop_limit;

              ASSERT (ip0->hop_limit > 0);

              hop_limit0 -= 1;

              ip0->hop_limit = hop_limit0;

              if (PREDICT_FALSE (hop_limit0 <= 0))
                {
                  /*
                   * If the hop count drops below 1 when forwarding, generate
                   * an ICMP response.
                   */
                  error0 = IP6_ERROR_TIME_EXPIRED;
                  next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
                  vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
                  icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
                                               ICMP6_time_exceeded_ttl_exceeded_in_transit,
                                               0);
                }
            }

          /* NOTE(review): unlike the dual loop, the rewrite is written
           * here before the counter update and the MTU check. */
          if (is_midchain)
            {
              vnet_calc_checksums_inline (vm, p0, 0 /* is_ip4 */ ,
                                          1 /* is_ip6 */ ,
                                          0 /* with gso */ );

              /* Guess we are only writing on ip6 header. */
              vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip6_header_t));
            }
          else
            /* Guess we are only writing on simple Ethernet header. */
            vnet_rewrite_one_header (adj0[0], ip0,
                                     sizeof (ethernet_header_t));

          /* Update packet buffer attributes/set output interface. */
          rw_len0 = adj0[0].rewrite_header.data_bytes;
          vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;

          if (do_counters)
            {
              /* one packet; bytes = buffer chain length + rewrite length */
              vlib_increment_combined_counter
                (&adjacency_counters,
                 thread_index, adj_index0, 1,
                 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
            }

          /* Check MTU of outgoing interface. */
          u16 ip0_len =
            clib_net_to_host_u16 (ip0->payload_length) +
            sizeof (ip6_header_t);
          if (p0->flags & VNET_BUFFER_F_GSO)
            ip0_len = gso_mtu_sz (p0);

          ip6_mtu_check (p0, ip0_len,
                         adj0[0].rewrite_header.max_l3_packet_bytes,
                         is_locally_originated0, &next0, is_midchain,
                         &error0);

          /* Don't adjust the buffer for hop count issue; icmp-error node
           * wants to see the IP header */
          if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
            {
              /* make room for the rewrite in front of the IP header */
              p0->current_data -= rw_len0;
              p0->current_length += rw_len0;

              tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;

              vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
              next0 = adj0[0].rewrite_header.next_index;

              if (PREDICT_FALSE
                  (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
                vnet_feature_arc_start_w_cfg_index
                  (lm->output_feature_arc_index, tx_sw_if_index0, &next0, p0,
                   adj0->ia_cfg_index);
            }
          else
            {
              p0->error = error_node->errors[error0];
            }

          if (is_midchain)
            {
              /* per-adjacency fixup hook, called only when one is set */
              if (adj0->sub_type.midchain.fixup_func)
                adj0->sub_type.midchain.fixup_func
                  (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
            }
          if (is_mcast)
            {
              /* copy bytes from the IP address into the MAC rewrite */
              vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
                                          adj0->
                                          rewrite_header.dst_mcast_offset,
                                          &ip0->dst_address.as_u32[3],
                                          (u8 *) ip0);
            }

          from += 1;
          n_left_from -= 1;
          to_next += 1;
          n_left_to_next -= 1;

          /* Fix up the speculative enqueue if next0 differs from
           * next_index. */
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           pi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  /* Need to do trace after rewrites to pick up new packet data. */
  if (node->flags & VLIB_NODE_FLAG_TRACE)
    ip6_forward_next_trace (vm, node, frame, VLIB_TX);

  return frame->n_vectors;
}
2124
2125 always_inline uword
2126 ip6_rewrite_inline (vlib_main_t * vm,
2127                     vlib_node_runtime_t * node,
2128                     vlib_frame_t * frame,
2129                     int do_counters, int is_midchain, int is_mcast)
2130 {
2131   return ip6_rewrite_inline_with_gso (vm, node, frame, do_counters,
2132                                       is_midchain, is_mcast);
2133 }
2134
2135 VLIB_NODE_FN (ip6_rewrite_node) (vlib_main_t * vm,
2136                                  vlib_node_runtime_t * node,
2137                                  vlib_frame_t * frame)
2138 {
2139   if (adj_are_counters_enabled ())
2140     return ip6_rewrite_inline (vm, node, frame, 1, 0, 0);
2141   else
2142     return ip6_rewrite_inline (vm, node, frame, 0, 0, 0);
2143 }
2144
2145 VLIB_NODE_FN (ip6_rewrite_bcast_node) (vlib_main_t * vm,
2146                                        vlib_node_runtime_t * node,
2147                                        vlib_frame_t * frame)
2148 {
2149   if (adj_are_counters_enabled ())
2150     return ip6_rewrite_inline (vm, node, frame, 1, 0, 0);
2151   else
2152     return ip6_rewrite_inline (vm, node, frame, 0, 0, 0);
2153 }
2154
2155 VLIB_NODE_FN (ip6_rewrite_mcast_node) (vlib_main_t * vm,
2156                                        vlib_node_runtime_t * node,
2157                                        vlib_frame_t * frame)
2158 {
2159   if (adj_are_counters_enabled ())
2160     return ip6_rewrite_inline (vm, node, frame, 1, 0, 1);
2161   else
2162     return ip6_rewrite_inline (vm, node, frame, 0, 0, 1);
2163 }
2164
2165 VLIB_NODE_FN (ip6_midchain_node) (vlib_main_t * vm,
2166                                   vlib_node_runtime_t * node,
2167                                   vlib_frame_t * frame)
2168 {
2169   if (adj_are_counters_enabled ())
2170     return ip6_rewrite_inline (vm, node, frame, 1, 1, 0);
2171   else
2172     return ip6_rewrite_inline (vm, node, frame, 0, 1, 0);
2173 }
2174
2175 VLIB_NODE_FN (ip6_mcast_midchain_node) (vlib_main_t * vm,
2176                                         vlib_node_runtime_t * node,
2177                                         vlib_frame_t * frame)
2178 {
2179   if (adj_are_counters_enabled ())
2180     return ip6_rewrite_inline (vm, node, frame, 1, 1, 1);
2181   else
2182     return ip6_rewrite_inline (vm, node, frame, 0, 1, 1);
2183 }
2184
2185 /* *INDENT-OFF* */
2186 VLIB_REGISTER_NODE (ip6_midchain_node) =
2187 {
2188   .name = "ip6-midchain",
2189   .vector_size = sizeof (u32),
2190   .format_trace = format_ip6_forward_next_trace,
2191   .sibling_of = "ip6-rewrite",
2192   };
2193
2194 VLIB_REGISTER_NODE (ip6_rewrite_node) =
2195 {
2196   .name = "ip6-rewrite",
2197   .vector_size = sizeof (u32),
2198   .format_trace = format_ip6_rewrite_trace,
2199   .n_next_nodes = IP6_REWRITE_N_NEXT,
2200   .next_nodes =
2201   {
2202     [IP6_REWRITE_NEXT_DROP] = "ip6-drop",
2203     [IP6_REWRITE_NEXT_ICMP_ERROR] = "ip6-icmp-error",
2204     [IP6_REWRITE_NEXT_FRAGMENT] = "ip6-frag",
2205   },
2206 };
2207
2208 VLIB_REGISTER_NODE (ip6_rewrite_bcast_node) = {
2209   .name = "ip6-rewrite-bcast",
2210   .vector_size = sizeof (u32),
2211
2212   .format_trace = format_ip6_rewrite_trace,
2213   .sibling_of = "ip6-rewrite",
2214 };
2215
2216 VLIB_REGISTER_NODE (ip6_rewrite_mcast_node) =
2217 {
2218   .name = "ip6-rewrite-mcast",
2219   .vector_size = sizeof (u32),
2220   .format_trace = format_ip6_rewrite_trace,
2221   .sibling_of = "ip6-rewrite",
2222 };
2223
2224
2225 VLIB_REGISTER_NODE (ip6_mcast_midchain_node) =
2226 {
2227   .name = "ip6-mcast-midchain",
2228   .vector_size = sizeof (u32),
2229   .format_trace = format_ip6_rewrite_trace,
2230   .sibling_of = "ip6-rewrite",
2231 };
2232
2233 /* *INDENT-ON* */
2234
2235 /*
2236  * Hop-by-Hop handling
2237  */
2238 #ifndef CLIB_MARCH_VARIANT
/* Hop-by-hop state used by the code below, which reads its options[],
 * trace[] and next_override members. */
ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
2240 #endif /* CLIB_MARCH_VARIANT */
2241
/* X-macro list of hop-by-hop errors: _(symbol suffix, description).
 * Expanded once into the error enum and once into the string table. */
#define foreach_ip6_hop_by_hop_error \
_(PROCESSED, "pkts with ip6 hop-by-hop options") \
_(FORMAT, "incorrectly formatted hop-by-hop options") \
_(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options")
2246
2247 /* *INDENT-OFF* */
/* Error codes generated from foreach_ip6_hop_by_hop_error, each prefixed
 * with IP6_HOP_BY_HOP_ERROR_; IP6_HOP_BY_HOP_N_ERROR is the count. */
typedef enum
{
#define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
  foreach_ip6_hop_by_hop_error
#undef _
  IP6_HOP_BY_HOP_N_ERROR,
} ip6_hop_by_hop_error_t;
2255 /* *INDENT-ON* */
2256
2257 /*
2258  * Primary h-b-h handler trace support
2259  * We work pretty hard on the problem for obvious reasons
2260  */
/* Per-packet trace record for the hop-by-hop node. */
typedef struct
{
  /* printed as "next index" by the trace formatter */
  u32 next_index;
  /* printed as "traced" by the trace formatter; presumably the number of
   * valid bytes in option_data - confirm against the node that fills it */
  u32 trace_len;
  /* raw bytes, interpreted by the formatter as a hop-by-hop header
   * followed by its options */
  u8 option_data[256];
} ip6_hop_by_hop_trace_t;
2267
2268 extern vlib_node_registration_t ip6_hop_by_hop_node;
2269
/* Description strings generated from foreach_ip6_hop_by_hop_error, in the
 * same order as the ip6_hop_by_hop_error_t enumerators. */
static char *ip6_hop_by_hop_error_strings[] = {
#define _(sym,string) string,
  foreach_ip6_hop_by_hop_error
#undef _
};
2275
2276 #ifndef CLIB_MARCH_VARIANT
2277 u8 *
2278 format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args)
2279 {
2280   ip6_hop_by_hop_header_t *hbh0 = va_arg (*args, ip6_hop_by_hop_header_t *);
2281   int total_len = va_arg (*args, int);
2282   ip6_hop_by_hop_option_t *opt0, *limit0;
2283   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2284   u8 type0;
2285
2286   s = format (s, "IP6_HOP_BY_HOP: next protocol %d len %d total %d",
2287               hbh0->protocol, (hbh0->length + 1) << 3, total_len);
2288
2289   opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2290   limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 + total_len);
2291
2292   while (opt0 < limit0)
2293     {
2294       type0 = opt0->type;
2295       switch (type0)
2296         {
2297         case 0:         /* Pad, just stop */
2298           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1);
2299           break;
2300
2301         default:
2302           if (hm->trace[type0])
2303             {
2304               s = (*hm->trace[type0]) (s, opt0);
2305             }
2306           else
2307             {
2308               s =
2309                 format (s, "\n    unrecognized option %d length %d", type0,
2310                         opt0->length);
2311             }
2312           opt0 =
2313             (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2314                                          sizeof (ip6_hop_by_hop_option_t));
2315           break;
2316         }
2317     }
2318   return s;
2319 }
2320 #endif
2321
2322 static u8 *
2323 format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
2324 {
2325   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
2326   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
2327   ip6_hop_by_hop_trace_t *t = va_arg (*args, ip6_hop_by_hop_trace_t *);
2328   ip6_hop_by_hop_header_t *hbh0;
2329   ip6_hop_by_hop_option_t *opt0, *limit0;
2330   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2331
2332   u8 type0;
2333
2334   hbh0 = (ip6_hop_by_hop_header_t *) t->option_data;
2335
2336   s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d",
2337               t->next_index, (hbh0->length + 1) << 3, t->trace_len);
2338
2339   opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2340   limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0) + t->trace_len;
2341
2342   while (opt0 < limit0)
2343     {
2344       type0 = opt0->type;
2345       switch (type0)
2346         {
2347         case 0:         /* Pad, just stop */
2348           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1;
2349           break;
2350
2351         default:
2352           if (hm->trace[type0])
2353             {
2354               s = (*hm->trace[type0]) (s, opt0);
2355             }
2356           else
2357             {
2358               s =
2359                 format (s, "\n    unrecognized option %d length %d", type0,
2360                         opt0->length);
2361             }
2362           opt0 =
2363             (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2364                                          sizeof (ip6_hop_by_hop_option_t));
2365           break;
2366         }
2367     }
2368   return s;
2369 }
2370
2371 always_inline u8
2372 ip6_scan_hbh_options (vlib_buffer_t * b0,
2373                       ip6_header_t * ip0,
2374                       ip6_hop_by_hop_header_t * hbh0,
2375                       ip6_hop_by_hop_option_t * opt0,
2376                       ip6_hop_by_hop_option_t * limit0, u32 * next0)
2377 {
2378   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2379   u8 type0;
2380   u8 error0 = 0;
2381
2382   while (opt0 < limit0)
2383     {
2384       type0 = opt0->type;
2385       switch (type0)
2386         {
2387         case 0:         /* Pad1 */
2388           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1;
2389           continue;
2390         case 1:         /* PadN */
2391           break;
2392         default:
2393           if (hm->options[type0])
2394             {
2395               if ((*hm->options[type0]) (b0, ip0, opt0) < 0)
2396                 {
2397                   error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2398                   return (error0);
2399                 }
2400             }
2401           else
2402             {
2403               /* Unrecognized mandatory option, check the two high order bits */
2404               switch (opt0->type & HBH_OPTION_TYPE_HIGH_ORDER_BITS)
2405                 {
2406                 case HBH_OPTION_TYPE_SKIP_UNKNOWN:
2407                   break;
2408                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN:
2409                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2410                   *next0 = IP_LOOKUP_NEXT_DROP;
2411                   break;
2412                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP:
2413                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2414                   *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
2415                   icmp6_error_set_vnet_buffer (b0, ICMP6_parameter_problem,
2416                                                ICMP6_parameter_problem_unrecognized_option,
2417                                                (u8 *) opt0 - (u8 *) ip0);
2418                   break;
2419                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST:
2420                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2421                   if (!ip6_address_is_multicast (&ip0->dst_address))
2422                     {
2423                       *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
2424                       icmp6_error_set_vnet_buffer (b0,
2425                                                    ICMP6_parameter_problem,
2426                                                    ICMP6_parameter_problem_unrecognized_option,
2427                                                    (u8 *) opt0 - (u8 *) ip0);
2428                     }
2429                   else
2430                     {
2431                       *next0 = IP_LOOKUP_NEXT_DROP;
2432                     }
2433                   break;
2434                 }
2435               return (error0);
2436             }
2437         }
2438       opt0 =
2439         (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2440                                      sizeof (ip6_hop_by_hop_option_t));
2441     }
2442   return (error0);
2443 }
2444
2445 /*
2446  * Process the Hop-by-Hop Options header
2447  */
2448 VLIB_NODE_FN (ip6_hop_by_hop_node) (vlib_main_t * vm,
2449                                     vlib_node_runtime_t * node,
2450                                     vlib_frame_t * frame)
2451 {
2452   vlib_node_runtime_t *error_node =
2453     vlib_node_get_runtime (vm, ip6_hop_by_hop_node.index);
2454   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2455   u32 n_left_from, *from, *to_next;
2456   ip_lookup_next_t next_index;
2457
2458   from = vlib_frame_vector_args (frame);
2459   n_left_from = frame->n_vectors;
2460   next_index = node->cached_next_index;
2461
2462   while (n_left_from > 0)
2463     {
2464       u32 n_left_to_next;
2465
2466       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2467
2468       while (n_left_from >= 4 && n_left_to_next >= 2)
2469         {
2470           u32 bi0, bi1;
2471           vlib_buffer_t *b0, *b1;
2472           u32 next0, next1;
2473           ip6_header_t *ip0, *ip1;
2474           ip6_hop_by_hop_header_t *hbh0, *hbh1;
2475           ip6_hop_by_hop_option_t *opt0, *limit0, *opt1, *limit1;
2476           u8 error0 = 0, error1 = 0;
2477
2478           /* Prefetch next iteration. */
2479           {
2480             vlib_buffer_t *p2, *p3;
2481
2482             p2 = vlib_get_buffer (vm, from[2]);
2483             p3 = vlib_get_buffer (vm, from[3]);
2484
2485             vlib_prefetch_buffer_header (p2, LOAD);
2486             vlib_prefetch_buffer_header (p3, LOAD);
2487
2488             CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
2489             CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
2490           }
2491
2492           /* Speculatively enqueue b0, b1 to the current next frame */
2493           to_next[0] = bi0 = from[0];
2494           to_next[1] = bi1 = from[1];
2495           from += 2;
2496           to_next += 2;
2497           n_left_from -= 2;
2498           n_left_to_next -= 2;
2499
2500           b0 = vlib_get_buffer (vm, bi0);
2501           b1 = vlib_get_buffer (vm, bi1);
2502
2503           /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2504           u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
2505           ip_adjacency_t *adj0 = adj_get (adj_index0);
2506           u32 adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
2507           ip_adjacency_t *adj1 = adj_get (adj_index1);
2508
2509           /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2510           next0 = adj0->lookup_next_index;
2511           next1 = adj1->lookup_next_index;
2512
2513           ip0 = vlib_buffer_get_current (b0);
2514           ip1 = vlib_buffer_get_current (b1);
2515           hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
2516           hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
2517           opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2518           opt1 = (ip6_hop_by_hop_option_t *) (hbh1 + 1);
2519           limit0 =
2520             (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
2521                                          ((hbh0->length + 1) << 3));
2522           limit1 =
2523             (ip6_hop_by_hop_option_t *) ((u8 *) hbh1 +
2524                                          ((hbh1->length + 1) << 3));
2525
2526           /*
2527            * Basic validity checks
2528            */
2529           if ((hbh0->length + 1) << 3 >
2530               clib_net_to_host_u16 (ip0->payload_length))
2531             {
2532               error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2533               next0 = IP_LOOKUP_NEXT_DROP;
2534               goto outdual;
2535             }
2536           /* Scan the set of h-b-h options, process ones that we understand */
2537           error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
2538
2539           if ((hbh1->length + 1) << 3 >
2540               clib_net_to_host_u16 (ip1->payload_length))
2541             {
2542               error1 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2543               next1 = IP_LOOKUP_NEXT_DROP;
2544               goto outdual;
2545             }
2546           /* Scan the set of h-b-h options, process ones that we understand */
2547           error1 = ip6_scan_hbh_options (b1, ip1, hbh1, opt1, limit1, &next1);
2548
2549         outdual:
2550           /* Has the classifier flagged this buffer for special treatment? */
2551           if (PREDICT_FALSE
2552               ((error0 == 0)
2553                && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
2554             next0 = hm->next_override;
2555
2556           /* Has the classifier flagged this buffer for special treatment? */
2557           if (PREDICT_FALSE
2558               ((error1 == 0)
2559                && (vnet_buffer (b1)->l2_classify.opaque_index & OI_DECAP)))
2560             next1 = hm->next_override;
2561
2562           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
2563             {
2564               if (b0->flags & VLIB_BUFFER_IS_TRACED)
2565                 {
2566                   ip6_hop_by_hop_trace_t *t =
2567                     vlib_add_trace (vm, node, b0, sizeof (*t));
2568                   u32 trace_len = (hbh0->length + 1) << 3;
2569                   t->next_index = next0;
2570                   /* Capture the h-b-h option verbatim */
2571                   trace_len =
2572                     trace_len <
2573                     ARRAY_LEN (t->option_data) ? trace_len :
2574                     ARRAY_LEN (t->option_data);
2575                   t->trace_len = trace_len;
2576                   clib_memcpy_fast (t->option_data, hbh0, trace_len);
2577                 }
2578               if (b1->flags & VLIB_BUFFER_IS_TRACED)
2579                 {
2580                   ip6_hop_by_hop_trace_t *t =
2581                     vlib_add_trace (vm, node, b1, sizeof (*t));
2582                   u32 trace_len = (hbh1->length + 1) << 3;
2583                   t->next_index = next1;
2584                   /* Capture the h-b-h option verbatim */
2585                   trace_len =
2586                     trace_len <
2587                     ARRAY_LEN (t->option_data) ? trace_len :
2588                     ARRAY_LEN (t->option_data);
2589                   t->trace_len = trace_len;
2590                   clib_memcpy_fast (t->option_data, hbh1, trace_len);
2591                 }
2592
2593             }
2594
2595           b0->error = error_node->errors[error0];
2596           b1->error = error_node->errors[error1];
2597
2598           /* verify speculative enqueue, maybe switch current next frame */
2599           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
2600                                            n_left_to_next, bi0, bi1, next0,
2601                                            next1);
2602         }
2603
2604       while (n_left_from > 0 && n_left_to_next > 0)
2605         {
2606           u32 bi0;
2607           vlib_buffer_t *b0;
2608           u32 next0;
2609           ip6_header_t *ip0;
2610           ip6_hop_by_hop_header_t *hbh0;
2611           ip6_hop_by_hop_option_t *opt0, *limit0;
2612           u8 error0 = 0;
2613
2614           /* Speculatively enqueue b0 to the current next frame */
2615           bi0 = from[0];
2616           to_next[0] = bi0;
2617           from += 1;
2618           to_next += 1;
2619           n_left_from -= 1;
2620           n_left_to_next -= 1;
2621
2622           b0 = vlib_get_buffer (vm, bi0);
2623           /*
2624            * Default use the next_index from the adjacency.
2625            * A HBH option rarely redirects to a different node
2626            */
2627           u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
2628           ip_adjacency_t *adj0 = adj_get (adj_index0);
2629           next0 = adj0->lookup_next_index;
2630
2631           ip0 = vlib_buffer_get_current (b0);
2632           hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
2633           opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2634           limit0 =
2635             (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
2636                                          ((hbh0->length + 1) << 3));
2637
2638           /*
2639            * Basic validity checks
2640            */
2641           if ((hbh0->length + 1) << 3 >
2642               clib_net_to_host_u16 (ip0->payload_length))
2643             {
2644               error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2645               next0 = IP_LOOKUP_NEXT_DROP;
2646               goto out0;
2647             }
2648
2649           /* Scan the set of h-b-h options, process ones that we understand */
2650           error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
2651
2652         out0:
2653           /* Has the classifier flagged this buffer for special treatment? */
2654           if (PREDICT_FALSE
2655               ((error0 == 0)
2656                && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
2657             next0 = hm->next_override;
2658
2659           if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
2660             {
2661               ip6_hop_by_hop_trace_t *t =
2662                 vlib_add_trace (vm, node, b0, sizeof (*t));
2663               u32 trace_len = (hbh0->length + 1) << 3;
2664               t->next_index = next0;
2665               /* Capture the h-b-h option verbatim */
2666               trace_len =
2667                 trace_len <
2668                 ARRAY_LEN (t->option_data) ? trace_len :
2669                 ARRAY_LEN (t->option_data);
2670               t->trace_len = trace_len;
2671               clib_memcpy_fast (t->option_data, hbh0, trace_len);
2672             }
2673
2674           b0->error = error_node->errors[error0];
2675
2676           /* verify speculative enqueue, maybe switch current next frame */
2677           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
2678                                            n_left_to_next, bi0, next0);
2679         }
2680       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2681     }
2682   return frame->n_vectors;
2683 }
2684
2685 /* *INDENT-OFF* */
2686 VLIB_REGISTER_NODE (ip6_hop_by_hop_node) =
2687 {
2688   .name = "ip6-hop-by-hop",
2689   .sibling_of = "ip6-lookup",
2690   .vector_size = sizeof (u32),
2691   .format_trace = format_ip6_hop_by_hop_trace,
2692   .type = VLIB_NODE_TYPE_INTERNAL,
2693   .n_errors = ARRAY_LEN (ip6_hop_by_hop_error_strings),
2694   .error_strings = ip6_hop_by_hop_error_strings,
2695   .n_next_nodes = 0,
2696 };
2697 /* *INDENT-ON* */
2698
2699 static clib_error_t *
2700 ip6_hop_by_hop_init (vlib_main_t * vm)
2701 {
2702   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2703   clib_memset (hm->options, 0, sizeof (hm->options));
2704   clib_memset (hm->trace, 0, sizeof (hm->trace));
2705   hm->next_override = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP;
2706   return (0);
2707 }
2708
2709 VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
2710
2711 #ifndef CLIB_MARCH_VARIANT
2712 void
2713 ip6_hbh_set_next_override (uword next)
2714 {
2715   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2716
2717   hm->next_override = next;
2718 }
2719
2720 int
2721 ip6_hbh_register_option (u8 option,
2722                          int options (vlib_buffer_t * b, ip6_header_t * ip,
2723                                       ip6_hop_by_hop_option_t * opt),
2724                          u8 * trace (u8 * s, ip6_hop_by_hop_option_t * opt))
2725 {
2726   ip6_main_t *im = &ip6_main;
2727   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2728
2729   ASSERT ((u32) option < ARRAY_LEN (hm->options));
2730
2731   /* Already registered */
2732   if (hm->options[option])
2733     return (-1);
2734
2735   hm->options[option] = options;
2736   hm->trace[option] = trace;
2737
2738   /* Set global variable */
2739   im->hbh_enabled = 1;
2740
2741   return (0);
2742 }
2743
2744 int
2745 ip6_hbh_unregister_option (u8 option)
2746 {
2747   ip6_main_t *im = &ip6_main;
2748   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2749
2750   ASSERT ((u32) option < ARRAY_LEN (hm->options));
2751
2752   /* Not registered */
2753   if (!hm->options[option])
2754     return (-1);
2755
2756   hm->options[option] = NULL;
2757   hm->trace[option] = NULL;
2758
2759   /* Disable global knob if this was the last option configured */
2760   int i;
2761   bool found = false;
2762   for (i = 0; i < 256; i++)
2763     {
2764       if (hm->options[option])
2765         {
2766           found = true;
2767           break;
2768         }
2769     }
2770   if (!found)
2771     im->hbh_enabled = 0;
2772
2773   return (0);
2774 }
2775
/*
 * Global IP6 main: the single ip6_main_t instance used by the functions in
 * this file. It is defined only when CLIB_MARCH_VARIANT is not defined.
 */
ip6_main_t ip6_main;
2778 #endif
2779
2780 static clib_error_t *
2781 ip6_lookup_init (vlib_main_t * vm)
2782 {
2783   ip6_main_t *im = &ip6_main;
2784   clib_error_t *error;
2785   uword i;
2786
2787   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
2788     return error;
2789
2790   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
2791     {
2792       u32 j, i0, i1;
2793
2794       i0 = i / 32;
2795       i1 = i % 32;
2796
2797       for (j = 0; j < i0; j++)
2798         im->fib_masks[i].as_u32[j] = ~0;
2799
2800       if (i1)
2801         im->fib_masks[i].as_u32[i0] =
2802           clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
2803     }
2804
2805   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 1);
2806
2807   if (im->lookup_table_nbuckets == 0)
2808     im->lookup_table_nbuckets = IP6_FIB_DEFAULT_HASH_NUM_BUCKETS;
2809
2810   im->lookup_table_nbuckets = 1 << max_log2 (im->lookup_table_nbuckets);
2811
2812   if (im->lookup_table_size == 0)
2813     im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE;
2814
2815   clib_bihash_init_24_8 (&(im->ip6_table[IP6_FIB_TABLE_FWDING].ip6_hash),
2816                          "ip6 FIB fwding table",
2817                          im->lookup_table_nbuckets, im->lookup_table_size);
2818   clib_bihash_init_24_8 (&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash,
2819                          "ip6 FIB non-fwding table",
2820                          im->lookup_table_nbuckets, im->lookup_table_size);
2821   clib_bihash_init_40_8 (&im->ip6_mtable.ip6_mhash,
2822                          "ip6 mFIB table",
2823                          im->lookup_table_nbuckets, im->lookup_table_size);
2824
2825   /* Create FIB with index 0 and table id of 0. */
2826   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
2827                                      FIB_SOURCE_DEFAULT_ROUTE);
2828   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
2829                                       MFIB_SOURCE_DEFAULT_ROUTE);
2830
2831   {
2832     pg_node_t *pn;
2833     pn = pg_get_node (ip6_lookup_node.index);
2834     pn->unformat_edit = unformat_pg_ip6_header;
2835   }
2836
2837   /* Unless explicitly configured, don't process HBH options */
2838   im->hbh_enabled = 0;
2839
2840   return error;
2841 }
2842
2843 VLIB_INIT_FUNCTION (ip6_lookup_init);
2844
2845 #ifndef CLIB_MARCH_VARIANT
2846 int
2847 vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config)
2848 {
2849   u32 fib_index;
2850
2851   fib_index = fib_table_find (FIB_PROTOCOL_IP6, table_id);
2852
2853   if (~0 == fib_index)
2854     return VNET_API_ERROR_NO_SUCH_FIB;
2855
2856   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP6,
2857                                   flow_hash_config);
2858
2859   return 0;
2860 }
2861 #endif
2862
2863 static clib_error_t *
2864 set_ip6_flow_hash_command_fn (vlib_main_t * vm,
2865                               unformat_input_t * input,
2866                               vlib_cli_command_t * cmd)
2867 {
2868   int matched = 0;
2869   u32 table_id = 0;
2870   u32 flow_hash_config = 0;
2871   int rv;
2872
2873   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2874     {
2875       if (unformat (input, "table %d", &table_id))
2876         matched = 1;
2877 #define _(a,v) \
2878     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2879       foreach_flow_hash_bit
2880 #undef _
2881         else
2882         break;
2883     }
2884
2885   if (matched == 0)
2886     return clib_error_return (0, "unknown input `%U'",
2887                               format_unformat_error, input);
2888
2889   rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config);
2890   switch (rv)
2891     {
2892     case 0:
2893       break;
2894
2895     case -1:
2896       return clib_error_return (0, "no such FIB table %d", table_id);
2897
2898     default:
2899       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2900       break;
2901     }
2902
2903   return 0;
2904 }
2905
2906 /*?
2907  * Configure the set of IPv6 fields used by the flow hash.
2908  *
2909  * @cliexpar
2910  * @parblock
2911  * Example of how to set the flow hash on a given table:
2912  * @cliexcmd{set ip6 flow-hash table 8 dst sport dport proto}
2913  *
2914  * Example of how to display the configured flow hash:
2915  * @cliexstart{show ip6 fib}
2916  * ipv6-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2917  * @::/0
2918  *   unicast-ip6-chain
2919  *   [@0]: dpo-load-balance: [index:5 buckets:1 uRPF:5 to:[0:0]]
2920  *     [0] [@0]: dpo-drop ip6
2921  * fe80::/10
2922  *   unicast-ip6-chain
2923  *   [@0]: dpo-load-balance: [index:10 buckets:1 uRPF:10 to:[0:0]]
2924  *     [0] [@2]: dpo-receive
2925  * ff02::1/128
2926  *   unicast-ip6-chain
2927  *   [@0]: dpo-load-balance: [index:8 buckets:1 uRPF:8 to:[0:0]]
2928  *     [0] [@2]: dpo-receive
2929  * ff02::2/128
2930  *   unicast-ip6-chain
2931  *   [@0]: dpo-load-balance: [index:7 buckets:1 uRPF:7 to:[0:0]]
2932  *     [0] [@2]: dpo-receive
2933  * ff02::16/128
2934  *   unicast-ip6-chain
2935  *   [@0]: dpo-load-balance: [index:9 buckets:1 uRPF:9 to:[0:0]]
2936  *     [0] [@2]: dpo-receive
2937  * ff02::1:ff00:0/104
2938  *   unicast-ip6-chain
2939  *   [@0]: dpo-load-balance: [index:6 buckets:1 uRPF:6 to:[0:0]]
2940  *     [0] [@2]: dpo-receive
2941  * ipv6-VRF:8, fib_index 1, flow hash: dst sport dport proto
2942  * @::/0
2943  *   unicast-ip6-chain
2944  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2945  *     [0] [@0]: dpo-drop ip6
2946  * @::a:1:1:0:4/126
2947  *   unicast-ip6-chain
2948  *   [@0]: dpo-load-balance: [index:27 buckets:1 uRPF:26 to:[0:0]]
2949  *     [0] [@4]: ipv6-glean: af_packet0
2950  * @::a:1:1:0:7/128
2951  *   unicast-ip6-chain
2952  *   [@0]: dpo-load-balance: [index:28 buckets:1 uRPF:27 to:[0:0]]
2953  *     [0] [@2]: dpo-receive: @::a:1:1:0:7 on af_packet0
2954  * fe80::/10
2955  *   unicast-ip6-chain
2956  *   [@0]: dpo-load-balance: [index:26 buckets:1 uRPF:25 to:[0:0]]
2957  *     [0] [@2]: dpo-receive
2958  * fe80::fe:3eff:fe3e:9222/128
2959  *   unicast-ip6-chain
2960  *   [@0]: dpo-load-balance: [index:29 buckets:1 uRPF:28 to:[0:0]]
2961  *     [0] [@2]: dpo-receive: fe80::fe:3eff:fe3e:9222 on af_packet0
2962  * ff02::1/128
2963  *   unicast-ip6-chain
2964  *   [@0]: dpo-load-balance: [index:24 buckets:1 uRPF:23 to:[0:0]]
2965  *     [0] [@2]: dpo-receive
2966  * ff02::2/128
2967  *   unicast-ip6-chain
2968  *   [@0]: dpo-load-balance: [index:23 buckets:1 uRPF:22 to:[0:0]]
2969  *     [0] [@2]: dpo-receive
2970  * ff02::16/128
2971  *   unicast-ip6-chain
2972  *   [@0]: dpo-load-balance: [index:25 buckets:1 uRPF:24 to:[0:0]]
2973  *     [0] [@2]: dpo-receive
2974  * ff02::1:ff00:0/104
2975  *   unicast-ip6-chain
2976  *   [@0]: dpo-load-balance: [index:22 buckets:1 uRPF:21 to:[0:0]]
2977  *     [0] [@2]: dpo-receive
2978  * @cliexend
2979  * @endparblock
2980 ?*/
2981 /* *INDENT-OFF* */
2982 VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) =
2983 {
2984   .path = "set ip6 flow-hash",
2985   .short_help =
2986   "set ip6 flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2987   .function = set_ip6_flow_hash_command_fn,
2988 };
2989 /* *INDENT-ON* */
2990
2991 static clib_error_t *
2992 show_ip6_local_command_fn (vlib_main_t * vm,
2993                            unformat_input_t * input, vlib_cli_command_t * cmd)
2994 {
2995   ip6_main_t *im = &ip6_main;
2996   ip_lookup_main_t *lm = &im->lookup_main;
2997   int i;
2998
2999   vlib_cli_output (vm, "Protocols handled by ip6_local");
3000   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
3001     {
3002       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
3003         {
3004
3005           u32 node_index = vlib_get_node (vm,
3006                                           ip6_local_node.index)->
3007             next_nodes[lm->local_next_by_ip_protocol[i]];
3008           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
3009                            node_index);
3010         }
3011     }
3012   return 0;
3013 }
3014
3015
3016
3017 /*?
3018  * Display the set of protocols handled by the local IPv6 stack.
3019  *
3020  * @cliexpar
3021  * Example of how to display local protocol table:
3022  * @cliexstart{show ip6 local}
3023  * Protocols handled by ip6_local
3024  * 17
3025  * 43
3026  * 58
3027  * 115
3028  * @cliexend
3029 ?*/
3030 /* *INDENT-OFF* */
3031 VLIB_CLI_COMMAND (show_ip6_local, static) =
3032 {
3033   .path = "show ip6 local",
3034   .function = show_ip6_local_command_fn,
3035   .short_help = "show ip6 local",
3036 };
3037 /* *INDENT-ON* */
3038
3039 #ifndef CLIB_MARCH_VARIANT
3040 int
3041 vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3042                              u32 table_index)
3043 {
3044   vnet_main_t *vnm = vnet_get_main ();
3045   vnet_interface_main_t *im = &vnm->interface_main;
3046   ip6_main_t *ipm = &ip6_main;
3047   ip_lookup_main_t *lm = &ipm->lookup_main;
3048   vnet_classify_main_t *cm = &vnet_classify_main;
3049   ip6_address_t *if_addr;
3050
3051   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3052     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3053
3054   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3055     return VNET_API_ERROR_NO_SUCH_ENTRY;
3056
3057   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3058   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3059
3060   if_addr = ip6_interface_first_address (ipm, sw_if_index);
3061
3062   if (NULL != if_addr)
3063     {
3064       fib_prefix_t pfx = {
3065         .fp_len = 128,
3066         .fp_proto = FIB_PROTOCOL_IP6,
3067         .fp_addr.ip6 = *if_addr,
3068       };
3069       u32 fib_index;
3070
3071       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3072                                                        sw_if_index);
3073
3074
3075       if (table_index != (u32) ~ 0)
3076         {
3077           dpo_id_t dpo = DPO_INVALID;
3078
3079           dpo_set (&dpo,
3080                    DPO_CLASSIFY,
3081                    DPO_PROTO_IP6,
3082                    classify_dpo_create (DPO_PROTO_IP6, table_index));
3083
3084           fib_table_entry_special_dpo_add (fib_index,
3085                                            &pfx,
3086                                            FIB_SOURCE_CLASSIFY,
3087                                            FIB_ENTRY_FLAG_NONE, &dpo);
3088           dpo_reset (&dpo);
3089         }
3090       else
3091         {
3092           fib_table_entry_special_remove (fib_index,
3093                                           &pfx, FIB_SOURCE_CLASSIFY);
3094         }
3095     }
3096
3097   return 0;
3098 }
3099 #endif
3100
3101 static clib_error_t *
3102 set_ip6_classify_command_fn (vlib_main_t * vm,
3103                              unformat_input_t * input,
3104                              vlib_cli_command_t * cmd)
3105 {
3106   u32 table_index = ~0;
3107   int table_index_set = 0;
3108   u32 sw_if_index = ~0;
3109   int rv;
3110
3111   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3112     {
3113       if (unformat (input, "table-index %d", &table_index))
3114         table_index_set = 1;
3115       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3116                          vnet_get_main (), &sw_if_index))
3117         ;
3118       else
3119         break;
3120     }
3121
3122   if (table_index_set == 0)
3123     return clib_error_return (0, "classify table-index must be specified");
3124
3125   if (sw_if_index == ~0)
3126     return clib_error_return (0, "interface / subif must be specified");
3127
3128   rv = vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index);
3129
3130   switch (rv)
3131     {
3132     case 0:
3133       break;
3134
3135     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3136       return clib_error_return (0, "No such interface");
3137
3138     case VNET_API_ERROR_NO_SUCH_ENTRY:
3139       return clib_error_return (0, "No such classifier table");
3140     }
3141   return 0;
3142 }
3143
3144 /*?
3145  * Assign a classification table to an interface. The classification
3146  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3147  * commands. Once the table is created, use this command to filter packets
3148  * on an interface.
3149  *
3150  * @cliexpar
3151  * Example of how to assign a classification table to an interface:
3152  * @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1}
3153 ?*/
3154 /* *INDENT-OFF* */
3155 VLIB_CLI_COMMAND (set_ip6_classify_command, static) =
3156 {
3157   .path = "set ip6 classify",
3158   .short_help =
3159   "set ip6 classify intfc <interface> table-index <classify-idx>",
3160   .function = set_ip6_classify_command_fn,
3161 };
3162 /* *INDENT-ON* */
3163
3164 static clib_error_t *
3165 ip6_config (vlib_main_t * vm, unformat_input_t * input)
3166 {
3167   ip6_main_t *im = &ip6_main;
3168   uword heapsize = 0;
3169   u32 tmp;
3170   u32 nbuckets = 0;
3171
3172   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3173     {
3174       if (unformat (input, "hash-buckets %d", &tmp))
3175         nbuckets = tmp;
3176       else if (unformat (input, "heap-size %U",
3177                          unformat_memory_size, &heapsize))
3178         ;
3179       else
3180         return clib_error_return (0, "unknown input '%U'",
3181                                   format_unformat_error, input);
3182     }
3183
3184   im->lookup_table_nbuckets = nbuckets;
3185   im->lookup_table_size = heapsize;
3186
3187   return 0;
3188 }
3189
3190 VLIB_EARLY_CONFIG_FUNCTION (ip6_config, "ip6");
3191
3192 /*
3193  * fd.io coding-style-patch-verification: ON
3194  *
3195  * Local Variables:
3196  * eval: (c-set-style "gnu")
3197  * End:
3198  */