tcp: custom checksum calculations for Ipv4/Ipv6
[vpp.git] / src / vnet / ip / ip6_forward.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip6_forward.c: IP v6 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ip/ip6_neighbor.h>
44 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vppinfra/cache.h>
47 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
48 #include <vnet/fib/ip6_fib.h>
49 #include <vnet/mfib/ip6_mfib.h>
50 #include <vnet/dpo/load_balance_map.h>
51 #include <vnet/dpo/classify_dpo.h>
52
53 #ifndef CLIB_MARCH_VARIANT
54 #include <vppinfra/bihash_template.c>
55 #endif
56 #include <vnet/ip/ip6_forward.h>
57 #include <vnet/interface_output.h>
58
/*
 * Flag used by the IOAM code: the classifier sets it and the
 * pop-hop-by-hop node checks it.
 */
#define OI_DECAP   0x80000000
61
62 static void
63 ip6_add_interface_prefix_routes (ip6_main_t * im,
64                                  u32 sw_if_index,
65                                  u32 fib_index,
66                                  ip6_address_t * address, u32 address_length)
67 {
68   ip_lookup_main_t *lm = &im->lookup_main;
69   ip_interface_prefix_t *if_prefix;
70
71   ip_interface_prefix_key_t key = {
72     .prefix = {
73                .fp_len = address_length,
74                .fp_proto = FIB_PROTOCOL_IP6,
75                .fp_addr.ip6 = {
76                                .as_u64 = {
77                                           address->as_u64[0] &
78                                           im->fib_masks[address_length].
79                                           as_u64[0],
80                                           address->
81                                           as_u64[1] &
82                                           im->fib_masks[address_length].
83                                           as_u64[1],
84                                           },
85                                },
86                },
87     .sw_if_index = sw_if_index,
88   };
89
90   /* If prefix already set on interface, just increment ref count & return */
91   if_prefix = ip_get_interface_prefix (lm, &key);
92   if (if_prefix)
93     {
94       if_prefix->ref_count += 1;
95       return;
96     }
97
98   /* New prefix - allocate a pool entry, initialize it, add to the hash */
99   pool_get (lm->if_prefix_pool, if_prefix);
100   if_prefix->ref_count = 1;
101   clib_memcpy (&if_prefix->key, &key, sizeof (key));
102   mhash_set (&lm->prefix_to_if_prefix_index, &key,
103              if_prefix - lm->if_prefix_pool, 0 /* old value */ );
104
105   /* length < 128 - add glean */
106   if (address_length < 128)
107     {
108       /* set the glean route for the prefix */
109       fib_table_entry_update_one_path (fib_index, &key.prefix,
110                                        FIB_SOURCE_INTERFACE,
111                                        (FIB_ENTRY_FLAG_CONNECTED |
112                                         FIB_ENTRY_FLAG_ATTACHED),
113                                        DPO_PROTO_IP6,
114                                        /* No next-hop address */
115                                        NULL, sw_if_index,
116                                        /* invalid FIB index */
117                                        ~0, 1,
118                                        /* no out-label stack */
119                                        NULL, FIB_ROUTE_PATH_FLAG_NONE);
120     }
121 }
122
123 static void
124 ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
125                           ip6_main_t * im, u32 fib_index,
126                           ip_interface_address_t * a)
127 {
128   ip_lookup_main_t *lm = &im->lookup_main;
129   ip6_address_t *address = ip_interface_address_get_address (lm, a);
130   fib_prefix_t pfx = {
131     .fp_len = a->address_length,
132     .fp_proto = FIB_PROTOCOL_IP6,
133     .fp_addr.ip6 = *address,
134   };
135
136   /* set special routes for the prefix if needed */
137   ip6_add_interface_prefix_routes (im, sw_if_index, fib_index,
138                                    address, a->address_length);
139
140   pfx.fp_len = 128;
141   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
142     {
143       u32 classify_table_index =
144         lm->classify_table_index_by_sw_if_index[sw_if_index];
145       if (classify_table_index != (u32) ~ 0)
146         {
147           dpo_id_t dpo = DPO_INVALID;
148
149           dpo_set (&dpo,
150                    DPO_CLASSIFY,
151                    DPO_PROTO_IP6,
152                    classify_dpo_create (DPO_PROTO_IP6, classify_table_index));
153
154           fib_table_entry_special_dpo_add (fib_index,
155                                            &pfx,
156                                            FIB_SOURCE_CLASSIFY,
157                                            FIB_ENTRY_FLAG_NONE, &dpo);
158           dpo_reset (&dpo);
159         }
160     }
161
162   fib_table_entry_update_one_path (fib_index, &pfx,
163                                    FIB_SOURCE_INTERFACE,
164                                    (FIB_ENTRY_FLAG_CONNECTED |
165                                     FIB_ENTRY_FLAG_LOCAL),
166                                    DPO_PROTO_IP6,
167                                    &pfx.fp_addr,
168                                    sw_if_index, ~0,
169                                    1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
170 }
171
172 static void
173 ip6_del_interface_prefix_routes (ip6_main_t * im,
174                                  u32 sw_if_index,
175                                  u32 fib_index,
176                                  ip6_address_t * address, u32 address_length)
177 {
178   ip_lookup_main_t *lm = &im->lookup_main;
179   ip_interface_prefix_t *if_prefix;
180
181   ip_interface_prefix_key_t key = {
182     .prefix = {
183                .fp_len = address_length,
184                .fp_proto = FIB_PROTOCOL_IP6,
185                .fp_addr.ip6 = {
186                                .as_u64 = {
187                                           address->as_u64[0] &
188                                           im->fib_masks[address_length].
189                                           as_u64[0],
190                                           address->
191                                           as_u64[1] &
192                                           im->fib_masks[address_length].
193                                           as_u64[1],
194                                           },
195                                },
196                },
197     .sw_if_index = sw_if_index,
198   };
199
200   if_prefix = ip_get_interface_prefix (lm, &key);
201   if (!if_prefix)
202     {
203       clib_warning ("Prefix not found while deleting %U",
204                     format_ip4_address_and_length, address, address_length);
205       return;
206     }
207
208   /* If not deleting last intf addr in prefix, decrement ref count & return */
209   if_prefix->ref_count -= 1;
210   if (if_prefix->ref_count > 0)
211     return;
212
213   /* length <= 30, delete glean route */
214   if (address_length <= 128)
215     {
216       /* remove glean route for prefix */
217       fib_table_entry_delete (fib_index, &key.prefix, FIB_SOURCE_INTERFACE);
218
219     }
220
221   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */ );
222   pool_put (lm->if_prefix_pool, if_prefix);
223 }
224
225 static void
226 ip6_del_interface_routes (u32 sw_if_index, ip6_main_t * im,
227                           u32 fib_index,
228                           ip6_address_t * address, u32 address_length)
229 {
230   fib_prefix_t pfx = {
231     .fp_len = 128,
232     .fp_proto = FIB_PROTOCOL_IP6,
233     .fp_addr.ip6 = *address,
234   };
235
236   /* delete special routes for the prefix if needed */
237   ip6_del_interface_prefix_routes (im, sw_if_index, fib_index,
238                                    address, address_length);
239
240   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
241 }
242
243 #ifndef CLIB_MARCH_VARIANT
244 void
245 ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
246 {
247   ip6_main_t *im = &ip6_main;
248
249   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
250
251   /*
252    * enable/disable only on the 1<->0 transition
253    */
254   if (is_enable)
255     {
256       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
257         return;
258     }
259   else
260     {
261       /* The ref count is 0 when an address is removed from an interface that has
262        * no address - this is not a ciritical error */
263       if (0 == im->ip_enabled_by_sw_if_index[sw_if_index] ||
264           0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
265         return;
266     }
267
268   vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index,
269                                !is_enable, 0, 0);
270
271   vnet_feature_enable_disable ("ip6-multicast", "ip6-not-enabled",
272                                sw_if_index, !is_enable, 0, 0);
273 }
274
275 /* get first interface address */
276 ip6_address_t *
277 ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index)
278 {
279   ip_lookup_main_t *lm = &im->lookup_main;
280   ip_interface_address_t *ia = 0;
281   ip6_address_t *result = 0;
282
283   /* *INDENT-OFF* */
284   foreach_ip_interface_address (lm, ia, sw_if_index,
285                                 1 /* honor unnumbered */,
286   ({
287     ip6_address_t * a = ip_interface_address_get_address (lm, ia);
288     result = a;
289     break;
290   }));
291   /* *INDENT-ON* */
292   return result;
293 }
294
295 clib_error_t *
296 ip6_add_del_interface_address (vlib_main_t * vm,
297                                u32 sw_if_index,
298                                ip6_address_t * address,
299                                u32 address_length, u32 is_del)
300 {
301   vnet_main_t *vnm = vnet_get_main ();
302   ip6_main_t *im = &ip6_main;
303   ip_lookup_main_t *lm = &im->lookup_main;
304   clib_error_t *error;
305   u32 if_address_index;
306   ip6_address_fib_t ip6_af, *addr_fib = 0;
307   ip6_address_t ll_addr;
308
309   /* local0 interface doesn't support IP addressing */
310   if (sw_if_index == 0)
311     {
312       return
313         clib_error_create ("local0 interface doesn't support IP addressing");
314     }
315
316   if (ip6_address_is_link_local_unicast (address))
317     {
318       if (address_length != 128)
319         {
320           vnm->api_errno = VNET_API_ERROR_ADDRESS_LENGTH_MISMATCH;
321           return
322             clib_error_create
323             ("prefix length of link-local address must be 128");
324         }
325       if (!is_del)
326         {
327           return ip6_neighbor_set_link_local_address (vm, sw_if_index,
328                                                       address);
329         }
330       else
331         {
332           ll_addr = ip6_neighbor_get_link_local_address (sw_if_index);
333           if (ip6_address_is_equal (&ll_addr, address))
334             {
335               vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_DELETABLE;
336               return clib_error_create ("address not deletable");
337             }
338           else
339             {
340               vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
341               return clib_error_create ("address not found");
342             }
343         }
344     }
345
346   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
347   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
348
349   ip6_addr_fib_init (&ip6_af, address,
350                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
351   vec_add1 (addr_fib, ip6_af);
352
353   /* *INDENT-OFF* */
354   if (!is_del)
355     {
356       /* When adding an address check that it does not conflict
357          with an existing address on any interface in this table. */
358       ip_interface_address_t *ia;
359       vnet_sw_interface_t *sif;
360
361       pool_foreach(sif, vnm->interface_main.sw_interfaces,
362       ({
363           if (im->fib_index_by_sw_if_index[sw_if_index] ==
364               im->fib_index_by_sw_if_index[sif->sw_if_index])
365             {
366               foreach_ip_interface_address
367                 (&im->lookup_main, ia, sif->sw_if_index,
368                  0 /* honor unnumbered */ ,
369                  ({
370                    ip6_address_t * x =
371                      ip_interface_address_get_address
372                      (&im->lookup_main, ia);
373                    if (ip6_destination_matches_route
374                        (im, address, x, ia->address_length) ||
375                        ip6_destination_matches_route (im,
376                                                       x,
377                                                       address,
378                                                       address_length))
379                      {
380                        /* an intf may have >1 addr from the same prefix */
381                        if ((sw_if_index == sif->sw_if_index) &&
382                            (ia->address_length == address_length) &&
383                            !ip6_address_is_equal (x, address))
384                          continue;
385
386                        /* error if the length or intf was different */
387                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
388                        return
389                          clib_error_create
390                          ("failed to add %U which conflicts with %U for interface %U",
391                           format_ip6_address_and_length, address,
392                           address_length,
393                           format_ip6_address_and_length, x,
394                           ia->address_length,
395                           format_vnet_sw_if_index_name, vnm,
396                           sif->sw_if_index);
397                      }
398                  }));
399             }
400       }));
401     }
402   /* *INDENT-ON* */
403
404   {
405     uword elts_before = pool_elts (lm->if_address_pool);
406
407     error = ip_interface_address_add_del
408       (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
409     if (error)
410       goto done;
411
412     /* Pool did not grow: add duplicate address. */
413     if (elts_before == pool_elts (lm->if_address_pool))
414       goto done;
415   }
416
417   ip6_sw_interface_enable_disable (sw_if_index, !is_del);
418
419   if (is_del)
420     ip6_del_interface_routes (sw_if_index,
421                               im, ip6_af.fib_index, address, address_length);
422   else
423     ip6_add_interface_routes (vnm, sw_if_index,
424                               im, ip6_af.fib_index,
425                               pool_elt_at_index (lm->if_address_pool,
426                                                  if_address_index));
427
428   {
429     ip6_add_del_interface_address_callback_t *cb;
430     vec_foreach (cb, im->add_del_interface_address_callbacks)
431       cb->function (im, cb->function_opaque, sw_if_index,
432                     address, address_length, if_address_index, is_del);
433   }
434
435 done:
436   vec_free (addr_fib);
437   return error;
438 }
439
440 #endif
441
442 static clib_error_t *
443 ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
444 {
445   ip6_main_t *im = &ip6_main;
446   ip_interface_address_t *ia;
447   ip6_address_t *a;
448   u32 is_admin_up, fib_index;
449
450   /* Fill in lookup tables with default table (0). */
451   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
452
453   vec_validate_init_empty (im->
454                            lookup_main.if_address_pool_index_by_sw_if_index,
455                            sw_if_index, ~0);
456
457   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
458
459   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
460
461   /* *INDENT-OFF* */
462   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
463                                 0 /* honor unnumbered */,
464   ({
465     a = ip_interface_address_get_address (&im->lookup_main, ia);
466     if (is_admin_up)
467       ip6_add_interface_routes (vnm, sw_if_index,
468                                 im, fib_index,
469                                 ia);
470     else
471       ip6_del_interface_routes (sw_if_index, im, fib_index,
472                                 a, ia->address_length);
473   }));
474   /* *INDENT-ON* */
475
476   return 0;
477 }
478
479 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
480
/*
 * Built-in ip6 unicast rx feature path definition.
 *
 * The arc starts at "ip6-input" and ends at "ip6-lookup".  The runs_before
 * constraints below chain the features as:
 *   ip6-flow-classify -> ip6-inacl -> ip6-policer-classify ->
 *   ipsec6-input-feature -> l2tp-decap -> vpath-input-ip6 ->
 *   ip6-vxlan-bypass -> ip6-lookup
 * "ip6-not-enabled" is only constrained to run before "ip6-lookup".
 */
/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip6_unicast, static) =
{
  .arc_name  = "ip6-unicast",
  .start_nodes = VNET_FEATURES ("ip6-input"),
  .last_in_arc = "ip6-lookup",
  .arc_index_ptr = &ip6_main.lookup_main.ucast_feature_arc_index,
};

VNET_FEATURE_INIT (ip6_flow_classify, static) =
{
  .arc_name = "ip6-unicast",
  .node_name = "ip6-flow-classify",
  .runs_before = VNET_FEATURES ("ip6-inacl"),
};

VNET_FEATURE_INIT (ip6_inacl, static) =
{
  .arc_name = "ip6-unicast",
  .node_name = "ip6-inacl",
  .runs_before = VNET_FEATURES ("ip6-policer-classify"),
};

VNET_FEATURE_INIT (ip6_policer_classify, static) =
{
  .arc_name = "ip6-unicast",
  .node_name = "ip6-policer-classify",
  .runs_before = VNET_FEATURES ("ipsec6-input-feature"),
};

VNET_FEATURE_INIT (ip6_ipsec, static) =
{
  .arc_name = "ip6-unicast",
  .node_name = "ipsec6-input-feature",
  .runs_before = VNET_FEATURES ("l2tp-decap"),
};

VNET_FEATURE_INIT (ip6_l2tp, static) =
{
  .arc_name = "ip6-unicast",
  .node_name = "l2tp-decap",
  .runs_before = VNET_FEATURES ("vpath-input-ip6"),
};

VNET_FEATURE_INIT (ip6_vpath, static) =
{
  .arc_name = "ip6-unicast",
  .node_name = "vpath-input-ip6",
  .runs_before = VNET_FEATURES ("ip6-vxlan-bypass"),
};

VNET_FEATURE_INIT (ip6_vxlan_bypass, static) =
{
  .arc_name = "ip6-unicast",
  .node_name = "ip6-vxlan-bypass",
  .runs_before = VNET_FEATURES ("ip6-lookup"),
};

/* Toggled per interface by ip6_sw_interface_enable_disable() */
VNET_FEATURE_INIT (ip6_not_enabled, static) =
{
  .arc_name = "ip6-unicast",
  .node_name = "ip6-not-enabled",
  .runs_before = VNET_FEATURES ("ip6-lookup"),
};

VNET_FEATURE_INIT (ip6_lookup, static) =
{
  .arc_name = "ip6-unicast",
  .node_name = "ip6-lookup",
  .runs_before = 0,  /* last feature in the arc */
};
553
/*
 * Built-in ip6 multicast rx feature path definition.
 *
 * The arc starts at "ip6-input" and ends at "ip6-mfib-forward-lookup";
 * "vpath-input-ip6" and "ip6-not-enabled" are both constrained to run
 * before the lookup.
 */
VNET_FEATURE_ARC_INIT (ip6_multicast, static) =
{
  .arc_name  = "ip6-multicast",
  .start_nodes = VNET_FEATURES ("ip6-input"),
  .last_in_arc = "ip6-mfib-forward-lookup",
  .arc_index_ptr = &ip6_main.lookup_main.mcast_feature_arc_index,
};

VNET_FEATURE_INIT (ip6_vpath_mc, static) = {
  .arc_name = "ip6-multicast",
  .node_name = "vpath-input-ip6",
  .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
};

/* Toggled per interface by ip6_sw_interface_enable_disable() */
VNET_FEATURE_INIT (ip6_not_enabled_mc, static) = {
  .arc_name = "ip6-multicast",
  .node_name = "ip6-not-enabled",
  .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
};

VNET_FEATURE_INIT (ip6_mc_lookup, static) = {
  .arc_name = "ip6-multicast",
  .node_name = "ip6-mfib-forward-lookup",
  .runs_before = 0, /* last feature */
};
580
/*
 * Built-in ip6 tx feature path definition.
 *
 * The arc can be entered from "ip6-rewrite", "ip6-midchain" or
 * "ip6-dvr-dpo" and ends at "interface-output".  The runs_before
 * constraints order the features as:
 *   ip6-outacl -> ipsec6-output-feature -> interface-output
 */
VNET_FEATURE_ARC_INIT (ip6_output, static) =
{
  .arc_name  = "ip6-output",
  .start_nodes = VNET_FEATURES ("ip6-rewrite", "ip6-midchain", "ip6-dvr-dpo"),
  .last_in_arc = "interface-output",
  .arc_index_ptr = &ip6_main.lookup_main.output_feature_arc_index,
};

VNET_FEATURE_INIT (ip6_outacl, static) = {
  .arc_name = "ip6-output",
  .node_name = "ip6-outacl",
  .runs_before = VNET_FEATURES ("ipsec6-output-feature"),
};

VNET_FEATURE_INIT (ip6_ipsec_output, static) = {
  .arc_name = "ip6-output",
  .node_name = "ipsec6-output-feature",
  .runs_before = VNET_FEATURES ("interface-output"),
};

VNET_FEATURE_INIT (ip6_interface_output, static) = {
  .arc_name = "ip6-output",
  .node_name = "interface-output",
  .runs_before = 0, /* not before any other features */
};
/* *INDENT-ON* */
608
609 static clib_error_t *
610 ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
611 {
612   ip6_main_t *im = &ip6_main;
613
614   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
615   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
616
617   if (!is_add)
618     {
619       /* Ensure that IPv6 is disabled */
620       ip6_main_t *im6 = &ip6_main;
621       ip_lookup_main_t *lm6 = &im6->lookup_main;
622       ip_interface_address_t *ia = 0;
623       ip6_address_t *address;
624       vlib_main_t *vm = vlib_get_main ();
625
626       ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, 0 /* is_add */ );
627       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
628       /* *INDENT-OFF* */
629       foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
630       ({
631         address = ip_interface_address_get_address (lm6, ia);
632         ip6_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
633       }));
634       /* *INDENT-ON* */
635       ip6_mfib_interface_enable_disable (sw_if_index, 0);
636     }
637
638   vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index,
639                                is_add, 0, 0);
640
641   vnet_feature_enable_disable ("ip6-multicast", "ip6-not-enabled",
642                                sw_if_index, is_add, 0, 0);
643
644   return /* no error */ 0;
645 }
646
647 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del);
648
/*
 * "ip6-lookup" node function: a thin wrapper that hands the whole frame
 * to ip6_lookup_inline() and returns its result.
 */
VLIB_NODE_FN (ip6_lookup_node) (vlib_main_t * vm,
                                vlib_node_runtime_t * node,
                                vlib_frame_t * frame)
{
  return ip6_lookup_inline (vm, node, frame);
}

/* Defined further down; needed here for the node registrations */
static u8 *format_ip6_lookup_trace (u8 * s, va_list * args);

/* Registration of the "ip6-lookup" graph node */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_lookup_node) =
{
  .name = "ip6-lookup",
  .vector_size = sizeof (u32),
  .format_trace = format_ip6_lookup_trace,
  .n_next_nodes = IP6_LOOKUP_N_NEXT,
  .next_nodes = IP6_LOOKUP_NEXT_NODES,
};
/* *INDENT-ON* */
668
/*
 * "ip6-load-balance" node function.
 *
 * For every buffer in the frame: read the load-balance index stored in
 * ip.adj_index[VLIB_TX], pick one of the load-balance's buckets (using the
 * flow hash when there is more than one bucket), write the chosen DPO's
 * index back into ip.adj_index[VLIB_TX] and use the DPO's next node as the
 * buffer's next.  Packets whose first next-header is hop-by-hop options
 * are diverted to the hop-by-hop node when the chosen DPO is an adjacency
 * and hbh processing is enabled.  The load-balance "via" counters are
 * incremented per packet.
 *
 * Returns the number of buffers in the frame.
 */
VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm,
                                      vlib_node_runtime_t * node,
                                      vlib_frame_t * frame)
{
  vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
  u32 n_left, *from;
  u32 thread_index = vm->thread_index;
  ip6_main_t *im = &ip6_main;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
  u16 nexts[VLIB_FRAME_SIZE], *next;

  from = vlib_frame_vector_args (frame);
  n_left = frame->n_vectors;
  next = nexts;

  vlib_get_buffers (vm, from, bufs, n_left);

  /*
   * Dual loop: two buffers are handled per iteration, but at least four
   * must remain so that b[2] and b[3] (prefetched below) are valid.
   */
  while (n_left >= 4)
    {
      const load_balance_t *lb0, *lb1;
      const ip6_header_t *ip0, *ip1;
      u32 lbi0, hc0, lbi1, hc1;
      const dpo_id_t *dpo0, *dpo1;

      /* Prefetch next iteration. */
      {
        vlib_prefetch_buffer_header (b[2], STORE);
        vlib_prefetch_buffer_header (b[3], STORE);

        CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), STORE);
        CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), STORE);
      }

      ip0 = vlib_buffer_get_current (b[0]);
      ip1 = vlib_buffer_get_current (b[1]);
      /* On entry adj_index[VLIB_TX] holds the load-balance index */
      lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
      lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];

      lb0 = load_balance_get (lbi0);
      lb1 = load_balance_get (lbi1);

      /*
       * This node is for via FIBs; we can re-use the hash value from the
       * previous node if present.
       * We don't want to use the same hash value at each level in the
       * recursion graph as that would lead to polarisation, so a hash that
       * is already present is shifted right by one before use.
       */
      hc0 = hc1 = 0;

      if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
        {
          if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
            {
              /* Re-use the stored hash, shifted, and store it back */
              hc0 = vnet_buffer (b[0])->ip.flow_hash =
                vnet_buffer (b[0])->ip.flow_hash >> 1;
            }
          else
            {
              /* No hash yet: compute one and remember it in the buffer */
              hc0 = vnet_buffer (b[0])->ip.flow_hash =
                ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
            }
          dpo0 = load_balance_get_fwd_bucket
            (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
        }
      else
        {
          /* Single bucket: no hashing needed */
          dpo0 = load_balance_get_bucket_i (lb0, 0);
        }
      if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
        {
          if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
            {
              hc1 = vnet_buffer (b[1])->ip.flow_hash =
                vnet_buffer (b[1])->ip.flow_hash >> 1;
            }
          else
            {
              hc1 = vnet_buffer (b[1])->ip.flow_hash =
                ip6_compute_flow_hash (ip1, lb1->lb_hash_config);
            }
          dpo1 = load_balance_get_fwd_bucket
            (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
        }
      else
        {
          dpo1 = load_balance_get_bucket_i (lb1, 0);
        }

      next[0] = dpo0->dpoi_next_node;
      next[1] = dpo1->dpoi_next_node;

      /* Only process the HBH Option Header if explicitly configured to do so */
      if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
        {
          next[0] = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
            (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[0];
        }
      /* Only process the HBH Option Header if explicitly configured to do so */
      if (PREDICT_FALSE (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
        {
          next[1] = (dpo_is_adj (dpo1) && im->hbh_enabled) ?
            (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[1];
        }

      /* Replace the load-balance index with the chosen DPO's index */
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;

      /* Count one packet and its chain length against the load-balance */
      vlib_increment_combined_counter
        (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
      vlib_increment_combined_counter
        (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));

      b += 2;
      next += 2;
      n_left -= 2;
    }

  /* Single loop: same processing for the remaining buffers, one at a time */
  while (n_left > 0)
    {
      const load_balance_t *lb0;
      const ip6_header_t *ip0;
      const dpo_id_t *dpo0;
      u32 lbi0, hc0;

      ip0 = vlib_buffer_get_current (b[0]);
      lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];

      lb0 = load_balance_get (lbi0);

      hc0 = 0;
      if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
        {
          if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
            {
              hc0 = vnet_buffer (b[0])->ip.flow_hash =
                vnet_buffer (b[0])->ip.flow_hash >> 1;
            }
          else
            {
              hc0 = vnet_buffer (b[0])->ip.flow_hash =
                ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
            }
          dpo0 = load_balance_get_fwd_bucket
            (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
        }
      else
        {
          dpo0 = load_balance_get_bucket_i (lb0, 0);
        }

      next[0] = dpo0->dpoi_next_node;
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;

      /* Only process the HBH Option Header if explicitly configured to do so */
      if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
        {
          next[0] = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
            (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[0];
        }

      vlib_increment_combined_counter
        (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));

      b += 1;
      next += 1;
      n_left -= 1;
    }

  /* Hand every buffer to the next node recorded for it in nexts[] */
  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    ip6_forward_next_trace (vm, node, frame, VLIB_TX);

  return frame->n_vectors;
}
844
/*
 * Registration of the "ip6-load-balance" graph node.  It is declared a
 * sibling of "ip6-lookup" and uses the same trace formatter.
 */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_load_balance_node) =
{
  .name = "ip6-load-balance",
  .vector_size = sizeof (u32),
  .sibling_of = "ip6-lookup",
  .format_trace = format_ip6_lookup_trace,
};
/* *INDENT-ON* */
854
/*
 * Per-packet trace record shared by the trace formatters in this file
 * (format_ip6_forward_next_trace, format_ip6_lookup_trace,
 * format_ip6_rewrite_trace).
 */
typedef struct
{
  /* Adjacency taken. */
  u32 adj_index;
  /* Flow hash recorded for the packet */
  u32 flow_hash;
  /* Printed as "fib" by the lookup formatter and as "tx_sw_if_index" by
   * the rewrite formatter */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. */
  u8 packet_data[128 - 1 * sizeof (u32)];
}
ip6_forward_next_trace_t;
866
867 #ifndef CLIB_MARCH_VARIANT
868 u8 *
869 format_ip6_forward_next_trace (u8 * s, va_list * args)
870 {
871   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
872   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
873   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
874   u32 indent = format_get_indent (s);
875
876   s = format (s, "%U%U",
877               format_white_space, indent,
878               format_ip6_header, t->packet_data, sizeof (t->packet_data));
879   return s;
880 }
881 #endif
882
883 static u8 *
884 format_ip6_lookup_trace (u8 * s, va_list * args)
885 {
886   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
887   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
888   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
889   u32 indent = format_get_indent (s);
890
891   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
892               t->fib_index, t->adj_index, t->flow_hash);
893   s = format (s, "\n%U%U",
894               format_white_space, indent,
895               format_ip6_header, t->packet_data, sizeof (t->packet_data));
896   return s;
897 }
898
899
900 static u8 *
901 format_ip6_rewrite_trace (u8 * s, va_list * args)
902 {
903   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
904   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
905   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
906   u32 indent = format_get_indent (s);
907
908   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
909               t->fib_index, t->adj_index, format_ip_adjacency,
910               t->adj_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
911   s = format (s, "\n%U%U",
912               format_white_space, indent,
913               format_ip_adjacency_packet_data,
914               t->adj_index, t->packet_data, sizeof (t->packet_data));
915   return s;
916 }
917
918 /* Common trace function for all ip6-forward next nodes. */
919 #ifndef CLIB_MARCH_VARIANT
920 void
921 ip6_forward_next_trace (vlib_main_t * vm,
922                         vlib_node_runtime_t * node,
923                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
924 {
925   u32 *from, n_left;
926   ip6_main_t *im = &ip6_main;
927
928   n_left = frame->n_vectors;
929   from = vlib_frame_vector_args (frame);
930
931   while (n_left >= 4)
932     {
933       u32 bi0, bi1;
934       vlib_buffer_t *b0, *b1;
935       ip6_forward_next_trace_t *t0, *t1;
936
937       /* Prefetch next iteration. */
938       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
939       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
940
941       bi0 = from[0];
942       bi1 = from[1];
943
944       b0 = vlib_get_buffer (vm, bi0);
945       b1 = vlib_get_buffer (vm, bi1);
946
947       if (b0->flags & VLIB_BUFFER_IS_TRACED)
948         {
949           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
950           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
951           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
952           t0->fib_index =
953             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
954              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
955             vec_elt (im->fib_index_by_sw_if_index,
956                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
957
958           clib_memcpy_fast (t0->packet_data,
959                             vlib_buffer_get_current (b0),
960                             sizeof (t0->packet_data));
961         }
962       if (b1->flags & VLIB_BUFFER_IS_TRACED)
963         {
964           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
965           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
966           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
967           t1->fib_index =
968             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
969              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
970             vec_elt (im->fib_index_by_sw_if_index,
971                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
972
973           clib_memcpy_fast (t1->packet_data,
974                             vlib_buffer_get_current (b1),
975                             sizeof (t1->packet_data));
976         }
977       from += 2;
978       n_left -= 2;
979     }
980
981   while (n_left >= 1)
982     {
983       u32 bi0;
984       vlib_buffer_t *b0;
985       ip6_forward_next_trace_t *t0;
986
987       bi0 = from[0];
988
989       b0 = vlib_get_buffer (vm, bi0);
990
991       if (b0->flags & VLIB_BUFFER_IS_TRACED)
992         {
993           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
994           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
995           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
996           t0->fib_index =
997             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
998              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
999             vec_elt (im->fib_index_by_sw_if_index,
1000                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1001
1002           clib_memcpy_fast (t0->packet_data,
1003                             vlib_buffer_get_current (b0),
1004                             sizeof (t0->packet_data));
1005         }
1006       from += 1;
1007       n_left -= 1;
1008     }
1009 }
1010
1011 /* Compute TCP/UDP/ICMP6 checksum in software. */
1012 u16
1013 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1014                                    ip6_header_t * ip0, int *bogus_lengthp)
1015 {
1016   ip_csum_t sum0;
1017   u16 payload_length_host_byte_order;
1018   u32 i;
1019   u32 headers_size = sizeof (ip0[0]);
1020   u8 *data_this_buffer;
1021
1022   ASSERT (bogus_lengthp);
1023   *bogus_lengthp = 0;
1024
1025   /* Initialize checksum with ip header. */
1026   sum0 = ip0->payload_length + clib_host_to_net_u16 (ip0->protocol);
1027   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
1028   data_this_buffer = (u8 *) (ip0 + 1);
1029
1030   for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++)
1031     {
1032       sum0 = ip_csum_with_carry
1033         (sum0, clib_mem_unaligned (&ip0->src_address.as_uword[i], uword));
1034       sum0 = ip_csum_with_carry
1035         (sum0, clib_mem_unaligned (&ip0->dst_address.as_uword[i], uword));
1036     }
1037
1038   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets)
1039    * or UDP-Ping packets */
1040   if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
1041     {
1042       u32 skip_bytes;
1043       ip6_hop_by_hop_ext_t *ext_hdr =
1044         (ip6_hop_by_hop_ext_t *) data_this_buffer;
1045
1046       /* validate really icmp6 next */
1047       ASSERT ((ext_hdr->next_hdr == IP_PROTOCOL_ICMP6)
1048               || (ext_hdr->next_hdr == IP_PROTOCOL_UDP));
1049
1050       skip_bytes = 8 * (1 + ext_hdr->n_data_u64s);
1051       data_this_buffer = (void *) ((u8 *) data_this_buffer + skip_bytes);
1052
1053       payload_length_host_byte_order -= skip_bytes;
1054       headers_size += skip_bytes;
1055     }
1056
1057   if (p0)
1058     return ip_calculate_l4_checksum (vm, p0, sum0,
1059                                      payload_length_host_byte_order,
1060                                      (u8 *) ip0, headers_size, NULL);
1061   else
1062     return ip_calculate_l4_checksum (vm, 0, sum0,
1063                                      payload_length_host_byte_order, NULL, 0,
1064                                      data_this_buffer);
1065 }
1066
1067 u32
1068 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1069 {
1070   ip6_header_t *ip0 = vlib_buffer_get_current (p0);
1071   udp_header_t *udp0;
1072   u16 sum16;
1073   int bogus_length;
1074
1075   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
1076   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1077           || ip0->protocol == IP_PROTOCOL_ICMP6
1078           || ip0->protocol == IP_PROTOCOL_UDP
1079           || ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS);
1080
1081   udp0 = (void *) (ip0 + 1);
1082   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1083     {
1084       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1085                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1086       return p0->flags;
1087     }
1088
1089   sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, &bogus_length);
1090
1091   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1092                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1093
1094   return p0->flags;
1095 }
1096 #endif
1097
1098 /**
1099  * @brief returns number of links on which src is reachable.
1100  */
1101 always_inline int
1102 ip6_urpf_loose_check (ip6_main_t * im, vlib_buffer_t * b, ip6_header_t * i)
1103 {
1104   const load_balance_t *lb0;
1105   index_t lbi;
1106   u32 fib_index;
1107
1108   fib_index = vec_elt (im->fib_index_by_sw_if_index,
1109                        vnet_buffer (b)->sw_if_index[VLIB_RX]);
1110   fib_index =
1111     (vnet_buffer (b)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
1112     fib_index : vnet_buffer (b)->sw_if_index[VLIB_TX];
1113
1114   lbi = ip6_fib_table_fwding_lookup (fib_index, &i->src_address);
1115   lb0 = load_balance_get (lbi);
1116
1117   return (fib_urpf_check_size (lb0->lb_urpf));
1118 }
1119
1120 always_inline u8
1121 ip6_next_proto_is_tcp_udp (vlib_buffer_t * p0, ip6_header_t * ip0,
1122                            u32 * udp_offset0)
1123 {
1124   u32 proto0;
1125   proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_UDP, udp_offset0);
1126   if (proto0 != IP_PROTOCOL_UDP)
1127     {
1128       proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_TCP, udp_offset0);
1129       proto0 = (proto0 == IP_PROTOCOL_TCP) ? proto0 : 0;
1130     }
1131   return proto0;
1132 }
1133
1134 /* *INDENT-OFF* */
1135 VNET_FEATURE_ARC_INIT (ip6_local) =
1136 {
1137   .arc_name  = "ip6-local",
1138   .start_nodes = VNET_FEATURES ("ip6-local"),
1139 };
1140 /* *INDENT-ON* */
1141
1142 always_inline uword
1143 ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
1144                   vlib_frame_t * frame, int head_of_feature_arc)
1145 {
1146   ip6_main_t *im = &ip6_main;
1147   ip_lookup_main_t *lm = &im->lookup_main;
1148   u32 *from, n_left_from;
1149   vlib_node_runtime_t *error_node =
1150     vlib_node_get_runtime (vm, ip6_input_node.index);
1151   u8 arc_index = vnet_feat_arc_ip6_local.feature_arc_index;
1152   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1153   u16 nexts[VLIB_FRAME_SIZE], *next;
1154
1155   from = vlib_frame_vector_args (frame);
1156   n_left_from = frame->n_vectors;
1157
1158   if (node->flags & VLIB_NODE_FLAG_TRACE)
1159     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1160
1161   vlib_get_buffers (vm, from, bufs, n_left_from);
1162   b = bufs;
1163   next = nexts;
1164
1165   while (n_left_from > 2)
1166     {
1167       /* Prefetch next iteration. */
1168       if (n_left_from >= 6)
1169         {
1170           vlib_prefetch_buffer_header (b[4], STORE);
1171           vlib_prefetch_buffer_header (b[5], STORE);
1172           vlib_prefetch_buffer_data (b[2], LOAD);
1173           vlib_prefetch_buffer_data (b[3], LOAD);
1174         }
1175
1176       u8 error[2];
1177       error[0] = IP6_ERROR_UNKNOWN_PROTOCOL;
1178       error[1] = IP6_ERROR_UNKNOWN_PROTOCOL;
1179
1180       ip6_header_t *ip[2];
1181       ip[0] = vlib_buffer_get_current (b[0]);
1182       ip[1] = vlib_buffer_get_current (b[1]);
1183
1184       if (head_of_feature_arc)
1185         {
1186           vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1187           vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1188
1189           u8 type[2];
1190           type[0] = lm->builtin_protocol_by_ip_protocol[ip[0]->protocol];
1191           type[1] = lm->builtin_protocol_by_ip_protocol[ip[1]->protocol];
1192
1193           u32 flags[2];
1194           flags[0] = b[0]->flags;
1195           flags[1] = b[1]->flags;
1196
1197           u32 good_l4_csum[2];
1198           good_l4_csum[0] =
1199             flags[0] & (VNET_BUFFER_F_L4_CHECKSUM_CORRECT |
1200                         VNET_BUFFER_F_OFFLOAD_TCP_CKSUM |
1201                         VNET_BUFFER_F_OFFLOAD_UDP_CKSUM);
1202           good_l4_csum[1] =
1203             flags[1] & (VNET_BUFFER_F_L4_CHECKSUM_CORRECT |
1204                         VNET_BUFFER_F_OFFLOAD_TCP_CKSUM |
1205                         VNET_BUFFER_F_OFFLOAD_UDP_CKSUM);
1206
1207           u32 udp_offset[2] = { };
1208           u8 is_tcp_udp[2];
1209           is_tcp_udp[0] =
1210             ip6_next_proto_is_tcp_udp (b[0], ip[0], &udp_offset[0]);
1211           is_tcp_udp[1] =
1212             ip6_next_proto_is_tcp_udp (b[1], ip[1], &udp_offset[1]);
1213           i16 len_diff[2] = { 0 };
1214           if (PREDICT_TRUE (is_tcp_udp[0]))
1215             {
1216               udp_header_t *udp =
1217                 (udp_header_t *) ((u8 *) ip[0] + udp_offset[0]);
1218               good_l4_csum[0] |= type[0] == IP_BUILTIN_PROTOCOL_UDP
1219                 && udp->checksum == 0;
1220               /* optimistically verify UDP length. */
1221               u16 ip_len, udp_len;
1222               ip_len = clib_net_to_host_u16 (ip[0]->payload_length);
1223               udp_len = clib_net_to_host_u16 (udp->length);
1224               len_diff[0] = ip_len - udp_len;
1225             }
1226           if (PREDICT_TRUE (is_tcp_udp[1]))
1227             {
1228               udp_header_t *udp =
1229                 (udp_header_t *) ((u8 *) ip[1] + udp_offset[1]);
1230               good_l4_csum[1] |= type[1] == IP_BUILTIN_PROTOCOL_UDP
1231                 && udp->checksum == 0;
1232               /* optimistically verify UDP length. */
1233               u16 ip_len, udp_len;
1234               ip_len = clib_net_to_host_u16 (ip[1]->payload_length);
1235               udp_len = clib_net_to_host_u16 (udp->length);
1236               len_diff[1] = ip_len - udp_len;
1237             }
1238
1239           good_l4_csum[0] |= type[0] == IP_BUILTIN_PROTOCOL_UNKNOWN;
1240           good_l4_csum[1] |= type[1] == IP_BUILTIN_PROTOCOL_UNKNOWN;
1241
1242           len_diff[0] = type[0] == IP_BUILTIN_PROTOCOL_UDP ? len_diff[0] : 0;
1243           len_diff[1] = type[1] == IP_BUILTIN_PROTOCOL_UDP ? len_diff[1] : 0;
1244
1245           u8 need_csum[2];
1246           need_csum[0] = type[0] != IP_BUILTIN_PROTOCOL_UNKNOWN
1247             && !good_l4_csum[0]
1248             && !(flags[0] & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
1249           need_csum[1] = type[1] != IP_BUILTIN_PROTOCOL_UNKNOWN
1250             && !good_l4_csum[1]
1251             && !(flags[1] & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
1252           if (PREDICT_FALSE (need_csum[0]))
1253             {
1254               flags[0] = ip6_tcp_udp_icmp_validate_checksum (vm, b[0]);
1255               good_l4_csum[0] = flags[0] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
1256             }
1257           if (PREDICT_FALSE (need_csum[1]))
1258             {
1259               flags[1] = ip6_tcp_udp_icmp_validate_checksum (vm, b[1]);
1260               good_l4_csum[1] = flags[1] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
1261             }
1262
1263           error[0] = IP6_ERROR_UNKNOWN_PROTOCOL;
1264           error[0] = len_diff[0] < 0 ? IP6_ERROR_UDP_LENGTH : error[0];
1265           error[1] = IP6_ERROR_UNKNOWN_PROTOCOL;
1266           error[1] = len_diff[1] < 0 ? IP6_ERROR_UDP_LENGTH : error[1];
1267
1268           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
1269                          IP6_ERROR_UDP_CHECKSUM,
1270                          "Wrong IP6 errors constants");
1271           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
1272                          IP6_ERROR_ICMP_CHECKSUM,
1273                          "Wrong IP6 errors constants");
1274
1275           error[0] =
1276             !good_l4_csum[0] ? IP6_ERROR_UDP_CHECKSUM + type[0] : error[0];
1277           error[1] =
1278             !good_l4_csum[1] ? IP6_ERROR_UDP_CHECKSUM + type[1] : error[1];
1279
1280           /* Drop packets from unroutable hosts. */
1281           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1282           u8 unroutable[2];
1283           unroutable[0] = error[0] == IP6_ERROR_UNKNOWN_PROTOCOL
1284             && type[0] != IP_BUILTIN_PROTOCOL_ICMP
1285             && !ip6_address_is_link_local_unicast (&ip[0]->src_address);
1286           unroutable[1] = error[1] == IP6_ERROR_UNKNOWN_PROTOCOL
1287             && type[1] != IP_BUILTIN_PROTOCOL_ICMP
1288             && !ip6_address_is_link_local_unicast (&ip[1]->src_address);
1289           if (PREDICT_FALSE (unroutable[0]))
1290             {
1291               error[0] =
1292                 !ip6_urpf_loose_check (im, b[0],
1293                                        ip[0]) ? IP6_ERROR_SRC_LOOKUP_MISS
1294                 : error[0];
1295             }
1296           if (PREDICT_FALSE (unroutable[1]))
1297             {
1298               error[1] =
1299                 !ip6_urpf_loose_check (im, b[1],
1300                                        ip[1]) ? IP6_ERROR_SRC_LOOKUP_MISS
1301                 : error[1];
1302             }
1303
1304           vnet_buffer (b[0])->ip.fib_index =
1305             vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1306             vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1307             vnet_buffer (b[0])->ip.fib_index;
1308           vnet_buffer (b[1])->ip.fib_index =
1309             vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1310             vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1311             vnet_buffer (b[1])->ip.fib_index;
1312         }                       /* head_of_feature_arc */
1313
1314       next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
1315       next[0] =
1316         error[0] != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[0];
1317       next[1] = lm->local_next_by_ip_protocol[ip[1]->protocol];
1318       next[1] =
1319         error[1] != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[1];
1320
1321       b[0]->error = error_node->errors[0];
1322       b[1]->error = error_node->errors[1];
1323
1324       if (head_of_feature_arc)
1325         {
1326           u8 ip6_unknown[2];
1327           ip6_unknown[0] = error[0] == (u8) IP6_ERROR_UNKNOWN_PROTOCOL;
1328           ip6_unknown[1] = error[1] == (u8) IP6_ERROR_UNKNOWN_PROTOCOL;
1329           if (PREDICT_TRUE (ip6_unknown[0]))
1330             {
1331               u32 next32 = next[0];
1332               vnet_feature_arc_start (arc_index,
1333                                       vnet_buffer (b[0])->sw_if_index
1334                                       [VLIB_RX], &next32, b[0]);
1335               next[0] = next32;
1336             }
1337           if (PREDICT_TRUE (ip6_unknown[1]))
1338             {
1339               u32 next32 = next[1];
1340               vnet_feature_arc_start (arc_index,
1341                                       vnet_buffer (b[1])->sw_if_index
1342                                       [VLIB_RX], &next32, b[1]);
1343               next[1] = next32;
1344             }
1345         }
1346
1347       /* next */
1348       b += 2;
1349       next += 2;
1350       n_left_from -= 2;
1351     }
1352
1353   while (n_left_from)
1354     {
1355       u8 error;
1356       error = IP6_ERROR_UNKNOWN_PROTOCOL;
1357
1358       ip6_header_t *ip;
1359       ip = vlib_buffer_get_current (b[0]);
1360
1361       if (head_of_feature_arc)
1362         {
1363           vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1364           u8 type = lm->builtin_protocol_by_ip_protocol[ip->protocol];
1365
1366           u32 flags = b[0]->flags;
1367           u32 good_l4_csum =
1368             flags & (VNET_BUFFER_F_L4_CHECKSUM_CORRECT |
1369                      VNET_BUFFER_F_OFFLOAD_TCP_CKSUM |
1370                      VNET_BUFFER_F_OFFLOAD_UDP_CKSUM);
1371
1372           u32 udp_offset;
1373           i16 len_diff = 0;
1374           u8 is_tcp_udp = ip6_next_proto_is_tcp_udp (b[0], ip, &udp_offset);
1375           if (PREDICT_TRUE (is_tcp_udp))
1376             {
1377               udp_header_t *udp = (udp_header_t *) ((u8 *) ip + udp_offset);
1378               /* Don't verify UDP checksum for packets with explicit zero checksum. */
1379               good_l4_csum |= type == IP_BUILTIN_PROTOCOL_UDP
1380                 && udp->checksum == 0;
1381               /* optimistically verify UDP length. */
1382               u16 ip_len, udp_len;
1383               ip_len = clib_net_to_host_u16 (ip->payload_length);
1384               udp_len = clib_net_to_host_u16 (udp->length);
1385               len_diff = ip_len - udp_len;
1386             }
1387
1388           good_l4_csum |= type == IP_BUILTIN_PROTOCOL_UNKNOWN;
1389           len_diff = type == IP_BUILTIN_PROTOCOL_UDP ? len_diff : 0;
1390
1391           u8 need_csum = type != IP_BUILTIN_PROTOCOL_UNKNOWN && !good_l4_csum
1392             && !(flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
1393           if (PREDICT_FALSE (need_csum))
1394             {
1395               flags = ip6_tcp_udp_icmp_validate_checksum (vm, b[0]);
1396               good_l4_csum = flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
1397             }
1398
1399           error = IP6_ERROR_UNKNOWN_PROTOCOL;
1400           error = len_diff < 0 ? IP6_ERROR_UDP_LENGTH : error;
1401
1402           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
1403                          IP6_ERROR_UDP_CHECKSUM,
1404                          "Wrong IP6 errors constants");
1405           STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
1406                          IP6_ERROR_ICMP_CHECKSUM,
1407                          "Wrong IP6 errors constants");
1408
1409           error = !good_l4_csum ? IP6_ERROR_UDP_CHECKSUM + type : error;
1410
1411           /* Drop packets from unroutable hosts. */
1412           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1413           u8 unroutable = error == IP6_ERROR_UNKNOWN_PROTOCOL
1414             && type != IP_BUILTIN_PROTOCOL_ICMP
1415             && !ip6_address_is_link_local_unicast (&ip->src_address);
1416           if (PREDICT_FALSE (unroutable))
1417             {
1418               error =
1419                 !ip6_urpf_loose_check (im, b[0],
1420                                        ip) ? IP6_ERROR_SRC_LOOKUP_MISS :
1421                 error;
1422             }
1423
1424           vnet_buffer (b[0])->ip.fib_index =
1425             vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1426             vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1427             vnet_buffer (b[0])->ip.fib_index;
1428         }                       /* head_of_feature_arc */
1429
1430       next[0] = lm->local_next_by_ip_protocol[ip->protocol];
1431       next[0] =
1432         error != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[0];
1433
1434       b[0]->error = error_node->errors[0];
1435
1436       if (head_of_feature_arc)
1437         {
1438           if (PREDICT_TRUE (error == (u8) IP6_ERROR_UNKNOWN_PROTOCOL))
1439             {
1440               u32 next32 = next[0];
1441               vnet_feature_arc_start (arc_index,
1442                                       vnet_buffer (b[0])->sw_if_index
1443                                       [VLIB_RX], &next32, b[0]);
1444               next[0] = next32;
1445             }
1446         }
1447
1448       /* next */
1449       b += 1;
1450       next += 1;
1451       n_left_from -= 1;
1452     }
1453
1454   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1455   return frame->n_vectors;
1456 }
1457
1458 VLIB_NODE_FN (ip6_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1459                                vlib_frame_t * frame)
1460 {
1461   return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1462 }
1463
1464 /* *INDENT-OFF* */
1465 VLIB_REGISTER_NODE (ip6_local_node) =
1466 {
1467   .name = "ip6-local",
1468   .vector_size = sizeof (u32),
1469   .format_trace = format_ip6_forward_next_trace,
1470   .n_next_nodes = IP_LOCAL_N_NEXT,
1471   .next_nodes =
1472   {
1473     [IP_LOCAL_NEXT_DROP] = "ip6-drop",
1474     [IP_LOCAL_NEXT_PUNT] = "ip6-punt",
1475     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
1476     [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input",
1477     [IP_LOCAL_NEXT_REASSEMBLY] = "ip6-full-reassembly",
1478   },
1479 };
1480 /* *INDENT-ON* */
1481
1482 VLIB_NODE_FN (ip6_local_end_of_arc_node) (vlib_main_t * vm,
1483                                           vlib_node_runtime_t * node,
1484                                           vlib_frame_t * frame)
1485 {
1486   return ip6_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1487 }
1488
1489 /* *INDENT-OFF* */
1490 VLIB_REGISTER_NODE (ip6_local_end_of_arc_node) = {
1491   .name = "ip6-local-end-of-arc",
1492   .vector_size = sizeof (u32),
1493
1494   .format_trace = format_ip6_forward_next_trace,
1495   .sibling_of = "ip6-local",
1496 };
1497
1498 VNET_FEATURE_INIT (ip6_local_end_of_arc, static) = {
1499   .arc_name = "ip6-local",
1500   .node_name = "ip6-local-end-of-arc",
1501   .runs_before = 0, /* not before any other features */
1502 };
1503 /* *INDENT-ON* */
1504
1505 #ifdef CLIB_MARCH_VARIANT
1506 extern vlib_node_registration_t ip6_local_node;
1507
1508 #else
1509
1510 void
1511 ip6_register_protocol (u32 protocol, u32 node_index)
1512 {
1513   vlib_main_t *vm = vlib_get_main ();
1514   ip6_main_t *im = &ip6_main;
1515   ip_lookup_main_t *lm = &im->lookup_main;
1516
1517   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1518   lm->local_next_by_ip_protocol[protocol] =
1519     vlib_node_add_next (vm, ip6_local_node.index, node_index);
1520 }
1521
1522 void
1523 ip6_unregister_protocol (u32 protocol)
1524 {
1525   ip6_main_t *im = &ip6_main;
1526   ip_lookup_main_t *lm = &im->lookup_main;
1527
1528   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1529   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1530 }
1531
1532 clib_error_t *
1533 ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index,
1534                     u8 refresh)
1535 {
1536   vnet_main_t *vnm = vnet_get_main ();
1537   ip6_main_t *im = &ip6_main;
1538   icmp6_neighbor_solicitation_header_t *h;
1539   ip6_address_t *src;
1540   ip_interface_address_t *ia;
1541   ip_adjacency_t *adj;
1542   vnet_hw_interface_t *hi;
1543   vnet_sw_interface_t *si;
1544   vlib_buffer_t *b;
1545   adj_index_t ai;
1546   u32 bi = 0;
1547   int bogus_length;
1548
1549   si = vnet_get_sw_interface (vnm, sw_if_index);
1550
1551   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
1552     {
1553       return clib_error_return (0, "%U: interface %U down",
1554                                 format_ip6_address, dst,
1555                                 format_vnet_sw_if_index_name, vnm,
1556                                 sw_if_index);
1557     }
1558
1559   src =
1560     ip6_interface_address_matching_destination (im, dst, sw_if_index, &ia);
1561   if (!src)
1562     {
1563       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
1564       return clib_error_return
1565         (0, "no matching interface address for destination %U (interface %U)",
1566          format_ip6_address, dst,
1567          format_vnet_sw_if_index_name, vnm, sw_if_index);
1568     }
1569
1570   h =
1571     vlib_packet_template_get_packet (vm,
1572                                      &im->discover_neighbor_packet_template,
1573                                      &bi);
1574   if (!h)
1575     return clib_error_return (0, "ICMP6 NS packet allocation failed");
1576
1577   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
1578
1579   /* Destination address is a solicited node multicast address.  We need to fill in
1580      the low 24 bits with low 24 bits of target's address. */
1581   h->ip.dst_address.as_u8[13] = dst->as_u8[13];
1582   h->ip.dst_address.as_u8[14] = dst->as_u8[14];
1583   h->ip.dst_address.as_u8[15] = dst->as_u8[15];
1584
1585   h->ip.src_address = src[0];
1586   h->neighbor.target_address = dst[0];
1587
1588   if (PREDICT_FALSE (!hi->hw_address))
1589     {
1590       return clib_error_return (0, "%U: interface %U do not support ip probe",
1591                                 format_ip6_address, dst,
1592                                 format_vnet_sw_if_index_name, vnm,
1593                                 sw_if_index);
1594     }
1595
1596   clib_memcpy_fast (h->link_layer_option.ethernet_address, hi->hw_address,
1597                     vec_len (hi->hw_address));
1598
1599   h->neighbor.icmp.checksum =
1600     ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length);
1601   ASSERT (bogus_length == 0);
1602
1603   b = vlib_get_buffer (vm, bi);
1604   vnet_buffer (b)->sw_if_index[VLIB_RX] =
1605     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
1606
1607   /* Add encapsulation string for software interface (e.g. ethernet header). */
1608   ip46_address_t nh = {
1609     .ip6 = *dst,
1610   };
1611
1612   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6,
1613                             VNET_LINK_IP6, &nh, sw_if_index);
1614   adj = adj_get (ai);
1615
1616   /* Peer has been previously resolved, retrieve glean adj instead */
1617   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE && refresh == 0)
1618     {
1619       adj_unlock (ai);
1620       ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP6,
1621                                   VNET_LINK_IP6, sw_if_index, &nh);
1622       adj = adj_get (ai);
1623     }
1624
1625   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
1626   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
1627
1628   {
1629     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
1630     u32 *to_next = vlib_frame_vector_args (f);
1631     to_next[0] = bi;
1632     f->n_vectors = 1;
1633     vlib_put_frame_to_node (vm, hi->output_node_index, f);
1634   }
1635
1636   adj_unlock (ai);
1637   return /* no error */ 0;
1638 }
1639 #endif
1640
/* Next-node indices used by the ip6 rewrite path. */
typedef enum
{
  IP6_REWRITE_NEXT_DROP = 0,
  /* Chosen by ip6_mtu_check() for oversized forwarded packets. */
  IP6_REWRITE_NEXT_ICMP_ERROR,
  /* Chosen by ip6_mtu_check() for oversized locally generated packets. */
  IP6_REWRITE_NEXT_FRAGMENT,
  IP6_REWRITE_N_NEXT            /* Last */
} ip6_rewrite_next_t;
1648
1649 /**
1650  * The bits of an IPv6 address to mask to construct a multicast
1651  * MAC address
1652  */
1653 #define IP6_MCAST_ADDR_MASK 0xffffffff
1654
1655 always_inline void
1656 ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes,
1657                u16 adj_packet_bytes, bool is_locally_generated,
1658                u32 * next, u32 * error)
1659 {
1660   if (adj_packet_bytes >= 1280 && packet_bytes > adj_packet_bytes)
1661     {
1662       if (is_locally_generated)
1663         {
1664           /* IP fragmentation */
1665           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
1666                                    IP6_FRAG_NEXT_IP6_REWRITE, 0);
1667           *next = IP6_REWRITE_NEXT_FRAGMENT;
1668           *error = IP6_ERROR_MTU_EXCEEDED;
1669         }
1670       else
1671         {
1672           *error = IP6_ERROR_MTU_EXCEEDED;
1673           icmp6_error_set_vnet_buffer (b, ICMP6_packet_too_big, 0,
1674                                        adj_packet_bytes);
1675           *next = IP6_REWRITE_NEXT_ICMP_ERROR;
1676         }
1677     }
1678 }
1679
/**
 * Shared worker behind the ip6 rewrite, midchain and mcast node functions.
 *
 * For each buffer of the frame:
 *  - the hop limit is decremented unless the buffer carries
 *    VNET_BUFFER_F_LOCALLY_ORIGINATED (in which case the flag is cleared);
 *    a hop limit that reaches 0 selects IP6_REWRITE_NEXT_ICMP_ERROR;
 *  - the adjacency's rewrite length is stored in
 *    vnet_buffer (b)->ip.save_rewrite_length;
 *  - the L3 length is checked with ip6_mtu_check() against the adjacency's
 *    max_l3_packet_bytes;
 *  - when no error was raised, the buffer start is moved back by the
 *    rewrite length, sw_if_index[VLIB_TX] and the next index are taken from
 *    the adjacency's rewrite header, and the output feature arc is started
 *    if the adjacency has VNET_REWRITE_HAS_FEATURES set; otherwise b->error
 *    is set and the buffer is left where it was;
 *  - vnet_rewrite_one_header() / vnet_rewrite_two_headers() is invoked with
 *    the IPv6 header pointer.
 *
 * @param vm           vlib main
 * @param node         node runtime
 * @param frame        frame of buffer indices to process
 * @param do_counters  non-zero to bump the per-adjacency combined counters
 * @param is_midchain  non-zero to call calc_checksums() and the adjacency's
 *                     midchain fixup_func (when set)
 * @param is_mcast     non-zero to call vnet_ip_mcast_fixup_header()
 * @param do_gso       non-zero to use gso_mtu_sz() as the checked length for
 *                     buffers flagged VNET_BUFFER_F_GSO
 * @return frame->n_vectors
 */
1680 always_inline uword
1681 ip6_rewrite_inline_with_gso (vlib_main_t * vm,
1682                              vlib_node_runtime_t * node,
1683                              vlib_frame_t * frame,
1684                              int do_counters, int is_midchain, int is_mcast,
1685                              int do_gso)
1686 {
1687   ip_lookup_main_t *lm = &ip6_main.lookup_main;
1688   u32 *from = vlib_frame_vector_args (frame);
1689   u32 n_left_from, n_left_to_next, *to_next, next_index;
       /* Errors are looked up in the runtime of ip6_input_node, not of the
        * node currently running. */
1690   vlib_node_runtime_t *error_node =
1691     vlib_node_get_runtime (vm, ip6_input_node.index);
1692
1693   n_left_from = frame->n_vectors;
1694   next_index = node->cached_next_index;
1695   u32 thread_index = vm->thread_index;
1696
1697   while (n_left_from > 0)
1698     {
1699       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1700
           /* Dual loop: two buffers per iteration.  Four must remain because
            * from[2] and from[3] are prefetched below. */
1701       while (n_left_from >= 4 && n_left_to_next >= 2)
1702         {
1703           ip_adjacency_t *adj0, *adj1;
1704           vlib_buffer_t *p0, *p1;
1705           ip6_header_t *ip0, *ip1;
1706           u32 pi0, rw_len0, next0, error0, adj_index0;
1707           u32 pi1, rw_len1, next1, error1, adj_index1;
1708           u32 tx_sw_if_index0, tx_sw_if_index1;
1709           bool is_locally_originated0, is_locally_originated1;
1710
1711           /* Prefetch next iteration. */
1712           {
1713             vlib_buffer_t *p2, *p3;
1714
1715             p2 = vlib_get_buffer (vm, from[2]);
1716             p3 = vlib_get_buffer (vm, from[3]);
1717
1718             vlib_prefetch_buffer_header (p2, LOAD);
1719             vlib_prefetch_buffer_header (p3, LOAD);
1720
1721             CLIB_PREFETCH (p2->pre_data, 32, STORE);
1722             CLIB_PREFETCH (p3->pre_data, 32, STORE);
1723
1724             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
1725             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
1726           }
1727
1728           pi0 = to_next[0] = from[0];
1729           pi1 = to_next[1] = from[1];
1730
1731           from += 2;
1732           n_left_from -= 2;
1733           to_next += 2;
1734           n_left_to_next -= 2;
1735
1736           p0 = vlib_get_buffer (vm, pi0);
1737           p1 = vlib_get_buffer (vm, pi1);
1738
1739           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1740           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
1741
1742           ip0 = vlib_buffer_get_current (p0);
1743           ip1 = vlib_buffer_get_current (p1);
1744
               /* Defaults: no error; the drop next is replaced either by an
                * error path below or by the adjacency's next index. */
1745           error0 = error1 = IP6_ERROR_NONE;
1746           next0 = next1 = IP6_REWRITE_NEXT_DROP;
1747
               /* The hop limit is only decremented for buffers that are not
                * flagged as locally originated. */
1748           is_locally_originated0 =
1749             p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
1750           if (PREDICT_TRUE (!is_locally_originated0))
1751             {
1752               i32 hop_limit0 = ip0->hop_limit;
1753
1754               /* Input node should have reject packets with hop limit 0. */
1755               ASSERT (ip0->hop_limit > 0);
1756
1757               hop_limit0 -= 1;
1758
1759               ip0->hop_limit = hop_limit0;
1760
1761               /*
1762                * If the hop count drops below 1 when forwarding, generate
1763                * an ICMP response.
1764                */
1765               if (PREDICT_FALSE (hop_limit0 <= 0))
1766                 {
1767                   error0 = IP6_ERROR_TIME_EXPIRED;
1768                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
1769                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1770                   icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
1771                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
1772                                                0);
1773                 }
1774             }
1775           else
1776             {
1777               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
1778             }
1779           is_locally_originated1 =
1780             p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
1781           if (PREDICT_TRUE (!is_locally_originated1))
1782             {
1783               i32 hop_limit1 = ip1->hop_limit;
1784
1785               /* Input node should have reject packets with hop limit 0. */
1786               ASSERT (ip1->hop_limit > 0);
1787
1788               hop_limit1 -= 1;
1789
1790               ip1->hop_limit = hop_limit1;
1791
1792               /*
1793                * If the hop count drops below 1 when forwarding, generate
1794                * an ICMP response.
1795                */
1796               if (PREDICT_FALSE (hop_limit1 <= 0))
1797                 {
1798                   error1 = IP6_ERROR_TIME_EXPIRED;
1799                   next1 = IP6_REWRITE_NEXT_ICMP_ERROR;
1800                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1801                   icmp6_error_set_vnet_buffer (p1, ICMP6_time_exceeded,
1802                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
1803                                                0);
1804                 }
1805             }
1806           else
1807             {
1808               p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
1809             }
1810           adj0 = adj_get (adj_index0);
1811           adj1 = adj_get (adj_index1);
1812
1813           rw_len0 = adj0[0].rewrite_header.data_bytes;
1814           rw_len1 = adj1[0].rewrite_header.data_bytes;
1815           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
1816           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
1817
1818           if (do_counters)
1819             {
1820               vlib_increment_combined_counter
1821                 (&adjacency_counters,
1822                  thread_index, adj_index0, 1,
1823                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
1824               vlib_increment_combined_counter
1825                 (&adjacency_counters,
1826                  thread_index, adj_index1, 1,
1827                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
1828             }
1829
1830           /* Check MTU of outgoing interface. */
1831           u16 ip0_len =
1832             clib_net_to_host_u16 (ip0->payload_length) +
1833             sizeof (ip6_header_t);
1834           u16 ip1_len =
1835             clib_net_to_host_u16 (ip1->payload_length) +
1836             sizeof (ip6_header_t);
               /* GSO buffers are checked with gso_mtu_sz() instead of the
                * length derived from the IPv6 header. */
1837           if (do_gso && (p0->flags & VNET_BUFFER_F_GSO))
1838             ip0_len = gso_mtu_sz (p0);
1839           if (do_gso && (p1->flags & VNET_BUFFER_F_GSO))
1840             ip1_len = gso_mtu_sz (p1);
1841
1842
1843
1844           ip6_mtu_check (p0, ip0_len,
1845                          adj0[0].rewrite_header.max_l3_packet_bytes,
1846                          is_locally_originated0, &next0, &error0);
1847           ip6_mtu_check (p1, ip1_len,
1848                          adj1[0].rewrite_header.max_l3_packet_bytes,
1849                          is_locally_originated1, &next1, &error1);
1850
1851           /* Don't adjust the buffer for hop count issue; icmp-error node
1852            * wants to see the IP header */
1853           if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
1854             {
1855               p0->current_data -= rw_len0;
1856               p0->current_length += rw_len0;
1857
1858               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
1859               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
1860               next0 = adj0[0].rewrite_header.next_index;
1861
1862               if (PREDICT_FALSE
1863                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
1864                 vnet_feature_arc_start (lm->output_feature_arc_index,
1865                                         tx_sw_if_index0, &next0, p0);
1866             }
1867           else
1868             {
1869               p0->error = error_node->errors[error0];
1870             }
1871           if (PREDICT_TRUE (error1 == IP6_ERROR_NONE))
1872             {
1873               p1->current_data -= rw_len1;
1874               p1->current_length += rw_len1;
1875
1876               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
1877               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
1878               next1 = adj1[0].rewrite_header.next_index;
1879
1880               if (PREDICT_FALSE
1881                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
1882                 vnet_feature_arc_start (lm->output_feature_arc_index,
1883                                         tx_sw_if_index1, &next1, p1);
1884             }
1885           else
1886             {
1887               p1->error = error_node->errors[error1];
1888             }
1889
               /* NOTE(review): the checksum, rewrite, fixup and mcast steps
                * below are not conditional on error0/error1. */
1890           if (is_midchain)
1891             {
1892               /* before we paint on the next header, update the L4
1893                * checksums if required, since there's no offload on a tunnel */
1894               calc_checksums (vm, p0);
1895               calc_checksums (vm, p1);
1896             }
1897
1898           /* Guess we are only writing on simple Ethernet header. */
1899           vnet_rewrite_two_headers (adj0[0], adj1[0],
1900                                     ip0, ip1, sizeof (ethernet_header_t));
1901
1902           if (is_midchain)
1903             {
1904               if (adj0->sub_type.midchain.fixup_func)
1905                 adj0->sub_type.midchain.fixup_func
1906                   (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
1907               if (adj1->sub_type.midchain.fixup_func)
1908                 adj1->sub_type.midchain.fixup_func
1909                   (vm, adj1, p1, adj1->sub_type.midchain.fixup_data);
1910             }
1911           if (is_mcast)
1912             {
1913               /*
1914                * copy bytes from the IP address into the MAC rewrite
1915                */
1916               vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
1917                                           adj0->
1918                                           rewrite_header.dst_mcast_offset,
1919                                           &ip0->dst_address.as_u32[3],
1920                                           (u8 *) ip0);
1921               vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
1922                                           adj1->
1923                                           rewrite_header.dst_mcast_offset,
1924                                           &ip1->dst_address.as_u32[3],
1925                                           (u8 *) ip1);
1926             }
1927
1928           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1929                                            to_next, n_left_to_next,
1930                                            pi0, pi1, next0, next1);
1931         }
1932
           /* Single loop: remaining buffers, one per iteration.  Same steps
            * as the dual loop, except that the checksum and rewrite calls
            * come before the MTU check here. */
1933       while (n_left_from > 0 && n_left_to_next > 0)
1934         {
1935           ip_adjacency_t *adj0;
1936           vlib_buffer_t *p0;
1937           ip6_header_t *ip0;
1938           u32 pi0, rw_len0;
1939           u32 adj_index0, next0, error0;
1940           u32 tx_sw_if_index0;
1941           bool is_locally_originated0;
1942
1943           pi0 = to_next[0] = from[0];
1944
1945           p0 = vlib_get_buffer (vm, pi0);
1946
1947           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1948
1949           adj0 = adj_get (adj_index0);
1950
1951           ip0 = vlib_buffer_get_current (p0);
1952
1953           error0 = IP6_ERROR_NONE;
1954           next0 = IP6_REWRITE_NEXT_DROP;
1955
1956           /* Check hop limit */
1957           is_locally_originated0 =
1958             p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
1959           if (PREDICT_TRUE (!is_locally_originated0))
1960             {
1961               i32 hop_limit0 = ip0->hop_limit;
1962
1963               ASSERT (ip0->hop_limit > 0);
1964
1965               hop_limit0 -= 1;
1966
1967               ip0->hop_limit = hop_limit0;
1968
1969               if (PREDICT_FALSE (hop_limit0 <= 0))
1970                 {
1971                   /*
1972                    * If the hop count drops below 1 when forwarding, generate
1973                    * an ICMP response.
1974                    */
1975                   error0 = IP6_ERROR_TIME_EXPIRED;
1976                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
1977                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1978                   icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
1979                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
1980                                                0);
1981                 }
1982             }
1983           else
1984             {
1985               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
1986             }
1987
1988           if (is_midchain)
1989             {
1990               calc_checksums (vm, p0);
1991             }
1992
1993           /* Guess we are only writing on simple Ethernet header. */
1994           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
1995
1996           /* Update packet buffer attributes/set output interface. */
1997           rw_len0 = adj0[0].rewrite_header.data_bytes;
1998           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
1999
2000           if (do_counters)
2001             {
2002               vlib_increment_combined_counter
2003                 (&adjacency_counters,
2004                  thread_index, adj_index0, 1,
2005                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2006             }
2007
2008           /* Check MTU of outgoing interface. */
2009           u16 ip0_len =
2010             clib_net_to_host_u16 (ip0->payload_length) +
2011             sizeof (ip6_header_t);
2012           if (do_gso && (p0->flags & VNET_BUFFER_F_GSO))
2013             ip0_len = gso_mtu_sz (p0);
2014
2015           ip6_mtu_check (p0, ip0_len,
2016                          adj0[0].rewrite_header.max_l3_packet_bytes,
2017                          is_locally_originated0, &next0, &error0);
2018
2019           /* Don't adjust the buffer for hop count issue; icmp-error node
2020            * wants to see the IP header */
2021           if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
2022             {
2023               p0->current_data -= rw_len0;
2024               p0->current_length += rw_len0;
2025
2026               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2027
2028               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2029               next0 = adj0[0].rewrite_header.next_index;
2030
2031               if (PREDICT_FALSE
2032                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2033                 vnet_feature_arc_start (lm->output_feature_arc_index,
2034                                         tx_sw_if_index0, &next0, p0);
2035             }
2036           else
2037             {
2038               p0->error = error_node->errors[error0];
2039             }
2040
2041           if (is_midchain)
2042             {
2043               if (adj0->sub_type.midchain.fixup_func)
2044                 adj0->sub_type.midchain.fixup_func
2045                   (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2046             }
2047           if (is_mcast)
2048             {
2049               vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
2050                                           adj0->
2051                                           rewrite_header.dst_mcast_offset,
2052                                           &ip0->dst_address.as_u32[3],
2053                                           (u8 *) ip0);
2054             }
2055
2056           from += 1;
2057           n_left_from -= 1;
2058           to_next += 1;
2059           n_left_to_next -= 1;
2060
2061           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2062                                            to_next, n_left_to_next,
2063                                            pi0, next0);
2064         }
2065
2066       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2067     }
2068
2069   /* Need to do trace after rewrites to pick up new packet data. */
2070   if (node->flags & VLIB_NODE_FLAG_TRACE)
2071     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
2072
2073   return frame->n_vectors;
2074 }
2075
2076 always_inline uword
2077 ip6_rewrite_inline (vlib_main_t * vm,
2078                     vlib_node_runtime_t * node,
2079                     vlib_frame_t * frame,
2080                     int do_counters, int is_midchain, int is_mcast)
2081 {
2082   vnet_main_t *vnm = vnet_get_main ();
2083   if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2084     return ip6_rewrite_inline_with_gso (vm, node, frame, do_counters,
2085                                         is_midchain, is_mcast,
2086                                         1 /* do_gso */ );
2087   else
2088     return ip6_rewrite_inline_with_gso (vm, node, frame, do_counters,
2089                                         is_midchain, is_mcast,
2090                                         0 /* no do_gso */ );
2091 }
2092
2093 VLIB_NODE_FN (ip6_rewrite_node) (vlib_main_t * vm,
2094                                  vlib_node_runtime_t * node,
2095                                  vlib_frame_t * frame)
2096 {
2097   if (adj_are_counters_enabled ())
2098     return ip6_rewrite_inline (vm, node, frame, 1, 0, 0);
2099   else
2100     return ip6_rewrite_inline (vm, node, frame, 0, 0, 0);
2101 }
2102
2103 VLIB_NODE_FN (ip6_rewrite_bcast_node) (vlib_main_t * vm,
2104                                        vlib_node_runtime_t * node,
2105                                        vlib_frame_t * frame)
2106 {
2107   if (adj_are_counters_enabled ())
2108     return ip6_rewrite_inline (vm, node, frame, 1, 0, 0);
2109   else
2110     return ip6_rewrite_inline (vm, node, frame, 0, 0, 0);
2111 }
2112
2113 VLIB_NODE_FN (ip6_rewrite_mcast_node) (vlib_main_t * vm,
2114                                        vlib_node_runtime_t * node,
2115                                        vlib_frame_t * frame)
2116 {
2117   if (adj_are_counters_enabled ())
2118     return ip6_rewrite_inline (vm, node, frame, 1, 0, 1);
2119   else
2120     return ip6_rewrite_inline (vm, node, frame, 0, 0, 1);
2121 }
2122
2123 VLIB_NODE_FN (ip6_midchain_node) (vlib_main_t * vm,
2124                                   vlib_node_runtime_t * node,
2125                                   vlib_frame_t * frame)
2126 {
2127   if (adj_are_counters_enabled ())
2128     return ip6_rewrite_inline (vm, node, frame, 1, 1, 0);
2129   else
2130     return ip6_rewrite_inline (vm, node, frame, 0, 1, 0);
2131 }
2132
2133 VLIB_NODE_FN (ip6_mcast_midchain_node) (vlib_main_t * vm,
2134                                         vlib_node_runtime_t * node,
2135                                         vlib_frame_t * frame)
2136 {
2137   if (adj_are_counters_enabled ())
2138     return ip6_rewrite_inline (vm, node, frame, 1, 1, 1);
2139   else
2140     return ip6_rewrite_inline (vm, node, frame, 0, 1, 1);
2141 }
2142
2143 /* *INDENT-OFF* */
2144 VLIB_REGISTER_NODE (ip6_midchain_node) =
2145 {
2146   .name = "ip6-midchain",
2147   .vector_size = sizeof (u32),
2148   .format_trace = format_ip6_forward_next_trace,
2149   .sibling_of = "ip6-rewrite",
2150   };
2151
2152 VLIB_REGISTER_NODE (ip6_rewrite_node) =
2153 {
2154   .name = "ip6-rewrite",
2155   .vector_size = sizeof (u32),
2156   .format_trace = format_ip6_rewrite_trace,
2157   .n_next_nodes = IP6_REWRITE_N_NEXT,
2158   .next_nodes =
2159   {
2160     [IP6_REWRITE_NEXT_DROP] = "ip6-drop",
2161     [IP6_REWRITE_NEXT_ICMP_ERROR] = "ip6-icmp-error",
2162     [IP6_REWRITE_NEXT_FRAGMENT] = "ip6-frag",
2163   },
2164 };
2165
2166 VLIB_REGISTER_NODE (ip6_rewrite_bcast_node) = {
2167   .name = "ip6-rewrite-bcast",
2168   .vector_size = sizeof (u32),
2169
2170   .format_trace = format_ip6_rewrite_trace,
2171   .sibling_of = "ip6-rewrite",
2172 };
2173
2174 VLIB_REGISTER_NODE (ip6_rewrite_mcast_node) =
2175 {
2176   .name = "ip6-rewrite-mcast",
2177   .vector_size = sizeof (u32),
2178   .format_trace = format_ip6_rewrite_trace,
2179   .sibling_of = "ip6-rewrite",
2180 };
2181
2182
2183 VLIB_REGISTER_NODE (ip6_mcast_midchain_node) =
2184 {
2185   .name = "ip6-mcast-midchain",
2186   .vector_size = sizeof (u32),
2187   .format_trace = format_ip6_rewrite_trace,
2188   .sibling_of = "ip6-rewrite",
2189 };
2190
2191 /* *INDENT-ON* */
2192
2193 /*
2194  * Hop-by-Hop handling
2195  */
/*
 * Global hop-by-hop state.  The code in this file reads its per-option-type
 * tables (options[], trace[]) and its next_override field.  The object is
 * only defined when CLIB_MARCH_VARIANT is not defined.
 */
2196 #ifndef CLIB_MARCH_VARIANT
2197 ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
2198 #endif /* CLIB_MARCH_VARIANT */
2199
/*
 * X-macro list of the hop-by-hop errors: _(symbol suffix, description).
 * It is expanded below to build ip6_hop_by_hop_error_t and
 * ip6_hop_by_hop_error_strings, so both stay in the same order.
 */
2200 #define foreach_ip6_hop_by_hop_error \
2201 _(PROCESSED, "pkts with ip6 hop-by-hop options") \
2202 _(FORMAT, "incorrectly formatted hop-by-hop options") \
2203 _(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options")
2204
2205 /* *INDENT-OFF* */
/*
 * Error codes IP6_HOP_BY_HOP_ERROR_<sym>, one per entry of
 * foreach_ip6_hop_by_hop_error and in that order;
 * IP6_HOP_BY_HOP_N_ERROR is the number of entries.
 */
2206 typedef enum
2207 {
2208 #define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
2209   foreach_ip6_hop_by_hop_error
2210 #undef _
2211   IP6_HOP_BY_HOP_N_ERROR,
2212 } ip6_hop_by_hop_error_t;
2213 /* *INDENT-ON* */
2214
2215 /*
2216  * Primary h-b-h handler trace support
2217  * We work pretty hard on the problem for obvious reasons
2218  */
/* Per-packet trace record written by the hop-by-hop node. */
2219 typedef struct
2220 {
       /* Next index selected for the packet. */
2221   u32 next_index;
       /* Number of bytes stored in option_data. */
2222   u32 trace_len;
       /* Verbatim copy of the hop-by-hop header, starting at the header
        * itself and truncated to the size of this array. */
2223   u8 option_data[256];
2224 } ip6_hop_by_hop_trace_t;
2225
2226 extern vlib_node_registration_t ip6_hop_by_hop_node;
2227
/*
 * Description strings of the hop-by-hop errors, generated from
 * foreach_ip6_hop_by_hop_error and therefore indexed by
 * ip6_hop_by_hop_error_t.
 */
2228 static char *ip6_hop_by_hop_error_strings[] = {
2229 #define _(sym,string) string,
2230   foreach_ip6_hop_by_hop_error
2231 #undef _
2232 };
2233
2234 #ifndef CLIB_MARCH_VARIANT
2235 u8 *
2236 format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args)
2237 {
2238   ip6_hop_by_hop_header_t *hbh0 = va_arg (*args, ip6_hop_by_hop_header_t *);
2239   int total_len = va_arg (*args, int);
2240   ip6_hop_by_hop_option_t *opt0, *limit0;
2241   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2242   u8 type0;
2243
2244   s = format (s, "IP6_HOP_BY_HOP: next protocol %d len %d total %d",
2245               hbh0->protocol, (hbh0->length + 1) << 3, total_len);
2246
2247   opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2248   limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 + total_len);
2249
2250   while (opt0 < limit0)
2251     {
2252       type0 = opt0->type;
2253       switch (type0)
2254         {
2255         case 0:         /* Pad, just stop */
2256           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1);
2257           break;
2258
2259         default:
2260           if (hm->trace[type0])
2261             {
2262               s = (*hm->trace[type0]) (s, opt0);
2263             }
2264           else
2265             {
2266               s =
2267                 format (s, "\n    unrecognized option %d length %d", type0,
2268                         opt0->length);
2269             }
2270           opt0 =
2271             (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2272                                          sizeof (ip6_hop_by_hop_option_t));
2273           break;
2274         }
2275     }
2276   return s;
2277 }
2278 #endif
2279
2280 static u8 *
2281 format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
2282 {
2283   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
2284   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
2285   ip6_hop_by_hop_trace_t *t = va_arg (*args, ip6_hop_by_hop_trace_t *);
2286   ip6_hop_by_hop_header_t *hbh0;
2287   ip6_hop_by_hop_option_t *opt0, *limit0;
2288   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2289
2290   u8 type0;
2291
2292   hbh0 = (ip6_hop_by_hop_header_t *) t->option_data;
2293
2294   s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d",
2295               t->next_index, (hbh0->length + 1) << 3, t->trace_len);
2296
2297   opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2298   limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0) + t->trace_len;
2299
2300   while (opt0 < limit0)
2301     {
2302       type0 = opt0->type;
2303       switch (type0)
2304         {
2305         case 0:         /* Pad, just stop */
2306           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1;
2307           break;
2308
2309         default:
2310           if (hm->trace[type0])
2311             {
2312               s = (*hm->trace[type0]) (s, opt0);
2313             }
2314           else
2315             {
2316               s =
2317                 format (s, "\n    unrecognized option %d length %d", type0,
2318                         opt0->length);
2319             }
2320           opt0 =
2321             (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2322                                          sizeof (ip6_hop_by_hop_option_t));
2323           break;
2324         }
2325     }
2326   return s;
2327 }
2328
2329 always_inline u8
2330 ip6_scan_hbh_options (vlib_buffer_t * b0,
2331                       ip6_header_t * ip0,
2332                       ip6_hop_by_hop_header_t * hbh0,
2333                       ip6_hop_by_hop_option_t * opt0,
2334                       ip6_hop_by_hop_option_t * limit0, u32 * next0)
2335 {
2336   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2337   u8 type0;
2338   u8 error0 = 0;
2339
2340   while (opt0 < limit0)
2341     {
2342       type0 = opt0->type;
2343       switch (type0)
2344         {
2345         case 0:         /* Pad1 */
2346           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1;
2347           continue;
2348         case 1:         /* PadN */
2349           break;
2350         default:
2351           if (hm->options[type0])
2352             {
2353               if ((*hm->options[type0]) (b0, ip0, opt0) < 0)
2354                 {
2355                   error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2356                   return (error0);
2357                 }
2358             }
2359           else
2360             {
2361               /* Unrecognized mandatory option, check the two high order bits */
2362               switch (opt0->type & HBH_OPTION_TYPE_HIGH_ORDER_BITS)
2363                 {
2364                 case HBH_OPTION_TYPE_SKIP_UNKNOWN:
2365                   break;
2366                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN:
2367                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2368                   *next0 = IP_LOOKUP_NEXT_DROP;
2369                   break;
2370                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP:
2371                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2372                   *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
2373                   icmp6_error_set_vnet_buffer (b0, ICMP6_parameter_problem,
2374                                                ICMP6_parameter_problem_unrecognized_option,
2375                                                (u8 *) opt0 - (u8 *) ip0);
2376                   break;
2377                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST:
2378                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2379                   if (!ip6_address_is_multicast (&ip0->dst_address))
2380                     {
2381                       *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
2382                       icmp6_error_set_vnet_buffer (b0,
2383                                                    ICMP6_parameter_problem,
2384                                                    ICMP6_parameter_problem_unrecognized_option,
2385                                                    (u8 *) opt0 - (u8 *) ip0);
2386                     }
2387                   else
2388                     {
2389                       *next0 = IP_LOOKUP_NEXT_DROP;
2390                     }
2391                   break;
2392                 }
2393               return (error0);
2394             }
2395         }
2396       opt0 =
2397         (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2398                                      sizeof (ip6_hop_by_hop_option_t));
2399     }
2400   return (error0);
2401 }
2402
2403 /*
2404  * Process the Hop-by-Hop Options header
2405  */
2406 VLIB_NODE_FN (ip6_hop_by_hop_node) (vlib_main_t * vm,
2407                                     vlib_node_runtime_t * node,
2408                                     vlib_frame_t * frame)
2409 {
2410   vlib_node_runtime_t *error_node =
2411     vlib_node_get_runtime (vm, ip6_hop_by_hop_node.index);
2412   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2413   u32 n_left_from, *from, *to_next;
2414   ip_lookup_next_t next_index;
2415
2416   from = vlib_frame_vector_args (frame);
2417   n_left_from = frame->n_vectors;
2418   next_index = node->cached_next_index;
2419
2420   while (n_left_from > 0)
2421     {
2422       u32 n_left_to_next;
2423
2424       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2425
2426       while (n_left_from >= 4 && n_left_to_next >= 2)
2427         {
2428           u32 bi0, bi1;
2429           vlib_buffer_t *b0, *b1;
2430           u32 next0, next1;
2431           ip6_header_t *ip0, *ip1;
2432           ip6_hop_by_hop_header_t *hbh0, *hbh1;
2433           ip6_hop_by_hop_option_t *opt0, *limit0, *opt1, *limit1;
2434           u8 error0 = 0, error1 = 0;
2435
2436           /* Prefetch next iteration. */
2437           {
2438             vlib_buffer_t *p2, *p3;
2439
2440             p2 = vlib_get_buffer (vm, from[2]);
2441             p3 = vlib_get_buffer (vm, from[3]);
2442
2443             vlib_prefetch_buffer_header (p2, LOAD);
2444             vlib_prefetch_buffer_header (p3, LOAD);
2445
2446             CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
2447             CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
2448           }
2449
2450           /* Speculatively enqueue b0, b1 to the current next frame */
2451           to_next[0] = bi0 = from[0];
2452           to_next[1] = bi1 = from[1];
2453           from += 2;
2454           to_next += 2;
2455           n_left_from -= 2;
2456           n_left_to_next -= 2;
2457
2458           b0 = vlib_get_buffer (vm, bi0);
2459           b1 = vlib_get_buffer (vm, bi1);
2460
2461           /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2462           u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
2463           ip_adjacency_t *adj0 = adj_get (adj_index0);
2464           u32 adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
2465           ip_adjacency_t *adj1 = adj_get (adj_index1);
2466
2467           /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2468           next0 = adj0->lookup_next_index;
2469           next1 = adj1->lookup_next_index;
2470
2471           ip0 = vlib_buffer_get_current (b0);
2472           ip1 = vlib_buffer_get_current (b1);
2473           hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
2474           hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
2475           opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2476           opt1 = (ip6_hop_by_hop_option_t *) (hbh1 + 1);
2477           limit0 =
2478             (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
2479                                          ((hbh0->length + 1) << 3));
2480           limit1 =
2481             (ip6_hop_by_hop_option_t *) ((u8 *) hbh1 +
2482                                          ((hbh1->length + 1) << 3));
2483
2484           /*
2485            * Basic validity checks
2486            */
2487           if ((hbh0->length + 1) << 3 >
2488               clib_net_to_host_u16 (ip0->payload_length))
2489             {
2490               error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2491               next0 = IP_LOOKUP_NEXT_DROP;
2492               goto outdual;
2493             }
2494           /* Scan the set of h-b-h options, process ones that we understand */
2495           error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
2496
2497           if ((hbh1->length + 1) << 3 >
2498               clib_net_to_host_u16 (ip1->payload_length))
2499             {
2500               error1 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2501               next1 = IP_LOOKUP_NEXT_DROP;
2502               goto outdual;
2503             }
2504           /* Scan the set of h-b-h options, process ones that we understand */
2505           error1 = ip6_scan_hbh_options (b1, ip1, hbh1, opt1, limit1, &next1);
2506
2507         outdual:
2508           /* Has the classifier flagged this buffer for special treatment? */
2509           if (PREDICT_FALSE
2510               ((error0 == 0)
2511                && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
2512             next0 = hm->next_override;
2513
2514           /* Has the classifier flagged this buffer for special treatment? */
2515           if (PREDICT_FALSE
2516               ((error1 == 0)
2517                && (vnet_buffer (b1)->l2_classify.opaque_index & OI_DECAP)))
2518             next1 = hm->next_override;
2519
2520           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
2521             {
2522               if (b0->flags & VLIB_BUFFER_IS_TRACED)
2523                 {
2524                   ip6_hop_by_hop_trace_t *t =
2525                     vlib_add_trace (vm, node, b0, sizeof (*t));
2526                   u32 trace_len = (hbh0->length + 1) << 3;
2527                   t->next_index = next0;
2528                   /* Capture the h-b-h option verbatim */
2529                   trace_len =
2530                     trace_len <
2531                     ARRAY_LEN (t->option_data) ? trace_len :
2532                     ARRAY_LEN (t->option_data);
2533                   t->trace_len = trace_len;
2534                   clib_memcpy_fast (t->option_data, hbh0, trace_len);
2535                 }
2536               if (b1->flags & VLIB_BUFFER_IS_TRACED)
2537                 {
2538                   ip6_hop_by_hop_trace_t *t =
2539                     vlib_add_trace (vm, node, b1, sizeof (*t));
2540                   u32 trace_len = (hbh1->length + 1) << 3;
2541                   t->next_index = next1;
2542                   /* Capture the h-b-h option verbatim */
2543                   trace_len =
2544                     trace_len <
2545                     ARRAY_LEN (t->option_data) ? trace_len :
2546                     ARRAY_LEN (t->option_data);
2547                   t->trace_len = trace_len;
2548                   clib_memcpy_fast (t->option_data, hbh1, trace_len);
2549                 }
2550
2551             }
2552
2553           b0->error = error_node->errors[error0];
2554           b1->error = error_node->errors[error1];
2555
2556           /* verify speculative enqueue, maybe switch current next frame */
2557           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
2558                                            n_left_to_next, bi0, bi1, next0,
2559                                            next1);
2560         }
2561
2562       while (n_left_from > 0 && n_left_to_next > 0)
2563         {
2564           u32 bi0;
2565           vlib_buffer_t *b0;
2566           u32 next0;
2567           ip6_header_t *ip0;
2568           ip6_hop_by_hop_header_t *hbh0;
2569           ip6_hop_by_hop_option_t *opt0, *limit0;
2570           u8 error0 = 0;
2571
2572           /* Speculatively enqueue b0 to the current next frame */
2573           bi0 = from[0];
2574           to_next[0] = bi0;
2575           from += 1;
2576           to_next += 1;
2577           n_left_from -= 1;
2578           n_left_to_next -= 1;
2579
2580           b0 = vlib_get_buffer (vm, bi0);
2581           /*
2582            * Default use the next_index from the adjacency.
2583            * A HBH option rarely redirects to a different node
2584            */
2585           u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
2586           ip_adjacency_t *adj0 = adj_get (adj_index0);
2587           next0 = adj0->lookup_next_index;
2588
2589           ip0 = vlib_buffer_get_current (b0);
2590           hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
2591           opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2592           limit0 =
2593             (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
2594                                          ((hbh0->length + 1) << 3));
2595
2596           /*
2597            * Basic validity checks
2598            */
2599           if ((hbh0->length + 1) << 3 >
2600               clib_net_to_host_u16 (ip0->payload_length))
2601             {
2602               error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2603               next0 = IP_LOOKUP_NEXT_DROP;
2604               goto out0;
2605             }
2606
2607           /* Scan the set of h-b-h options, process ones that we understand */
2608           error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
2609
2610         out0:
2611           /* Has the classifier flagged this buffer for special treatment? */
2612           if (PREDICT_FALSE
2613               ((error0 == 0)
2614                && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
2615             next0 = hm->next_override;
2616
2617           if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
2618             {
2619               ip6_hop_by_hop_trace_t *t =
2620                 vlib_add_trace (vm, node, b0, sizeof (*t));
2621               u32 trace_len = (hbh0->length + 1) << 3;
2622               t->next_index = next0;
2623               /* Capture the h-b-h option verbatim */
2624               trace_len =
2625                 trace_len <
2626                 ARRAY_LEN (t->option_data) ? trace_len :
2627                 ARRAY_LEN (t->option_data);
2628               t->trace_len = trace_len;
2629               clib_memcpy_fast (t->option_data, hbh0, trace_len);
2630             }
2631
2632           b0->error = error_node->errors[error0];
2633
2634           /* verify speculative enqueue, maybe switch current next frame */
2635           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
2636                                            n_left_to_next, bi0, next0);
2637         }
2638       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2639     }
2640   return frame->n_vectors;
2641 }
2642
2643 /* *INDENT-OFF* */
2644 VLIB_REGISTER_NODE (ip6_hop_by_hop_node) =
2645 {
2646   .name = "ip6-hop-by-hop",
2647   .sibling_of = "ip6-lookup",
2648   .vector_size = sizeof (u32),
2649   .format_trace = format_ip6_hop_by_hop_trace,
2650   .type = VLIB_NODE_TYPE_INTERNAL,
2651   .n_errors = ARRAY_LEN (ip6_hop_by_hop_error_strings),
2652   .error_strings = ip6_hop_by_hop_error_strings,
2653   .n_next_nodes = 0,
2654 };
2655 /* *INDENT-ON* */
2656
2657 static clib_error_t *
2658 ip6_hop_by_hop_init (vlib_main_t * vm)
2659 {
2660   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2661   clib_memset (hm->options, 0, sizeof (hm->options));
2662   clib_memset (hm->trace, 0, sizeof (hm->trace));
2663   hm->next_override = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP;
2664   return (0);
2665 }
2666
2667 VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
2668
2669 #ifndef CLIB_MARCH_VARIANT
2670 void
2671 ip6_hbh_set_next_override (uword next)
2672 {
2673   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2674
2675   hm->next_override = next;
2676 }
2677
2678 int
2679 ip6_hbh_register_option (u8 option,
2680                          int options (vlib_buffer_t * b, ip6_header_t * ip,
2681                                       ip6_hop_by_hop_option_t * opt),
2682                          u8 * trace (u8 * s, ip6_hop_by_hop_option_t * opt))
2683 {
2684   ip6_main_t *im = &ip6_main;
2685   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2686
2687   ASSERT ((u32) option < ARRAY_LEN (hm->options));
2688
2689   /* Already registered */
2690   if (hm->options[option])
2691     return (-1);
2692
2693   hm->options[option] = options;
2694   hm->trace[option] = trace;
2695
2696   /* Set global variable */
2697   im->hbh_enabled = 1;
2698
2699   return (0);
2700 }
2701
2702 int
2703 ip6_hbh_unregister_option (u8 option)
2704 {
2705   ip6_main_t *im = &ip6_main;
2706   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2707
2708   ASSERT ((u32) option < ARRAY_LEN (hm->options));
2709
2710   /* Not registered */
2711   if (!hm->options[option])
2712     return (-1);
2713
2714   hm->options[option] = NULL;
2715   hm->trace[option] = NULL;
2716
2717   /* Disable global knob if this was the last option configured */
2718   int i;
2719   bool found = false;
2720   for (i = 0; i < 256; i++)
2721     {
2722       if (hm->options[option])
2723         {
2724           found = true;
2725           break;
2726         }
2727     }
2728   if (!found)
2729     im->hbh_enabled = 0;
2730
2731   return (0);
2732 }
2733
2734 /* Global IP6 main. */
2735 ip6_main_t ip6_main;
2736 #endif
2737
2738 static clib_error_t *
2739 ip6_lookup_init (vlib_main_t * vm)
2740 {
2741   ip6_main_t *im = &ip6_main;
2742   clib_error_t *error;
2743   uword i;
2744
2745   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
2746     return error;
2747
2748   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
2749     {
2750       u32 j, i0, i1;
2751
2752       i0 = i / 32;
2753       i1 = i % 32;
2754
2755       for (j = 0; j < i0; j++)
2756         im->fib_masks[i].as_u32[j] = ~0;
2757
2758       if (i1)
2759         im->fib_masks[i].as_u32[i0] =
2760           clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
2761     }
2762
2763   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 1);
2764
2765   if (im->lookup_table_nbuckets == 0)
2766     im->lookup_table_nbuckets = IP6_FIB_DEFAULT_HASH_NUM_BUCKETS;
2767
2768   im->lookup_table_nbuckets = 1 << max_log2 (im->lookup_table_nbuckets);
2769
2770   if (im->lookup_table_size == 0)
2771     im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE;
2772
2773   clib_bihash_init_24_8 (&(im->ip6_table[IP6_FIB_TABLE_FWDING].ip6_hash),
2774                          "ip6 FIB fwding table",
2775                          im->lookup_table_nbuckets, im->lookup_table_size);
2776   clib_bihash_init_24_8 (&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash,
2777                          "ip6 FIB non-fwding table",
2778                          im->lookup_table_nbuckets, im->lookup_table_size);
2779   clib_bihash_init_40_8 (&im->ip6_mtable.ip6_mhash,
2780                          "ip6 mFIB table",
2781                          im->lookup_table_nbuckets, im->lookup_table_size);
2782
2783   /* Create FIB with index 0 and table id of 0. */
2784   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
2785                                      FIB_SOURCE_DEFAULT_ROUTE);
2786   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
2787                                       MFIB_SOURCE_DEFAULT_ROUTE);
2788
2789   {
2790     pg_node_t *pn;
2791     pn = pg_get_node (ip6_lookup_node.index);
2792     pn->unformat_edit = unformat_pg_ip6_header;
2793   }
2794
2795   /* Unless explicitly configured, don't process HBH options */
2796   im->hbh_enabled = 0;
2797
2798   {
2799     icmp6_neighbor_solicitation_header_t p;
2800
2801     clib_memset (&p, 0, sizeof (p));
2802
2803     p.ip.ip_version_traffic_class_and_flow_label =
2804       clib_host_to_net_u32 (0x6 << 28);
2805     p.ip.payload_length =
2806       clib_host_to_net_u16 (sizeof (p) -
2807                             STRUCT_OFFSET_OF
2808                             (icmp6_neighbor_solicitation_header_t, neighbor));
2809     p.ip.protocol = IP_PROTOCOL_ICMP6;
2810     p.ip.hop_limit = 255;
2811     ip6_set_solicited_node_multicast_address (&p.ip.dst_address, 0);
2812
2813     p.neighbor.icmp.type = ICMP6_neighbor_solicitation;
2814
2815     p.link_layer_option.header.type =
2816       ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
2817     p.link_layer_option.header.n_data_u64s =
2818       sizeof (p.link_layer_option) / sizeof (u64);
2819
2820     vlib_packet_template_init (vm,
2821                                &im->discover_neighbor_packet_template,
2822                                &p, sizeof (p),
2823                                /* alloc chunk size */ 8,
2824                                "ip6 neighbor discovery");
2825   }
2826
2827   return error;
2828 }
2829
2830 VLIB_INIT_FUNCTION (ip6_lookup_init);
2831
2832 static clib_error_t *
2833 test_ip6_link_command_fn (vlib_main_t * vm,
2834                           unformat_input_t * input, vlib_cli_command_t * cmd)
2835 {
2836   u8 mac[6];
2837   ip6_address_t _a, *a = &_a;
2838
2839   if (unformat (input, "%U", unformat_ethernet_address, mac))
2840     {
2841       ip6_link_local_address_from_ethernet_mac_address (a, mac);
2842       vlib_cli_output (vm, "Link local address: %U", format_ip6_address, a);
2843       ip6_ethernet_mac_address_from_link_local_address (mac, a);
2844       vlib_cli_output (vm, "Original MAC address: %U",
2845                        format_ethernet_address, mac);
2846     }
2847
2848   return 0;
2849 }
2850
2851 /*?
2852  * This command converts the given MAC Address into an IPv6 link-local
2853  * address.
2854  *
2855  * @cliexpar
2856  * Example of how to create an IPv6 link-local address:
2857  * @cliexstart{test ip6 link 16:d9:e0:91:79:86}
2858  * Link local address: fe80::14d9:e0ff:fe91:7986
2859  * Original MAC address: 16:d9:e0:91:79:86
2860  * @cliexend
2861 ?*/
2862 /* *INDENT-OFF* */
2863 VLIB_CLI_COMMAND (test_link_command, static) =
2864 {
2865   .path = "test ip6 link",
2866   .function = test_ip6_link_command_fn,
2867   .short_help = "test ip6 link <mac-address>",
2868 };
2869 /* *INDENT-ON* */
2870
2871 #ifndef CLIB_MARCH_VARIANT
2872 int
2873 vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config)
2874 {
2875   u32 fib_index;
2876
2877   fib_index = fib_table_find (FIB_PROTOCOL_IP6, table_id);
2878
2879   if (~0 == fib_index)
2880     return VNET_API_ERROR_NO_SUCH_FIB;
2881
2882   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP6,
2883                                   flow_hash_config);
2884
2885   return 0;
2886 }
2887 #endif
2888
2889 static clib_error_t *
2890 set_ip6_flow_hash_command_fn (vlib_main_t * vm,
2891                               unformat_input_t * input,
2892                               vlib_cli_command_t * cmd)
2893 {
2894   int matched = 0;
2895   u32 table_id = 0;
2896   u32 flow_hash_config = 0;
2897   int rv;
2898
2899   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2900     {
2901       if (unformat (input, "table %d", &table_id))
2902         matched = 1;
2903 #define _(a,v) \
2904     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2905       foreach_flow_hash_bit
2906 #undef _
2907         else
2908         break;
2909     }
2910
2911   if (matched == 0)
2912     return clib_error_return (0, "unknown input `%U'",
2913                               format_unformat_error, input);
2914
2915   rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config);
2916   switch (rv)
2917     {
2918     case 0:
2919       break;
2920
2921     case -1:
2922       return clib_error_return (0, "no such FIB table %d", table_id);
2923
2924     default:
2925       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2926       break;
2927     }
2928
2929   return 0;
2930 }
2931
2932 /*?
2933  * Configure the set of IPv6 fields used by the flow hash.
2934  *
2935  * @cliexpar
2936  * @parblock
2937  * Example of how to set the flow hash on a given table:
2938  * @cliexcmd{set ip6 flow-hash table 8 dst sport dport proto}
2939  *
2940  * Example of how to display the configured flow hash:
2941  * @cliexstart{show ip6 fib}
2942  * ipv6-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2943  * @::/0
2944  *   unicast-ip6-chain
2945  *   [@0]: dpo-load-balance: [index:5 buckets:1 uRPF:5 to:[0:0]]
2946  *     [0] [@0]: dpo-drop ip6
2947  * fe80::/10
2948  *   unicast-ip6-chain
2949  *   [@0]: dpo-load-balance: [index:10 buckets:1 uRPF:10 to:[0:0]]
2950  *     [0] [@2]: dpo-receive
2951  * ff02::1/128
2952  *   unicast-ip6-chain
2953  *   [@0]: dpo-load-balance: [index:8 buckets:1 uRPF:8 to:[0:0]]
2954  *     [0] [@2]: dpo-receive
2955  * ff02::2/128
2956  *   unicast-ip6-chain
2957  *   [@0]: dpo-load-balance: [index:7 buckets:1 uRPF:7 to:[0:0]]
2958  *     [0] [@2]: dpo-receive
2959  * ff02::16/128
2960  *   unicast-ip6-chain
2961  *   [@0]: dpo-load-balance: [index:9 buckets:1 uRPF:9 to:[0:0]]
2962  *     [0] [@2]: dpo-receive
2963  * ff02::1:ff00:0/104
2964  *   unicast-ip6-chain
2965  *   [@0]: dpo-load-balance: [index:6 buckets:1 uRPF:6 to:[0:0]]
2966  *     [0] [@2]: dpo-receive
2967  * ipv6-VRF:8, fib_index 1, flow hash: dst sport dport proto
2968  * @::/0
2969  *   unicast-ip6-chain
2970  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2971  *     [0] [@0]: dpo-drop ip6
2972  * @::a:1:1:0:4/126
2973  *   unicast-ip6-chain
2974  *   [@0]: dpo-load-balance: [index:27 buckets:1 uRPF:26 to:[0:0]]
2975  *     [0] [@4]: ipv6-glean: af_packet0
2976  * @::a:1:1:0:7/128
2977  *   unicast-ip6-chain
2978  *   [@0]: dpo-load-balance: [index:28 buckets:1 uRPF:27 to:[0:0]]
2979  *     [0] [@2]: dpo-receive: @::a:1:1:0:7 on af_packet0
2980  * fe80::/10
2981  *   unicast-ip6-chain
2982  *   [@0]: dpo-load-balance: [index:26 buckets:1 uRPF:25 to:[0:0]]
2983  *     [0] [@2]: dpo-receive
2984  * fe80::fe:3eff:fe3e:9222/128
2985  *   unicast-ip6-chain
2986  *   [@0]: dpo-load-balance: [index:29 buckets:1 uRPF:28 to:[0:0]]
2987  *     [0] [@2]: dpo-receive: fe80::fe:3eff:fe3e:9222 on af_packet0
2988  * ff02::1/128
2989  *   unicast-ip6-chain
2990  *   [@0]: dpo-load-balance: [index:24 buckets:1 uRPF:23 to:[0:0]]
2991  *     [0] [@2]: dpo-receive
2992  * ff02::2/128
2993  *   unicast-ip6-chain
2994  *   [@0]: dpo-load-balance: [index:23 buckets:1 uRPF:22 to:[0:0]]
2995  *     [0] [@2]: dpo-receive
2996  * ff02::16/128
2997  *   unicast-ip6-chain
2998  *   [@0]: dpo-load-balance: [index:25 buckets:1 uRPF:24 to:[0:0]]
2999  *     [0] [@2]: dpo-receive
3000  * ff02::1:ff00:0/104
3001  *   unicast-ip6-chain
3002  *   [@0]: dpo-load-balance: [index:22 buckets:1 uRPF:21 to:[0:0]]
3003  *     [0] [@2]: dpo-receive
3004  * @cliexend
3005  * @endparblock
3006 ?*/
3007 /* *INDENT-OFF* */
3008 VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) =
3009 {
3010   .path = "set ip6 flow-hash",
3011   .short_help =
3012   "set ip6 flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3013   .function = set_ip6_flow_hash_command_fn,
3014 };
3015 /* *INDENT-ON* */
3016
3017 static clib_error_t *
3018 show_ip6_local_command_fn (vlib_main_t * vm,
3019                            unformat_input_t * input, vlib_cli_command_t * cmd)
3020 {
3021   ip6_main_t *im = &ip6_main;
3022   ip_lookup_main_t *lm = &im->lookup_main;
3023   int i;
3024
3025   vlib_cli_output (vm, "Protocols handled by ip6_local");
3026   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
3027     {
3028       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
3029         {
3030
3031           u32 node_index = vlib_get_node (vm,
3032                                           ip6_local_node.index)->
3033             next_nodes[lm->local_next_by_ip_protocol[i]];
3034           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
3035                            node_index);
3036         }
3037     }
3038   return 0;
3039 }
3040
3041
3042
3043 /*?
3044  * Display the set of protocols handled by the local IPv6 stack.
3045  *
3046  * @cliexpar
3047  * Example of how to display local protocol table:
3048  * @cliexstart{show ip6 local}
3049  * Protocols handled by ip6_local
3050  * 17
3051  * 43
3052  * 58
3053  * 115
3054  * @cliexend
3055 ?*/
3056 /* *INDENT-OFF* */
3057 VLIB_CLI_COMMAND (show_ip6_local, static) =
3058 {
3059   .path = "show ip6 local",
3060   .function = show_ip6_local_command_fn,
3061   .short_help = "show ip6 local",
3062 };
3063 /* *INDENT-ON* */
3064
3065 #ifndef CLIB_MARCH_VARIANT
3066 int
3067 vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3068                              u32 table_index)
3069 {
3070   vnet_main_t *vnm = vnet_get_main ();
3071   vnet_interface_main_t *im = &vnm->interface_main;
3072   ip6_main_t *ipm = &ip6_main;
3073   ip_lookup_main_t *lm = &ipm->lookup_main;
3074   vnet_classify_main_t *cm = &vnet_classify_main;
3075   ip6_address_t *if_addr;
3076
3077   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3078     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3079
3080   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3081     return VNET_API_ERROR_NO_SUCH_ENTRY;
3082
3083   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3084   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3085
3086   if_addr = ip6_interface_first_address (ipm, sw_if_index);
3087
3088   if (NULL != if_addr)
3089     {
3090       fib_prefix_t pfx = {
3091         .fp_len = 128,
3092         .fp_proto = FIB_PROTOCOL_IP6,
3093         .fp_addr.ip6 = *if_addr,
3094       };
3095       u32 fib_index;
3096
3097       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3098                                                        sw_if_index);
3099
3100
3101       if (table_index != (u32) ~ 0)
3102         {
3103           dpo_id_t dpo = DPO_INVALID;
3104
3105           dpo_set (&dpo,
3106                    DPO_CLASSIFY,
3107                    DPO_PROTO_IP6,
3108                    classify_dpo_create (DPO_PROTO_IP6, table_index));
3109
3110           fib_table_entry_special_dpo_add (fib_index,
3111                                            &pfx,
3112                                            FIB_SOURCE_CLASSIFY,
3113                                            FIB_ENTRY_FLAG_NONE, &dpo);
3114           dpo_reset (&dpo);
3115         }
3116       else
3117         {
3118           fib_table_entry_special_remove (fib_index,
3119                                           &pfx, FIB_SOURCE_CLASSIFY);
3120         }
3121     }
3122
3123   return 0;
3124 }
3125 #endif
3126
3127 static clib_error_t *
3128 set_ip6_classify_command_fn (vlib_main_t * vm,
3129                              unformat_input_t * input,
3130                              vlib_cli_command_t * cmd)
3131 {
3132   u32 table_index = ~0;
3133   int table_index_set = 0;
3134   u32 sw_if_index = ~0;
3135   int rv;
3136
3137   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3138     {
3139       if (unformat (input, "table-index %d", &table_index))
3140         table_index_set = 1;
3141       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3142                          vnet_get_main (), &sw_if_index))
3143         ;
3144       else
3145         break;
3146     }
3147
3148   if (table_index_set == 0)
3149     return clib_error_return (0, "classify table-index must be specified");
3150
3151   if (sw_if_index == ~0)
3152     return clib_error_return (0, "interface / subif must be specified");
3153
3154   rv = vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index);
3155
3156   switch (rv)
3157     {
3158     case 0:
3159       break;
3160
3161     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3162       return clib_error_return (0, "No such interface");
3163
3164     case VNET_API_ERROR_NO_SUCH_ENTRY:
3165       return clib_error_return (0, "No such classifier table");
3166     }
3167   return 0;
3168 }
3169
3170 /*?
3171  * Assign a classification table to an interface. The classification
3172  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3173  * commands. Once the table is created, use this command to filter packets
3174  * on an interface.
3175  *
3176  * @cliexpar
3177  * Example of how to assign a classification table to an interface:
3178  * @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1}
3179 ?*/
3180 /* *INDENT-OFF* */
3181 VLIB_CLI_COMMAND (set_ip6_classify_command, static) =
3182 {
3183   .path = "set ip6 classify",
3184   .short_help =
3185   "set ip6 classify intfc <interface> table-index <classify-idx>",
3186   .function = set_ip6_classify_command_fn,
3187 };
3188 /* *INDENT-ON* */
3189
3190 static clib_error_t *
3191 ip6_config (vlib_main_t * vm, unformat_input_t * input)
3192 {
3193   ip6_main_t *im = &ip6_main;
3194   uword heapsize = 0;
3195   u32 tmp;
3196   u32 nbuckets = 0;
3197
3198   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3199     {
3200       if (unformat (input, "hash-buckets %d", &tmp))
3201         nbuckets = tmp;
3202       else if (unformat (input, "heap-size %U",
3203                          unformat_memory_size, &heapsize))
3204         ;
3205       else
3206         return clib_error_return (0, "unknown input '%U'",
3207                                   format_unformat_error, input);
3208     }
3209
3210   im->lookup_table_nbuckets = nbuckets;
3211   im->lookup_table_size = heapsize;
3212
3213   return 0;
3214 }
3215
3216 VLIB_EARLY_CONFIG_FUNCTION (ip6_config, "ip6");
3217
3218 /*
3219  * fd.io coding-style-patch-verification: ON
3220  *
3221  * Local Variables:
3222  * eval: (c-set-style "gnu")
3223  * End:
3224  */