VPP-1283: IPv6 PMTU missing MTU value in ICMP6 message.
[vpp.git] / src / vnet / ip / ip6_forward.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip6_forward.c: IP v6 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip6_neighbor.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
45 #include <vppinfra/cache.h>
46 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
47 #include <vnet/fib/ip6_fib.h>
48 #include <vnet/mfib/ip6_mfib.h>
49 #include <vnet/dpo/load_balance_map.h>
50 #include <vnet/dpo/classify_dpo.h>
51
52 #include <vppinfra/bihash_template.c>
53 #include <vnet/ip/ip6_forward.h>
54
55 /* Flag used by IOAM code. Classifier sets it pop-hop-by-hop checks it */
56 #define OI_DECAP   0x80000000
57
58 static void
59 ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
60                           ip6_main_t * im, u32 fib_index,
61                           ip_interface_address_t * a)
62 {
63   ip_lookup_main_t *lm = &im->lookup_main;
64   ip6_address_t *address = ip_interface_address_get_address (lm, a);
65   fib_prefix_t pfx = {
66     .fp_len = a->address_length,
67     .fp_proto = FIB_PROTOCOL_IP6,
68     .fp_addr.ip6 = *address,
69   };
70
71   if (a->address_length < 128)
72     {
73       fib_table_entry_update_one_path (fib_index,
74                                        &pfx,
75                                        FIB_SOURCE_INTERFACE,
76                                        (FIB_ENTRY_FLAG_CONNECTED |
77                                         FIB_ENTRY_FLAG_ATTACHED),
78                                        DPO_PROTO_IP6,
79                                        /* No next-hop address */
80                                        NULL, sw_if_index,
81                                        /* invalid FIB index */
82                                        ~0, 1,
83                                        /* no label stack */
84                                        NULL, FIB_ROUTE_PATH_FLAG_NONE);
85     }
86
87   pfx.fp_len = 128;
88   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
89     {
90       u32 classify_table_index =
91         lm->classify_table_index_by_sw_if_index[sw_if_index];
92       if (classify_table_index != (u32) ~ 0)
93         {
94           dpo_id_t dpo = DPO_INVALID;
95
96           dpo_set (&dpo,
97                    DPO_CLASSIFY,
98                    DPO_PROTO_IP6,
99                    classify_dpo_create (DPO_PROTO_IP6, classify_table_index));
100
101           fib_table_entry_special_dpo_add (fib_index,
102                                            &pfx,
103                                            FIB_SOURCE_CLASSIFY,
104                                            FIB_ENTRY_FLAG_NONE, &dpo);
105           dpo_reset (&dpo);
106         }
107     }
108
109   fib_table_entry_update_one_path (fib_index, &pfx,
110                                    FIB_SOURCE_INTERFACE,
111                                    (FIB_ENTRY_FLAG_CONNECTED |
112                                     FIB_ENTRY_FLAG_LOCAL),
113                                    DPO_PROTO_IP6,
114                                    &pfx.fp_addr,
115                                    sw_if_index, ~0,
116                                    1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
117 }
118
119 static void
120 ip6_del_interface_routes (ip6_main_t * im,
121                           u32 fib_index,
122                           ip6_address_t * address, u32 address_length)
123 {
124   fib_prefix_t pfx = {
125     .fp_len = address_length,
126     .fp_proto = FIB_PROTOCOL_IP6,
127     .fp_addr.ip6 = *address,
128   };
129
130   if (pfx.fp_len < 128)
131     {
132       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
133
134     }
135
136   pfx.fp_len = 128;
137   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
138 }
139
140 void
141 ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
142 {
143   ip6_main_t *im = &ip6_main;
144
145   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
146
147   /*
148    * enable/disable only on the 1<->0 transition
149    */
150   if (is_enable)
151     {
152       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
153         return;
154     }
155   else
156     {
157       /* The ref count is 0 when an address is removed from an interface that has
158        * no address - this is not a ciritical error */
159       if (0 == im->ip_enabled_by_sw_if_index[sw_if_index] ||
160           0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
161         return;
162     }
163
164   vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index,
165                                !is_enable, 0, 0);
166
167   vnet_feature_enable_disable ("ip6-multicast", "ip6-not-enabled",
168                                sw_if_index, !is_enable, 0, 0);
169 }
170
171 /* get first interface address */
172 ip6_address_t *
173 ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index)
174 {
175   ip_lookup_main_t *lm = &im->lookup_main;
176   ip_interface_address_t *ia = 0;
177   ip6_address_t *result = 0;
178
179   /* *INDENT-OFF* */
180   foreach_ip_interface_address (lm, ia, sw_if_index,
181                                 1 /* honor unnumbered */,
182   ({
183     ip6_address_t * a = ip_interface_address_get_address (lm, ia);
184     result = a;
185     break;
186   }));
187   /* *INDENT-ON* */
188   return result;
189 }
190
191 clib_error_t *
192 ip6_add_del_interface_address (vlib_main_t * vm,
193                                u32 sw_if_index,
194                                ip6_address_t * address,
195                                u32 address_length, u32 is_del)
196 {
197   vnet_main_t *vnm = vnet_get_main ();
198   ip6_main_t *im = &ip6_main;
199   ip_lookup_main_t *lm = &im->lookup_main;
200   clib_error_t *error;
201   u32 if_address_index;
202   ip6_address_fib_t ip6_af, *addr_fib = 0;
203
204   /* local0 interface doesn't support IP addressing */
205   if (sw_if_index == 0)
206     {
207       return
208         clib_error_create ("local0 interface doesn't support IP addressing");
209     }
210
211   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
212   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
213
214   ip6_addr_fib_init (&ip6_af, address,
215                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
216   vec_add1 (addr_fib, ip6_af);
217
218   /* *INDENT-OFF* */
219   if (!is_del)
220     {
221       /* When adding an address check that it does not conflict
222          with an existing address on any interface in this table. */
223       ip_interface_address_t *ia;
224       vnet_sw_interface_t *sif;
225
226       pool_foreach(sif, vnm->interface_main.sw_interfaces,
227       ({
228           if (im->fib_index_by_sw_if_index[sw_if_index] ==
229               im->fib_index_by_sw_if_index[sif->sw_if_index])
230             {
231               foreach_ip_interface_address
232                 (&im->lookup_main, ia, sif->sw_if_index,
233                  0 /* honor unnumbered */ ,
234                  ({
235                    ip6_address_t * x =
236                      ip_interface_address_get_address
237                      (&im->lookup_main, ia);
238                    if (ip6_destination_matches_route
239                        (im, address, x, ia->address_length) ||
240                        ip6_destination_matches_route (im,
241                                                       x,
242                                                       address,
243                                                       address_length))
244                      {
245                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
246                        return
247                          clib_error_create
248                          ("failed to add %U which conflicts with %U for interface %U",
249                           format_ip6_address_and_length, address,
250                           address_length,
251                           format_ip6_address_and_length, x,
252                           ia->address_length,
253                           format_vnet_sw_if_index_name, vnm,
254                           sif->sw_if_index);
255                      }
256                  }));
257             }
258       }));
259     }
260   /* *INDENT-ON* */
261
262   {
263     uword elts_before = pool_elts (lm->if_address_pool);
264
265     error = ip_interface_address_add_del
266       (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
267     if (error)
268       goto done;
269
270     /* Pool did not grow: add duplicate address. */
271     if (elts_before == pool_elts (lm->if_address_pool))
272       goto done;
273   }
274
275   ip6_sw_interface_enable_disable (sw_if_index, !is_del);
276
277   if (is_del)
278     ip6_del_interface_routes (im, ip6_af.fib_index, address, address_length);
279   else
280     ip6_add_interface_routes (vnm, sw_if_index,
281                               im, ip6_af.fib_index,
282                               pool_elt_at_index (lm->if_address_pool,
283                                                  if_address_index));
284
285   {
286     ip6_add_del_interface_address_callback_t *cb;
287     vec_foreach (cb, im->add_del_interface_address_callbacks)
288       cb->function (im, cb->function_opaque, sw_if_index,
289                     address, address_length, if_address_index, is_del);
290   }
291
292 done:
293   vec_free (addr_fib);
294   return error;
295 }
296
297 clib_error_t *
298 ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
299 {
300   ip6_main_t *im = &ip6_main;
301   ip_interface_address_t *ia;
302   ip6_address_t *a;
303   u32 is_admin_up, fib_index;
304
305   /* Fill in lookup tables with default table (0). */
306   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
307
308   vec_validate_init_empty (im->
309                            lookup_main.if_address_pool_index_by_sw_if_index,
310                            sw_if_index, ~0);
311
312   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
313
314   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
315
316   /* *INDENT-OFF* */
317   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
318                                 0 /* honor unnumbered */,
319   ({
320     a = ip_interface_address_get_address (&im->lookup_main, ia);
321     if (is_admin_up)
322       ip6_add_interface_routes (vnm, sw_if_index,
323                                 im, fib_index,
324                                 ia);
325     else
326       ip6_del_interface_routes (im, fib_index,
327                                 a, ia->address_length);
328   }));
329   /* *INDENT-ON* */
330
331   return 0;
332 }
333
334 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
335
/*
 * Feature-arc registrations for IPv6.
 *
 * Three arcs are declared below: "ip6-unicast" and "ip6-multicast" (both
 * started from "ip6-input") and "ip6-output" (started from the rewrite,
 * midchain and dvr-dpo nodes).  Each VNET_FEATURE_INIT names the arc it
 * joins, the graph node implementing the feature and, through
 * .runs_before, the feature(s) it is ordered ahead of; a .runs_before of
 * 0 marks the last feature on the arc.
 */
336 /* Built-in ip6 unicast rx feature path definition */
337 /* *INDENT-OFF* */
338 VNET_FEATURE_ARC_INIT (ip6_unicast, static) =
339 {
340   .arc_name  = "ip6-unicast",
341   .start_nodes = VNET_FEATURES ("ip6-input"),
342   .arc_index_ptr = &ip6_main.lookup_main.ucast_feature_arc_index,
343 };
344
345 VNET_FEATURE_INIT (ip6_flow_classify, static) =
346 {
347   .arc_name = "ip6-unicast",
348   .node_name = "ip6-flow-classify",
349   .runs_before = VNET_FEATURES ("ip6-inacl"),
350 };
351
352 VNET_FEATURE_INIT (ip6_inacl, static) =
353 {
354   .arc_name = "ip6-unicast",
355   .node_name = "ip6-inacl",
356   .runs_before = VNET_FEATURES ("ip6-policer-classify"),
357 };
358
359 VNET_FEATURE_INIT (ip6_policer_classify, static) =
360 {
361   .arc_name = "ip6-unicast",
362   .node_name = "ip6-policer-classify",
363   .runs_before = VNET_FEATURES ("ipsec-input-ip6"),
364 };
365
366 VNET_FEATURE_INIT (ip6_ipsec, static) =
367 {
368   .arc_name = "ip6-unicast",
369   .node_name = "ipsec-input-ip6",
370   .runs_before = VNET_FEATURES ("l2tp-decap"),
371 };
372
373 VNET_FEATURE_INIT (ip6_l2tp, static) =
374 {
375   .arc_name = "ip6-unicast",
376   .node_name = "l2tp-decap",
377   .runs_before = VNET_FEATURES ("vpath-input-ip6"),
378 };
379
380 VNET_FEATURE_INIT (ip6_vpath, static) =
381 {
382   .arc_name = "ip6-unicast",
383   .node_name = "vpath-input-ip6",
384   .runs_before = VNET_FEATURES ("ip6-vxlan-bypass"),
385 };
386
387 VNET_FEATURE_INIT (ip6_vxlan_bypass, static) =
388 {
389   .arc_name = "ip6-unicast",
390   .node_name = "ip6-vxlan-bypass",
391   .runs_before = VNET_FEATURES ("ip6-lookup"),
392 };
393
/* Toggled per interface by ip6_sw_interface_enable_disable() and
 * ip6_sw_interface_add_del() in this file. */
394 VNET_FEATURE_INIT (ip6_not_enabled, static) =
395 {
396   .arc_name = "ip6-unicast",
397   .node_name = "ip6-not-enabled",
398   .runs_before = VNET_FEATURES ("ip6-lookup"),
399 };
400
401 VNET_FEATURE_INIT (ip6_lookup, static) =
402 {
403   .arc_name = "ip6-unicast",
404   .node_name = "ip6-lookup",
405   .runs_before = 0,  /*last feature*/
406 };
407
408 /* Built-in ip6 multicast rx feature path definition */
409 VNET_FEATURE_ARC_INIT (ip6_multicast, static) =
410 {
411   .arc_name  = "ip6-multicast",
412   .start_nodes = VNET_FEATURES ("ip6-input"),
413   .arc_index_ptr = &ip6_main.lookup_main.mcast_feature_arc_index,
414 };
415
416 VNET_FEATURE_INIT (ip6_vpath_mc, static) = {
417   .arc_name = "ip6-multicast",
418   .node_name = "vpath-input-ip6",
419   .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
420 };
421
422 VNET_FEATURE_INIT (ip6_not_enabled_mc, static) = {
423   .arc_name = "ip6-multicast",
424   .node_name = "ip6-not-enabled",
425   .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
426 };
427
428 VNET_FEATURE_INIT (ip6_mc_lookup, static) = {
429   .arc_name = "ip6-multicast",
430   .node_name = "ip6-mfib-forward-lookup",
431   .runs_before = 0, /* last feature */
432 };
433
434 /* Built-in ip6 tx feature path definition */
435 VNET_FEATURE_ARC_INIT (ip6_output, static) =
436 {
437   .arc_name  = "ip6-output",
438   .start_nodes = VNET_FEATURES ("ip6-rewrite", "ip6-midchain", "ip6-dvr-dpo"),
439   .arc_index_ptr = &ip6_main.lookup_main.output_feature_arc_index,
440 };
441
442 VNET_FEATURE_INIT (ip6_outacl, static) = {
443   .arc_name = "ip6-output",
444   .node_name = "ip6-outacl",
445   .runs_before = VNET_FEATURES ("ipsec-output-ip6"),
446 };
447
448 VNET_FEATURE_INIT (ip6_ipsec_output, static) = {
449   .arc_name = "ip6-output",
450   .node_name = "ipsec-output-ip6",
451   .runs_before = VNET_FEATURES ("interface-output"),
452 };
453
454 VNET_FEATURE_INIT (ip6_interface_output, static) = {
455   .arc_name = "ip6-output",
456   .node_name = "interface-output",
457   .runs_before = 0, /* not before any other features */
458 };
459 /* *INDENT-ON* */
460
461 clib_error_t *
462 ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
463 {
464   ip6_main_t *im = &ip6_main;
465
466   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
467   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
468
469   if (!is_add)
470     {
471       /* Ensure that IPv6 is disabled */
472       ip6_main_t *im6 = &ip6_main;
473       ip_lookup_main_t *lm6 = &im6->lookup_main;
474       ip_interface_address_t *ia = 0;
475       ip6_address_t *address;
476       vlib_main_t *vm = vlib_get_main ();
477
478       ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, 0 /* is_add */ );
479       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
480       /* *INDENT-OFF* */
481       foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
482       ({
483         address = ip_interface_address_get_address (lm6, ia);
484         ip6_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
485       }));
486       /* *INDENT-ON* */
487       ip6_mfib_interface_enable_disable (sw_if_index, 0);
488     }
489
490   vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index,
491                                is_add, 0, 0);
492
493   vnet_feature_enable_disable ("ip6-multicast", "ip6-not-enabled",
494                                sw_if_index, is_add, 0, 0);
495
496   return /* no error */ 0;
497 }
498
499 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del);
500
501 static uword
502 ip6_lookup (vlib_main_t * vm,
503             vlib_node_runtime_t * node, vlib_frame_t * frame)
504 {
505   return ip6_lookup_inline (vm, node, frame);
506 }
507
/* Forward declaration: the trace formatter is defined further down in
 * this file but is referenced by the node registrations before it. */
508 static u8 *format_ip6_lookup_trace (u8 * s, va_list * args);
509
/* Graph node "ip6-lookup": runs ip6_lookup() over vectors of u32 buffer
 * indices; its next nodes come from IP6_LOOKUP_NEXT_NODES. */
510 /* *INDENT-OFF* */
511 VLIB_REGISTER_NODE (ip6_lookup_node) =
512 {
513   .function = ip6_lookup,
514   .name = "ip6-lookup",
515   .vector_size = sizeof (u32),
516   .format_trace = format_ip6_lookup_trace,
517   .n_next_nodes = IP6_LOOKUP_N_NEXT,
518   .next_nodes = IP6_LOOKUP_NEXT_NODES,
519 };
520 /* *INDENT-ON* */
521
522 VLIB_NODE_FUNCTION_MULTIARCH (ip6_lookup_node, ip6_lookup);
523
/*
 * Node function of "ip6-load-balance".
 *
 * For every buffer in the frame:
 *  - the load-balance object is fetched using the index found in
 *    ip.adj_index[VLIB_TX];
 *  - one of its buckets is chosen (bucket 0 when there is a single
 *    bucket, otherwise by masking the flow hash with
 *    lb_n_buckets_minus_1);
 *  - ip.adj_index[VLIB_TX] is overwritten with the chosen DPO's index
 *    and the buffer is sent to that DPO's next node;
 *  - packets whose IPv6 protocol field is hop-by-hop options are
 *    diverted to IP6_LOOKUP_NEXT_HOP_BY_HOP when the chosen DPO is an
 *    adjacency and im->hbh_enabled is set;
 *  - the load-balance "via" combined counter is bumped by one packet and
 *    the buffer chain length.
 *
 * Returns the number of vectors in the frame.
 */
524 always_inline uword
525 ip6_load_balance (vlib_main_t * vm,
526                   vlib_node_runtime_t * node, vlib_frame_t * frame)
527 {
528   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
529   u32 n_left_from, n_left_to_next, *from, *to_next;
530   ip_lookup_next_t next;
531   u32 thread_index = vlib_get_thread_index ();
532   ip6_main_t *im = &ip6_main;
533
534   from = vlib_frame_vector_args (frame);
535   n_left_from = frame->n_vectors;
536   next = node->cached_next_index;
537
538   if (node->flags & VLIB_NODE_FLAG_TRACE)
539     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
540
541   while (n_left_from > 0)
542     {
543       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
544
545
      /* Dual loop: two packets per iteration.  At least four must remain
       * so that from[2] and from[3] are valid prefetch targets. */
546       while (n_left_from >= 4 && n_left_to_next >= 2)
547         {
548           ip_lookup_next_t next0, next1;
549           const load_balance_t *lb0, *lb1;
550           vlib_buffer_t *p0, *p1;
551           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
552           const ip6_header_t *ip0, *ip1;
553           const dpo_id_t *dpo0, *dpo1;
554
555           /* Prefetch next iteration. */
556           {
557             vlib_buffer_t *p2, *p3;
558
559             p2 = vlib_get_buffer (vm, from[2]);
560             p3 = vlib_get_buffer (vm, from[3]);
561
562             vlib_prefetch_buffer_header (p2, STORE);
563             vlib_prefetch_buffer_header (p3, STORE);
564
565             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
566             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
567           }
568
          /* Speculatively enqueue both buffers to the cached next node;
           * vlib_validate_buffer_enqueue_x2 below is given the real
           * next0/next1. */
569           pi0 = to_next[0] = from[0];
570           pi1 = to_next[1] = from[1];
571
572           from += 2;
573           n_left_from -= 2;
574           to_next += 2;
575           n_left_to_next -= 2;
576
577           p0 = vlib_get_buffer (vm, pi0);
578           p1 = vlib_get_buffer (vm, pi1);
579
580           ip0 = vlib_buffer_get_current (p0);
581           ip1 = vlib_buffer_get_current (p1);
          /* On entry adj_index[VLIB_TX] holds a load-balance index. */
582           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
583           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
584
585           lb0 = load_balance_get (lbi0);
586           lb1 = load_balance_get (lbi1);
587
588           /*
589            * this node is for via FIBs we can re-use the hash value from the
590            * to node if present.
591            * We don't want to use the same hash value at each level in the recursion
592            * graph as that would lead to polarisation
593            */
594           hc0 = hc1 = 0;
595
596           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
597             {
598               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
599                 {
                  /* Re-use the stored hash, shifted right by one bit, and
                   * store the shifted value back in the buffer. */
600                   hc0 = vnet_buffer (p0)->ip.flow_hash =
601                     vnet_buffer (p0)->ip.flow_hash >> 1;
602                 }
603               else
604                 {
                  /* No hash stored yet: compute one and remember it. */
605                   hc0 = vnet_buffer (p0)->ip.flow_hash =
606                     ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
607                 }
608               dpo0 =
609                 load_balance_get_fwd_bucket (lb0,
610                                              (hc0 &
611                                               lb0->lb_n_buckets_minus_1));
612             }
613           else
614             {
615               dpo0 = load_balance_get_bucket_i (lb0, 0);
616             }
617           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
618             {
619               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
620                 {
621                   hc1 = vnet_buffer (p1)->ip.flow_hash =
622                     vnet_buffer (p1)->ip.flow_hash >> 1;
623                 }
624               else
625                 {
626                   hc1 = vnet_buffer (p1)->ip.flow_hash =
627                     ip6_compute_flow_hash (ip1, lb1->lb_hash_config);
628                 }
629               dpo1 =
630                 load_balance_get_fwd_bucket (lb1,
631                                              (hc1 &
632                                               lb1->lb_n_buckets_minus_1));
633             }
634           else
635             {
636               dpo1 = load_balance_get_bucket_i (lb1, 0);
637             }
638
639           next0 = dpo0->dpoi_next_node;
640           next1 = dpo1->dpoi_next_node;
641
642           /* Only process the HBH Option Header if explicitly configured to do so */
643           if (PREDICT_FALSE
644               (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
645             {
646               next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
647                 (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
648             }
649           /* Only process the HBH Option Header if explicitly configured to do so */
650           if (PREDICT_FALSE
651               (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
652             {
653               next1 = (dpo_is_adj (dpo1) && im->hbh_enabled) ?
654                 (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next1;
655             }
656
          /* Hand the chosen DPO's index on to the next node. */
657           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
658           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
659
          /* Account one packet and its chain length against the
           * load-balance that was traversed. */
660           vlib_increment_combined_counter
661             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
662           vlib_increment_combined_counter
663             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
664
665           vlib_validate_buffer_enqueue_x2 (vm, node, next,
666                                            to_next, n_left_to_next,
667                                            pi0, pi1, next0, next1);
668         }
669
      /* Single loop: same processing as above, one packet at a time,
       * for whatever the dual loop left over. */
670       while (n_left_from > 0 && n_left_to_next > 0)
671         {
672           ip_lookup_next_t next0;
673           const load_balance_t *lb0;
674           vlib_buffer_t *p0;
675           u32 pi0, lbi0, hc0;
676           const ip6_header_t *ip0;
677           const dpo_id_t *dpo0;
678
679           pi0 = from[0];
680           to_next[0] = pi0;
681           from += 1;
682           to_next += 1;
683           n_left_to_next -= 1;
684           n_left_from -= 1;
685
686           p0 = vlib_get_buffer (vm, pi0);
687
688           ip0 = vlib_buffer_get_current (p0);
689           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
690
691           lb0 = load_balance_get (lbi0);
692
693           hc0 = 0;
694           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
695             {
696               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
697                 {
698                   hc0 = vnet_buffer (p0)->ip.flow_hash =
699                     vnet_buffer (p0)->ip.flow_hash >> 1;
700                 }
701               else
702                 {
703                   hc0 = vnet_buffer (p0)->ip.flow_hash =
704                     ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
705                 }
706               dpo0 =
707                 load_balance_get_fwd_bucket (lb0,
708                                              (hc0 &
709                                               lb0->lb_n_buckets_minus_1));
710             }
711           else
712             {
713               dpo0 = load_balance_get_bucket_i (lb0, 0);
714             }
715
716           next0 = dpo0->dpoi_next_node;
717           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
718
719           /* Only process the HBH Option Header if explicitly configured to do so */
720           if (PREDICT_FALSE
721               (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
722             {
723               next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
724                 (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
725             }
726
727           vlib_increment_combined_counter
728             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
729
730           vlib_validate_buffer_enqueue_x1 (vm, node, next,
731                                            to_next, n_left_to_next,
732                                            pi0, next0);
733         }
734
735       vlib_put_next_frame (vm, node, next, n_left_to_next);
736     }
737
738   return frame->n_vectors;
739 }
740
/* Graph node "ip6-load-balance": runs ip6_load_balance() and is declared
 * a sibling of "ip6-lookup"; it uses the same trace formatter as the
 * lookup node. */
741 /* *INDENT-OFF* */
742 VLIB_REGISTER_NODE (ip6_load_balance_node) =
743 {
744   .function = ip6_load_balance,
745   .name = "ip6-load-balance",
746   .vector_size = sizeof (u32),
747   .sibling_of = "ip6-lookup",
748   .format_trace = format_ip6_lookup_trace,
749 };
750 /* *INDENT-ON* */
751
752 VLIB_NODE_FUNCTION_MULTIARCH (ip6_load_balance_node, ip6_load_balance);
753
754 typedef struct
755 {
756   /* Adjacency taken. */
757   u32 adj_index;
758   u32 flow_hash;
759   u32 fib_index;
760
761   /* Packet data, possibly *after* rewrite. */
762   u8 packet_data[128 - 1 * sizeof (u32)];
763 }
764 ip6_forward_next_trace_t;
765
766 u8 *
767 format_ip6_forward_next_trace (u8 * s, va_list * args)
768 {
769   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
770   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
771   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
772   u32 indent = format_get_indent (s);
773
774   s = format (s, "%U%U",
775               format_white_space, indent,
776               format_ip6_header, t->packet_data, sizeof (t->packet_data));
777   return s;
778 }
779
780 static u8 *
781 format_ip6_lookup_trace (u8 * s, va_list * args)
782 {
783   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
784   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
785   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
786   u32 indent = format_get_indent (s);
787
788   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
789               t->fib_index, t->adj_index, t->flow_hash);
790   s = format (s, "\n%U%U",
791               format_white_space, indent,
792               format_ip6_header, t->packet_data, sizeof (t->packet_data));
793   return s;
794 }
795
796
797 static u8 *
798 format_ip6_rewrite_trace (u8 * s, va_list * args)
799 {
800   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
801   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
802   ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
803   u32 indent = format_get_indent (s);
804
805   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
806               t->fib_index, t->adj_index, format_ip_adjacency,
807               t->adj_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
808   s = format (s, "\n%U%U",
809               format_white_space, indent,
810               format_ip_adjacency_packet_data,
811               t->adj_index, t->packet_data, sizeof (t->packet_data));
812   return s;
813 }
814
815 /* Common trace function for all ip6-forward next nodes. */
816 void
817 ip6_forward_next_trace (vlib_main_t * vm,
818                         vlib_node_runtime_t * node,
819                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
820 {
821   u32 *from, n_left;
822   ip6_main_t *im = &ip6_main;
823
824   n_left = frame->n_vectors;
825   from = vlib_frame_vector_args (frame);
826
827   while (n_left >= 4)
828     {
829       u32 bi0, bi1;
830       vlib_buffer_t *b0, *b1;
831       ip6_forward_next_trace_t *t0, *t1;
832
833       /* Prefetch next iteration. */
834       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
835       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
836
837       bi0 = from[0];
838       bi1 = from[1];
839
840       b0 = vlib_get_buffer (vm, bi0);
841       b1 = vlib_get_buffer (vm, bi1);
842
843       if (b0->flags & VLIB_BUFFER_IS_TRACED)
844         {
845           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
846           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
847           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
848           t0->fib_index =
849             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
850              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
851             vec_elt (im->fib_index_by_sw_if_index,
852                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
853
854           clib_memcpy (t0->packet_data,
855                        vlib_buffer_get_current (b0),
856                        sizeof (t0->packet_data));
857         }
858       if (b1->flags & VLIB_BUFFER_IS_TRACED)
859         {
860           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
861           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
862           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
863           t1->fib_index =
864             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
865              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
866             vec_elt (im->fib_index_by_sw_if_index,
867                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
868
869           clib_memcpy (t1->packet_data,
870                        vlib_buffer_get_current (b1),
871                        sizeof (t1->packet_data));
872         }
873       from += 2;
874       n_left -= 2;
875     }
876
877   while (n_left >= 1)
878     {
879       u32 bi0;
880       vlib_buffer_t *b0;
881       ip6_forward_next_trace_t *t0;
882
883       bi0 = from[0];
884
885       b0 = vlib_get_buffer (vm, bi0);
886
887       if (b0->flags & VLIB_BUFFER_IS_TRACED)
888         {
889           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
890           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
891           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
892           t0->fib_index =
893             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
894              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
895             vec_elt (im->fib_index_by_sw_if_index,
896                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
897
898           clib_memcpy (t0->packet_data,
899                        vlib_buffer_get_current (b0),
900                        sizeof (t0->packet_data));
901         }
902       from += 1;
903       n_left -= 1;
904     }
905 }
906
907 /* Compute TCP/UDP/ICMP6 checksum in software. */
908 u16
909 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
910                                    ip6_header_t * ip0, int *bogus_lengthp)
911 {
912   ip_csum_t sum0;
913   u16 sum16, payload_length_host_byte_order;
914   u32 i, n_this_buffer, n_bytes_left;
915   u32 headers_size = sizeof (ip0[0]);
916   void *data_this_buffer;
917
918   ASSERT (bogus_lengthp);
919   *bogus_lengthp = 0;
920
921   /* Initialize checksum with ip header. */
922   sum0 = ip0->payload_length + clib_host_to_net_u16 (ip0->protocol);
923   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
924   data_this_buffer = (void *) (ip0 + 1);
925
926   for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++)
927     {
928       sum0 = ip_csum_with_carry (sum0,
929                                  clib_mem_unaligned (&ip0->
930                                                      src_address.as_uword[i],
931                                                      uword));
932       sum0 =
933         ip_csum_with_carry (sum0,
934                             clib_mem_unaligned (&ip0->dst_address.as_uword[i],
935                                                 uword));
936     }
937
938   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets)
939    * or UDP-Ping packets */
940   if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
941     {
942       u32 skip_bytes;
943       ip6_hop_by_hop_ext_t *ext_hdr =
944         (ip6_hop_by_hop_ext_t *) data_this_buffer;
945
946       /* validate really icmp6 next */
947       ASSERT ((ext_hdr->next_hdr == IP_PROTOCOL_ICMP6)
948               || (ext_hdr->next_hdr == IP_PROTOCOL_UDP));
949
950       skip_bytes = 8 * (1 + ext_hdr->n_data_u64s);
951       data_this_buffer = (void *) ((u8 *) data_this_buffer + skip_bytes);
952
953       payload_length_host_byte_order -= skip_bytes;
954       headers_size += skip_bytes;
955     }
956
957   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
958   if (p0 && n_this_buffer + headers_size > p0->current_length)
959     n_this_buffer =
960       p0->current_length >
961       headers_size ? p0->current_length - headers_size : 0;
962   while (1)
963     {
964       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
965       n_bytes_left -= n_this_buffer;
966       if (n_bytes_left == 0)
967         break;
968
969       if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
970         {
971           *bogus_lengthp = 1;
972           return 0xfefe;
973         }
974       p0 = vlib_get_buffer (vm, p0->next_buffer);
975       data_this_buffer = vlib_buffer_get_current (p0);
976       n_this_buffer = p0->current_length;
977     }
978
979   sum16 = ~ip_csum_fold (sum0);
980
981   return sum16;
982 }
983
984 u32
985 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
986 {
987   ip6_header_t *ip0 = vlib_buffer_get_current (p0);
988   udp_header_t *udp0;
989   u16 sum16;
990   int bogus_length;
991
992   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
993   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
994           || ip0->protocol == IP_PROTOCOL_ICMP6
995           || ip0->protocol == IP_PROTOCOL_UDP
996           || ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS);
997
998   udp0 = (void *) (ip0 + 1);
999   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1000     {
1001       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1002                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1003       return p0->flags;
1004     }
1005
1006   sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, &bogus_length);
1007
1008   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1009                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1010
1011   return p0->flags;
1012 }
1013
1014 /**
1015  * @brief returns number of links on which src is reachable.
1016  */
1017 always_inline int
1018 ip6_urpf_loose_check (ip6_main_t * im, vlib_buffer_t * b, ip6_header_t * i)
1019 {
1020   const load_balance_t *lb0;
1021   index_t lbi;
1022   u32 fib_index;
1023
1024   fib_index = vec_elt (im->fib_index_by_sw_if_index,
1025                        vnet_buffer (b)->sw_if_index[VLIB_RX]);
1026   fib_index =
1027     (vnet_buffer (b)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
1028     fib_index : vnet_buffer (b)->sw_if_index[VLIB_TX];
1029
1030   lbi = ip6_fib_table_fwding_lookup (im, fib_index, &i->src_address);
1031   lb0 = load_balance_get (lbi);
1032
1033   return (fib_urpf_check_size (lb0->lb_urpf));
1034 }
1035
1036 always_inline u8
1037 ip6_next_proto_is_tcp_udp (vlib_buffer_t * p0, ip6_header_t * ip0,
1038                            u32 * udp_offset0)
1039 {
1040   u32 proto0;
1041   proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_UDP, udp_offset0);
1042   if (proto0 != IP_PROTOCOL_UDP)
1043     {
1044       proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_TCP, udp_offset0);
1045       proto0 = (proto0 == IP_PROTOCOL_TCP) ? proto0 : 0;
1046     }
1047   return proto0;
1048 }
1049
1050 /* *INDENT-OFF* */
/* Declare the feature arc named "ip6-local"; its single start node is the
 * "ip6-local" graph node. */
1051 VNET_FEATURE_ARC_INIT (ip6_local) =
1052 {
1053   .arc_name  = "ip6-local",
1054   .start_nodes = VNET_FEATURES ("ip6-local"),
1055 };
1056 /* *INDENT-ON* */
1057
/**
 * @brief Shared worker for the "ip6-local" and "ip6-local-end-of-arc" nodes.
 *
 * Each buffer of the frame is dispatched to the next node found in
 * lm->local_next_by_ip_protocol[], indexed by the protocol field of the
 * IPv6 header.
 *
 * When head_of_feature_arc is non-zero the packet is validated first
 * (L4 checksum, UDP length, loose source uRPF) and its FIB index is stored
 * in the buffer metadata.  A packet that fails a check is sent to
 * IP_LOCAL_NEXT_DROP; a packet that passes is handed to
 * vnet_feature_arc_start() for the ip6-local arc.  When head_of_feature_arc
 * is zero all of that is skipped and only the dispatch is performed.
 *
 * Throughout this function IP6_ERROR_UNKNOWN_PROTOCOL is used as the
 * "no error found" value: only a different error code forces a drop.
 *
 * @param vm                   vlib main.
 * @param node                 runtime of the calling node.
 * @param frame                frame of buffer indices to process.
 * @param head_of_feature_arc  non-zero to validate packets and start the
 *                             feature arc, zero to dispatch only.
 *
 * @return frame->n_vectors.
 */
1058 static uword
1059 ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
1060                   vlib_frame_t * frame, int head_of_feature_arc)
1061 {
1062   ip6_main_t *im = &ip6_main;
1063   ip_lookup_main_t *lm = &im->lookup_main;
1064   ip_local_next_t next_index;
1065   u32 *from, *to_next, n_left_from, n_left_to_next;
       /* Error codes are taken from the error table of the ip6_input_node
        * runtime, not from this node's own runtime. */
1066   vlib_node_runtime_t *error_node =
1067     vlib_node_get_runtime (vm, ip6_input_node.index);
1068   u8 arc_index = vnet_feat_arc_ip6_local.feature_arc_index;
1069
1070   from = vlib_frame_vector_args (frame);
1071   n_left_from = frame->n_vectors;
1072   next_index = node->cached_next_index;
1073
1074   if (node->flags & VLIB_NODE_FLAG_TRACE)
1075     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1076
1077   while (n_left_from > 0)
1078     {
1079       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1080
           /* Dual loop: two packets per iteration, entered only while at
            * least four packets remain in the input frame. */
1081       while (n_left_from >= 4 && n_left_to_next >= 2)
1082         {
1083           vlib_buffer_t *p0, *p1;
1084           ip6_header_t *ip0, *ip1;
1085           udp_header_t *udp0, *udp1;
1086           u32 pi0, ip_len0, udp_len0, flags0, next0;
1087           u32 pi1, ip_len1, udp_len1, flags1, next1;
1088           i32 len_diff0, len_diff1;
1089           u8 error0, type0, good_l4_csum0, is_tcp_udp0;
1090           u8 error1, type1, good_l4_csum1, is_tcp_udp1;
1091           u32 udp_offset0, udp_offset1;
1092
               /* Speculatively enqueue both buffers to the current next
                * frame. */
1093           pi0 = to_next[0] = from[0];
1094           pi1 = to_next[1] = from[1];
1095           from += 2;
1096           n_left_from -= 2;
1097           to_next += 2;
1098           n_left_to_next -= 2;
1099
               /* "No error" sentinel, see the function header. */
1100           error0 = error1 = IP6_ERROR_UNKNOWN_PROTOCOL;
1101
1102           p0 = vlib_get_buffer (vm, pi0);
1103           p1 = vlib_get_buffer (vm, pi1);
1104
1105           ip0 = vlib_buffer_get_current (p0);
1106           ip1 = vlib_buffer_get_current (p1);
1107
               /* End-of-arc invocation: skip validation, dispatch only. */
1108           if (head_of_feature_arc == 0)
1109             goto skip_checks;
1110
1111           vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
1112           vnet_buffer (p1)->l3_hdr_offset = p1->current_data;
1113
               /* Built-in protocol class of the IPv6 header's protocol
                * field; drives the checksum and length checks below. */
1114           type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
1115           type1 = lm->builtin_protocol_by_ip_protocol[ip1->protocol];
1116
1117           flags0 = p0->flags;
1118           flags1 = p1->flags;
1119
               /* IP_PROTOCOL_UDP / IP_PROTOCOL_TCP if such a header was
                * located, 0 otherwise; udp_offsetN is then used as the
                * header's byte offset from the IPv6 header. */
1120           is_tcp_udp0 = ip6_next_proto_is_tcp_udp (p0, ip0, &udp_offset0);
1121           is_tcp_udp1 = ip6_next_proto_is_tcp_udp (p1, ip1, &udp_offset1);
1122
               /* The checksum counts as good when it is already flagged
                * correct or when a TCP/UDP checksum offload flag is set. */
1123           good_l4_csum0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
1124                            || (flags0 & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
1125                                || flags0 & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))
1126             != 0;
1127           good_l4_csum1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
1128                            || (flags1 & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
1129                                || flags1 & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))
1130             != 0;
1131           len_diff0 = 0;
1132           len_diff1 = 0;
1133
1134           if (PREDICT_TRUE (is_tcp_udp0))
1135             {
1136               udp0 = (udp_header_t *) ((u8 *) ip0 + udp_offset0);
1137               /* Don't verify UDP checksum for packets with explicit zero checksum. */
1138               good_l4_csum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP
1139                 && udp0->checksum == 0;
1140               /* Verify UDP length. */
1141               if (is_tcp_udp0 == IP_PROTOCOL_UDP)
1142                 {
1143                   ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
1144                   udp_len0 = clib_net_to_host_u16 (udp0->length);
                       /* Negative when the UDP length exceeds the IPv6
                        * payload length. */
1145                   len_diff0 = ip_len0 - udp_len0;
1146                 }
1147             }
1148           if (PREDICT_TRUE (is_tcp_udp1))
1149             {
1150               udp1 = (udp_header_t *) ((u8 *) ip1 + udp_offset1);
1151               /* Don't verify UDP checksum for packets with explicit zero checksum. */
1152               good_l4_csum1 |= type1 == IP_BUILTIN_PROTOCOL_UDP
1153                 && udp1->checksum == 0;
1154               /* Verify UDP length. */
1155               if (is_tcp_udp1 == IP_PROTOCOL_UDP)
1156                 {
1157                   ip_len1 = clib_net_to_host_u16 (ip1->payload_length);
1158                   udp_len1 = clib_net_to_host_u16 (udp1->length);
1159                   len_diff1 = ip_len1 - udp_len1;
1160                 }
1161             }
1162
               /* Protocols without a built-in class are never checksummed
                * here. */
1163           good_l4_csum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
1164           good_l4_csum1 |= type1 == IP_BUILTIN_PROTOCOL_UNKNOWN;
1165
               /* The length check only applies when the IPv6 header's own
                * protocol is classified as UDP. */
1166           len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
1167           len_diff1 = type1 == IP_BUILTIN_PROTOCOL_UDP ? len_diff1 : 0;
1168
               /* Checksum not known good and not computed yet: compute it
                * in software. */
1169           if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
1170                              && !good_l4_csum0
1171                              && !(flags0 &
1172                                   VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)))
1173             {
1174               flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0);
1175               good_l4_csum0 =
1176                 (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1177             }
1178           if (PREDICT_FALSE (type1 != IP_BUILTIN_PROTOCOL_UNKNOWN
1179                              && !good_l4_csum1
1180                              && !(flags1 &
1181                                   VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)))
1182             {
1183               flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, p1);
1184               good_l4_csum1 =
1185                 (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1186             }
1187
1188           error0 = error1 = IP6_ERROR_UNKNOWN_PROTOCOL;
1189           error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0;
1190           error1 = len_diff1 < 0 ? IP6_ERROR_UDP_LENGTH : error1;
1191
               /* The checksum error codes are laid out so that adding the
                * built-in protocol class to IP6_ERROR_UDP_CHECKSUM yields
                * the matching error code. */
1192           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
1193                   IP6_ERROR_UDP_CHECKSUM);
1194           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
1195                   IP6_ERROR_ICMP_CHECKSUM);
1196           error0 = (!good_l4_csum0 ? IP6_ERROR_UDP_CHECKSUM + type0 : error0);
1197           error1 = (!good_l4_csum1 ? IP6_ERROR_UDP_CHECKSUM + type1 : error1);
1198
1199           /* Drop packets from unroutable hosts. */
1200           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
               /* The check is also skipped for link-local unicast sources
                * and for packets that already carry an error. */
1201           if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL &&
1202               type0 != IP_BUILTIN_PROTOCOL_ICMP &&
1203               !ip6_address_is_link_local_unicast (&ip0->src_address))
1204             {
1205               error0 = (!ip6_urpf_loose_check (im, p0, ip0)
1206                         ? IP6_ERROR_SRC_LOOKUP_MISS : error0);
1207             }
1208           if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL &&
1209               type1 != IP_BUILTIN_PROTOCOL_ICMP &&
1210               !ip6_address_is_link_local_unicast (&ip1->src_address))
1211             {
1212               error1 = (!ip6_urpf_loose_check (im, p1, ip1)
1213                         ? IP6_ERROR_SRC_LOOKUP_MISS : error1);
1214             }
1215
1216           /* TODO maybe move to lookup? */
               /* Store the FIB index in the buffer: the TX sw_if_index value
                * when it is set (not ~0), otherwise the FIB bound to the RX
                * interface. */
1217           vnet_buffer (p0)->ip.fib_index =
1218             vec_elt (im->fib_index_by_sw_if_index,
1219                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
1220           vnet_buffer (p0)->ip.fib_index =
1221             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1222              (u32) ~ 0) ? vnet_buffer (p0)->ip.
1223             fib_index : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1224
1225           vnet_buffer (p1)->ip.fib_index =
1226             vec_elt (im->fib_index_by_sw_if_index,
1227                      vnet_buffer (p1)->sw_if_index[VLIB_RX]);
1228           vnet_buffer (p1)->ip.fib_index =
1229             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
1230              (u32) ~ 0) ? vnet_buffer (p1)->ip.
1231             fib_index : vnet_buffer (p1)->sw_if_index[VLIB_TX];
1232
1233
1234         skip_checks:
1235
               /* Dispatch on the IPv6 header's protocol unless an error was
                * recorded above. */
1236           next0 = lm->local_next_by_ip_protocol[ip0->protocol];
1237           next1 = lm->local_next_by_ip_protocol[ip1->protocol];
1238
1239           next0 =
1240             error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1241           next1 =
1242             error1 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1243
1244           p0->error = error_node->errors[error0];
1245           p1->error = error_node->errors[error1];
1246
               /* Packets without an error enter the ip6-local feature arc;
                * next0/next1 are passed by address so the call may change
                * them. */
1247           if (head_of_feature_arc)
1248             {
1249               if (PREDICT_TRUE (error0 == (u8) IP6_ERROR_UNKNOWN_PROTOCOL))
1250                 vnet_feature_arc_start (arc_index,
1251                                         vnet_buffer (p0)->sw_if_index
1252                                         [VLIB_RX], &next0, p0);
1253               if (PREDICT_TRUE (error1 == (u8) IP6_ERROR_UNKNOWN_PROTOCOL))
1254                 vnet_feature_arc_start (arc_index,
1255                                         vnet_buffer (p1)->sw_if_index
1256                                         [VLIB_RX], &next1, p1);
1257             }
1258
1259           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1260                                            to_next, n_left_to_next,
1261                                            pi0, pi1, next0, next1);
1262         }
1263
           /* Single loop: same processing as above, one packet at a time. */
1264       while (n_left_from > 0 && n_left_to_next > 0)
1265         {
1266           vlib_buffer_t *p0;
1267           ip6_header_t *ip0;
1268           udp_header_t *udp0;
1269           u32 pi0, ip_len0, udp_len0, flags0, next0;
1270           i32 len_diff0;
1271           u8 error0, type0, good_l4_csum0;
1272           u32 udp_offset0;
1273           u8 is_tcp_udp0;
1274
1275           pi0 = to_next[0] = from[0];
1276           from += 1;
1277           n_left_from -= 1;
1278           to_next += 1;
1279           n_left_to_next -= 1;
1280
               /* "No error" sentinel, see the function header. */
1281           error0 = IP6_ERROR_UNKNOWN_PROTOCOL;
1282
1283           p0 = vlib_get_buffer (vm, pi0);
1284           ip0 = vlib_buffer_get_current (p0);
1285
               /* End-of-arc invocation: skip validation, dispatch only. */
1286           if (head_of_feature_arc == 0)
1287             goto skip_check;
1288
1289           vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
1290
1291           type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
1292           flags0 = p0->flags;
1293           is_tcp_udp0 = ip6_next_proto_is_tcp_udp (p0, ip0, &udp_offset0);
1294           good_l4_csum0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
1295                            || (flags0 & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
1296                                || flags0 & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))
1297             != 0;
1298
1299           len_diff0 = 0;
1300           if (PREDICT_TRUE (is_tcp_udp0))
1301             {
1302               udp0 = (udp_header_t *) ((u8 *) ip0 + udp_offset0);
1303               /* Don't verify UDP checksum for packets with explicit zero
1304                * checksum. */
1305               good_l4_csum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP
1306                 && udp0->checksum == 0;
1307               /* Verify UDP length. */
1308               if (is_tcp_udp0 == IP_PROTOCOL_UDP)
1309                 {
1310                   ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
1311                   udp_len0 = clib_net_to_host_u16 (udp0->length);
1312                   len_diff0 = ip_len0 - udp_len0;
1313                 }
1314             }
1315
1316           good_l4_csum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
1317           len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
1318
               /* Checksum not known good and not computed yet: compute it
                * in software. */
1319           if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
1320                              && !good_l4_csum0
1321                              && !(flags0 &
1322                                   VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)))
1323             {
1324               flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0);
1325               good_l4_csum0 =
1326                 (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1327             }
1328
1329           error0 = IP6_ERROR_UNKNOWN_PROTOCOL;
1330           error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0;
1331
1332           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
1333                   IP6_ERROR_UDP_CHECKSUM);
1334           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
1335                   IP6_ERROR_ICMP_CHECKSUM);
1336           error0 = (!good_l4_csum0 ? IP6_ERROR_UDP_CHECKSUM + type0 : error0);
1337
1338           /* If this is a neighbor solicitation (ICMP), skip src RPF check */
1339           if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL &&
1340               type0 != IP_BUILTIN_PROTOCOL_ICMP &&
1341               !ip6_address_is_link_local_unicast (&ip0->src_address))
1342             {
1343               error0 = (!ip6_urpf_loose_check (im, p0, ip0)
1344                         ? IP6_ERROR_SRC_LOOKUP_MISS : error0);
1345             }
1346
               /* Store the FIB index: the TX sw_if_index value when set
                * (not ~0), otherwise the FIB bound to the RX interface. */
1347           vnet_buffer (p0)->ip.fib_index =
1348             vec_elt (im->fib_index_by_sw_if_index,
1349                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
1350           vnet_buffer (p0)->ip.fib_index =
1351             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1352              (u32) ~ 0) ? vnet_buffer (p0)->ip.
1353             fib_index : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1354
1355         skip_check:
1356
1357           next0 = lm->local_next_by_ip_protocol[ip0->protocol];
1358           next0 =
1359             error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1360
1361           p0->error = error_node->errors[error0];
1362
1363           if (head_of_feature_arc)
1364             {
1365               if (PREDICT_TRUE (error0 == (u8) IP6_ERROR_UNKNOWN_PROTOCOL))
1366                 vnet_feature_arc_start (arc_index,
1367                                         vnet_buffer (p0)->sw_if_index
1368                                         [VLIB_RX], &next0, p0);
1369             }
1370
1371           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1372                                            to_next, n_left_to_next,
1373                                            pi0, next0);
1374         }
1375
1376       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1377     }
1378
1379   return frame->n_vectors;
1380 }
1381
1382 static uword
1383 ip6_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1384 {
1385   return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1386 }
1387
1388 /* *INDENT-OFF* */
/* Graph node "ip6-local", implemented by ip6_local().  Its built-in next
 * nodes are drop, punt, UDP lookup and ICMP input; ip6_register_protocol()
 * adds further next nodes to it at run time. */
1389 VLIB_REGISTER_NODE (ip6_local_node, static) =
1390 {
1391   .function = ip6_local,
1392   .name = "ip6-local",
1393   .vector_size = sizeof (u32),
1394   .format_trace = format_ip6_forward_next_trace,
1395   .n_next_nodes = IP_LOCAL_N_NEXT,
1396   .next_nodes =
1397   {
1398     [IP_LOCAL_NEXT_DROP] = "ip6-drop",
1399     [IP_LOCAL_NEXT_PUNT] = "ip6-punt",
1400     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
1401     [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input",
1402   },
1403 };
1404 /* *INDENT-ON* */
1405
1406 VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_node, ip6_local);
1407
1408
1409 static uword
1410 ip6_local_end_of_arc (vlib_main_t * vm,
1411                       vlib_node_runtime_t * node, vlib_frame_t * frame)
1412 {
1413   return ip6_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1414 }
1415
1416 /* *INDENT-OFF* */
/* Graph node "ip6-local-end-of-arc", implemented by ip6_local_end_of_arc().
 * It is declared as a sibling of "ip6-local". */
1417 VLIB_REGISTER_NODE (ip6_local_end_of_arc_node,static) = {
1418   .function = ip6_local_end_of_arc,
1419   .name = "ip6-local-end-of-arc",
1420   .vector_size = sizeof (u32),
1421
1422   .format_trace = format_ip6_forward_next_trace,
1423   .sibling_of = "ip6-local",
1424 };
1425
1426 VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_end_of_arc_node, ip6_local_end_of_arc)
1427
/* Register "ip6-local-end-of-arc" as a feature on the "ip6-local" arc. */
1428 VNET_FEATURE_INIT (ip6_local_end_of_arc, static) = {
1429   .arc_name = "ip6-local",
1430   .node_name = "ip6-local-end-of-arc",
1431   .runs_before = 0, /* not before any other features */
1432 };
1433 /* *INDENT-ON* */
1434
1435 void
1436 ip6_register_protocol (u32 protocol, u32 node_index)
1437 {
1438   vlib_main_t *vm = vlib_get_main ();
1439   ip6_main_t *im = &ip6_main;
1440   ip_lookup_main_t *lm = &im->lookup_main;
1441
1442   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1443   lm->local_next_by_ip_protocol[protocol] =
1444     vlib_node_add_next (vm, ip6_local_node.index, node_index);
1445 }
1446
1447 clib_error_t *
1448 ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index)
1449 {
1450   vnet_main_t *vnm = vnet_get_main ();
1451   ip6_main_t *im = &ip6_main;
1452   icmp6_neighbor_solicitation_header_t *h;
1453   ip6_address_t *src;
1454   ip_interface_address_t *ia;
1455   ip_adjacency_t *adj;
1456   vnet_hw_interface_t *hi;
1457   vnet_sw_interface_t *si;
1458   vlib_buffer_t *b;
1459   adj_index_t ai;
1460   u32 bi = 0;
1461   int bogus_length;
1462
1463   si = vnet_get_sw_interface (vnm, sw_if_index);
1464
1465   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
1466     {
1467       return clib_error_return (0, "%U: interface %U down",
1468                                 format_ip6_address, dst,
1469                                 format_vnet_sw_if_index_name, vnm,
1470                                 sw_if_index);
1471     }
1472
1473   src =
1474     ip6_interface_address_matching_destination (im, dst, sw_if_index, &ia);
1475   if (!src)
1476     {
1477       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
1478       return clib_error_return
1479         (0, "no matching interface address for destination %U (interface %U)",
1480          format_ip6_address, dst,
1481          format_vnet_sw_if_index_name, vnm, sw_if_index);
1482     }
1483
1484   h =
1485     vlib_packet_template_get_packet (vm,
1486                                      &im->discover_neighbor_packet_template,
1487                                      &bi);
1488
1489   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
1490
1491   /* Destination address is a solicited node multicast address.  We need to fill in
1492      the low 24 bits with low 24 bits of target's address. */
1493   h->ip.dst_address.as_u8[13] = dst->as_u8[13];
1494   h->ip.dst_address.as_u8[14] = dst->as_u8[14];
1495   h->ip.dst_address.as_u8[15] = dst->as_u8[15];
1496
1497   h->ip.src_address = src[0];
1498   h->neighbor.target_address = dst[0];
1499
1500   if (PREDICT_FALSE (!hi->hw_address))
1501     {
1502       return clib_error_return (0, "%U: interface %U do not support ip probe",
1503                                 format_ip6_address, dst,
1504                                 format_vnet_sw_if_index_name, vnm,
1505                                 sw_if_index);
1506     }
1507
1508   clib_memcpy (h->link_layer_option.ethernet_address, hi->hw_address,
1509                vec_len (hi->hw_address));
1510
1511   h->neighbor.icmp.checksum =
1512     ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length);
1513   ASSERT (bogus_length == 0);
1514
1515   b = vlib_get_buffer (vm, bi);
1516   vnet_buffer (b)->sw_if_index[VLIB_RX] =
1517     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
1518
1519   /* Add encapsulation string for software interface (e.g. ethernet header). */
1520   ip46_address_t nh = {
1521     .ip6 = *dst,
1522   };
1523
1524   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6,
1525                             VNET_LINK_IP6, &nh, sw_if_index);
1526   adj = adj_get (ai);
1527
1528   /* Peer has been previously resolved, retrieve glean adj instead */
1529   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
1530     {
1531       adj_unlock (ai);
1532       ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP6,
1533                                   VNET_LINK_IP6, sw_if_index, &nh);
1534       adj = adj_get (ai);
1535     }
1536
1537   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
1538   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
1539
1540   {
1541     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
1542     u32 *to_next = vlib_frame_vector_args (f);
1543     to_next[0] = bi;
1544     f->n_vectors = 1;
1545     vlib_put_frame_to_node (vm, hi->output_node_index, f);
1546   }
1547
1548   adj_unlock (ai);
1549   return /* no error */ 0;
1550 }
1551
/** Next-node indices assigned by ip6_rewrite_inline() and ip6_mtu_check(). */
1552 typedef enum
1553 {
     /* Initial value of the next index in ip6_rewrite_inline(). */
1554   IP6_REWRITE_NEXT_DROP,
     /* Selected when an ICMPv6 error (hop limit expired, MTU exceeded) has
      * been recorded in the buffer. */
1555   IP6_REWRITE_NEXT_ICMP_ERROR,
1556 } ip6_rewrite_next_t;
1557
1558 /**
 * The bits of an IPv6 address to mask in order to construct a multicast
 * MAC address.
 */
1562 #define IP6_MCAST_ADDR_MASK 0xffffffff
1563
1564 always_inline void
1565 ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes,
1566                u16 adj_packet_bytes, u32 * next, u32 * error)
1567 {
1568   if (adj_packet_bytes >= 1280 && packet_bytes > adj_packet_bytes)
1569     {
1570       *error = IP6_ERROR_MTU_EXCEEDED;
1571       icmp6_error_set_vnet_buffer (b, ICMP6_packet_too_big, 0,
1572                                    adj_packet_bytes);
1573       *next = IP6_REWRITE_NEXT_ICMP_ERROR;
1574     }
1575 }
1576
1577 always_inline uword
1578 ip6_rewrite_inline (vlib_main_t * vm,
1579                     vlib_node_runtime_t * node,
1580                     vlib_frame_t * frame,
1581                     int do_counters, int is_midchain, int is_mcast)
1582 {
1583   ip_lookup_main_t *lm = &ip6_main.lookup_main;
1584   u32 *from = vlib_frame_vector_args (frame);
1585   u32 n_left_from, n_left_to_next, *to_next, next_index;
1586   vlib_node_runtime_t *error_node =
1587     vlib_node_get_runtime (vm, ip6_input_node.index);
1588
1589   n_left_from = frame->n_vectors;
1590   next_index = node->cached_next_index;
1591   u32 thread_index = vlib_get_thread_index ();
1592
1593   while (n_left_from > 0)
1594     {
1595       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1596
1597       while (n_left_from >= 4 && n_left_to_next >= 2)
1598         {
1599           ip_adjacency_t *adj0, *adj1;
1600           vlib_buffer_t *p0, *p1;
1601           ip6_header_t *ip0, *ip1;
1602           u32 pi0, rw_len0, next0, error0, adj_index0;
1603           u32 pi1, rw_len1, next1, error1, adj_index1;
1604           u32 tx_sw_if_index0, tx_sw_if_index1;
1605
1606           /* Prefetch next iteration. */
1607           {
1608             vlib_buffer_t *p2, *p3;
1609
1610             p2 = vlib_get_buffer (vm, from[2]);
1611             p3 = vlib_get_buffer (vm, from[3]);
1612
1613             vlib_prefetch_buffer_header (p2, LOAD);
1614             vlib_prefetch_buffer_header (p3, LOAD);
1615
1616             CLIB_PREFETCH (p2->pre_data, 32, STORE);
1617             CLIB_PREFETCH (p3->pre_data, 32, STORE);
1618
1619             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
1620             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
1621           }
1622
1623           pi0 = to_next[0] = from[0];
1624           pi1 = to_next[1] = from[1];
1625
1626           from += 2;
1627           n_left_from -= 2;
1628           to_next += 2;
1629           n_left_to_next -= 2;
1630
1631           p0 = vlib_get_buffer (vm, pi0);
1632           p1 = vlib_get_buffer (vm, pi1);
1633
1634           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1635           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
1636
1637           ip0 = vlib_buffer_get_current (p0);
1638           ip1 = vlib_buffer_get_current (p1);
1639
1640           error0 = error1 = IP6_ERROR_NONE;
1641           next0 = next1 = IP6_REWRITE_NEXT_DROP;
1642
1643           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
1644             {
1645               i32 hop_limit0 = ip0->hop_limit;
1646
1647               /* Input node should have reject packets with hop limit 0. */
1648               ASSERT (ip0->hop_limit > 0);
1649
1650               hop_limit0 -= 1;
1651
1652               ip0->hop_limit = hop_limit0;
1653
1654               /*
1655                * If the hop count drops below 1 when forwarding, generate
1656                * an ICMP response.
1657                */
1658               if (PREDICT_FALSE (hop_limit0 <= 0))
1659                 {
1660                   error0 = IP6_ERROR_TIME_EXPIRED;
1661                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
1662                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1663                   icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
1664                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
1665                                                0);
1666                 }
1667             }
1668           else
1669             {
1670               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
1671             }
1672           if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
1673             {
1674               i32 hop_limit1 = ip1->hop_limit;
1675
1676               /* Input node should have reject packets with hop limit 0. */
1677               ASSERT (ip1->hop_limit > 0);
1678
1679               hop_limit1 -= 1;
1680
1681               ip1->hop_limit = hop_limit1;
1682
1683               /*
1684                * If the hop count drops below 1 when forwarding, generate
1685                * an ICMP response.
1686                */
1687               if (PREDICT_FALSE (hop_limit1 <= 0))
1688                 {
1689                   error1 = IP6_ERROR_TIME_EXPIRED;
1690                   next1 = IP6_REWRITE_NEXT_ICMP_ERROR;
1691                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1692                   icmp6_error_set_vnet_buffer (p1, ICMP6_time_exceeded,
1693                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
1694                                                0);
1695                 }
1696             }
1697           else
1698             {
1699               p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
1700             }
1701           adj0 = adj_get (adj_index0);
1702           adj1 = adj_get (adj_index1);
1703
1704           rw_len0 = adj0[0].rewrite_header.data_bytes;
1705           rw_len1 = adj1[0].rewrite_header.data_bytes;
1706           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
1707           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
1708
1709           if (do_counters)
1710             {
1711               vlib_increment_combined_counter
1712                 (&adjacency_counters,
1713                  thread_index, adj_index0, 1,
1714                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
1715               vlib_increment_combined_counter
1716                 (&adjacency_counters,
1717                  thread_index, adj_index1, 1,
1718                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
1719             }
1720
1721           /* Check MTU of outgoing interface. */
1722           ip6_mtu_check (p0, clib_net_to_host_u16 (ip0->payload_length) +
1723                          sizeof (ip6_header_t),
1724                          adj0[0].rewrite_header.max_l3_packet_bytes,
1725                          &next0, &error0);
1726           ip6_mtu_check (p1, clib_net_to_host_u16 (ip1->payload_length) +
1727                          sizeof (ip6_header_t),
1728                          adj1[0].rewrite_header.max_l3_packet_bytes,
1729                          &next1, &error1);
1730
1731           /* Don't adjust the buffer for hop count issue; icmp-error node
1732            * wants to see the IP headerr */
1733           if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
1734             {
1735               p0->current_data -= rw_len0;
1736               p0->current_length += rw_len0;
1737
1738               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
1739               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
1740               next0 = adj0[0].rewrite_header.next_index;
1741
1742               if (PREDICT_FALSE
1743                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
1744                 vnet_feature_arc_start (lm->output_feature_arc_index,
1745                                         tx_sw_if_index0, &next0, p0);
1746             }
1747           if (PREDICT_TRUE (error1 == IP6_ERROR_NONE))
1748             {
1749               p1->current_data -= rw_len1;
1750               p1->current_length += rw_len1;
1751
1752               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
1753               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
1754               next1 = adj1[0].rewrite_header.next_index;
1755
1756               if (PREDICT_FALSE
1757                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
1758                 vnet_feature_arc_start (lm->output_feature_arc_index,
1759                                         tx_sw_if_index1, &next1, p1);
1760             }
1761
1762           /* Guess we are only writing on simple Ethernet header. */
1763           vnet_rewrite_two_headers (adj0[0], adj1[0],
1764                                     ip0, ip1, sizeof (ethernet_header_t));
1765
1766           if (is_midchain)
1767             {
1768               adj0->sub_type.midchain.fixup_func
1769                 (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
1770               adj1->sub_type.midchain.fixup_func
1771                 (vm, adj1, p1, adj1->sub_type.midchain.fixup_data);
1772             }
1773           if (is_mcast)
1774             {
1775               /*
1776                * copy bytes from the IP address into the MAC rewrite
1777                */
1778               vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
1779                                           adj0->
1780                                           rewrite_header.dst_mcast_offset,
1781                                           &ip0->dst_address.as_u32[3],
1782                                           (u8 *) ip0);
1783               vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
1784                                           adj1->
1785                                           rewrite_header.dst_mcast_offset,
1786                                           &ip1->dst_address.as_u32[3],
1787                                           (u8 *) ip1);
1788             }
1789
1790           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1791                                            to_next, n_left_to_next,
1792                                            pi0, pi1, next0, next1);
1793         }
1794
1795       while (n_left_from > 0 && n_left_to_next > 0)
1796         {
1797           ip_adjacency_t *adj0;
1798           vlib_buffer_t *p0;
1799           ip6_header_t *ip0;
1800           u32 pi0, rw_len0;
1801           u32 adj_index0, next0, error0;
1802           u32 tx_sw_if_index0;
1803
1804           pi0 = to_next[0] = from[0];
1805
1806           p0 = vlib_get_buffer (vm, pi0);
1807
1808           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1809
1810           adj0 = adj_get (adj_index0);
1811
1812           ip0 = vlib_buffer_get_current (p0);
1813
1814           error0 = IP6_ERROR_NONE;
1815           next0 = IP6_REWRITE_NEXT_DROP;
1816
1817           /* Check hop limit */
1818           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
1819             {
1820               i32 hop_limit0 = ip0->hop_limit;
1821
1822               ASSERT (ip0->hop_limit > 0);
1823
1824               hop_limit0 -= 1;
1825
1826               ip0->hop_limit = hop_limit0;
1827
1828               if (PREDICT_FALSE (hop_limit0 <= 0))
1829                 {
1830                   /*
1831                    * If the hop count drops below 1 when forwarding, generate
1832                    * an ICMP response.
1833                    */
1834                   error0 = IP6_ERROR_TIME_EXPIRED;
1835                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
1836                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1837                   icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
1838                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
1839                                                0);
1840                 }
1841             }
1842           else
1843             {
1844               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
1845             }
1846
1847           /* Guess we are only writing on simple Ethernet header. */
1848           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
1849
1850           /* Update packet buffer attributes/set output interface. */
1851           rw_len0 = adj0[0].rewrite_header.data_bytes;
1852           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
1853
1854           if (do_counters)
1855             {
1856               vlib_increment_combined_counter
1857                 (&adjacency_counters,
1858                  thread_index, adj_index0, 1,
1859                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
1860             }
1861
1862           /* Check MTU of outgoing interface. */
1863           ip6_mtu_check (p0, clib_net_to_host_u16 (ip0->payload_length) +
1864                          sizeof (ip6_header_t),
1865                          adj0[0].rewrite_header.max_l3_packet_bytes,
1866                          &next0, &error0);
1867
1868           /* Don't adjust the buffer for hop count issue; icmp-error node
1869            * wants to see the IP header */
1870           if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
1871             {
1872               p0->current_data -= rw_len0;
1873               p0->current_length += rw_len0;
1874
1875               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
1876
1877               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
1878               next0 = adj0[0].rewrite_header.next_index;
1879
1880               if (PREDICT_FALSE
1881                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
1882                 vnet_feature_arc_start (lm->output_feature_arc_index,
1883                                         tx_sw_if_index0, &next0, p0);
1884             }
1885
1886           if (is_midchain)
1887             {
1888               adj0->sub_type.midchain.fixup_func
1889                 (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
1890             }
1891           if (is_mcast)
1892             {
1893               vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
1894                                           adj0->
1895                                           rewrite_header.dst_mcast_offset,
1896                                           &ip0->dst_address.as_u32[3],
1897                                           (u8 *) ip0);
1898             }
1899
1900           p0->error = error_node->errors[error0];
1901
1902           from += 1;
1903           n_left_from -= 1;
1904           to_next += 1;
1905           n_left_to_next -= 1;
1906
1907           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1908                                            to_next, n_left_to_next,
1909                                            pi0, next0);
1910         }
1911
1912       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1913     }
1914
1915   /* Need to do trace after rewrites to pick up new packet data. */
1916   if (node->flags & VLIB_NODE_FLAG_TRACE)
1917     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1918
1919   return frame->n_vectors;
1920 }
1921
1922 static uword
1923 ip6_rewrite (vlib_main_t * vm,
1924              vlib_node_runtime_t * node, vlib_frame_t * frame)
1925 {
1926   if (adj_are_counters_enabled ())
1927     return ip6_rewrite_inline (vm, node, frame, 1, 0, 0);
1928   else
1929     return ip6_rewrite_inline (vm, node, frame, 0, 0, 0);
1930 }
1931
1932 static uword
1933 ip6_rewrite_mcast (vlib_main_t * vm,
1934                    vlib_node_runtime_t * node, vlib_frame_t * frame)
1935 {
1936   if (adj_are_counters_enabled ())
1937     return ip6_rewrite_inline (vm, node, frame, 1, 0, 1);
1938   else
1939     return ip6_rewrite_inline (vm, node, frame, 0, 0, 1);
1940 }
1941
1942 static uword
1943 ip6_midchain (vlib_main_t * vm,
1944               vlib_node_runtime_t * node, vlib_frame_t * frame)
1945 {
1946   if (adj_are_counters_enabled ())
1947     return ip6_rewrite_inline (vm, node, frame, 1, 1, 0);
1948   else
1949     return ip6_rewrite_inline (vm, node, frame, 0, 1, 0);
1950 }
1951
1952 static uword
1953 ip6_mcast_midchain (vlib_main_t * vm,
1954                     vlib_node_runtime_t * node, vlib_frame_t * frame)
1955 {
1956   if (adj_are_counters_enabled ())
1957     return ip6_rewrite_inline (vm, node, frame, 1, 1, 1);
1958   else
1959     return ip6_rewrite_inline (vm, node, frame, 0, 1, 1);
1960 }
1961
1962 /* *INDENT-OFF* */
1963 VLIB_REGISTER_NODE (ip6_midchain_node) =
1964 {
1965   .function = ip6_midchain,
1966   .name = "ip6-midchain",
1967   .vector_size = sizeof (u32),
1968   .format_trace = format_ip6_forward_next_trace,
1969   .sibling_of = "ip6-rewrite",
1970   };
1971 /* *INDENT-ON* */
1972
1973 VLIB_NODE_FUNCTION_MULTIARCH (ip6_midchain_node, ip6_midchain);
1974
1975 /* *INDENT-OFF* */
1976 VLIB_REGISTER_NODE (ip6_rewrite_node) =
1977 {
1978   .function = ip6_rewrite,
1979   .name = "ip6-rewrite",
1980   .vector_size = sizeof (u32),
1981   .format_trace = format_ip6_rewrite_trace,
1982   .n_next_nodes = 2,
1983   .next_nodes =
1984   {
1985     [IP6_REWRITE_NEXT_DROP] = "ip6-drop",
1986     [IP6_REWRITE_NEXT_ICMP_ERROR] = "ip6-icmp-error",
1987   },
1988 };
1989 /* *INDENT-ON* */
1990
1991 VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite);
1992
1993 /* *INDENT-OFF* */
1994 VLIB_REGISTER_NODE (ip6_rewrite_mcast_node) =
1995 {
1996   .function = ip6_rewrite_mcast,
1997   .name = "ip6-rewrite-mcast",
1998   .vector_size = sizeof (u32),
1999   .format_trace = format_ip6_rewrite_trace,
2000   .sibling_of = "ip6-rewrite",
2001 };
2002 /* *INDENT-ON* */
2003
2004 VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_mcast_node, ip6_rewrite_mcast);
2005
2006 /* *INDENT-OFF* */
2007 VLIB_REGISTER_NODE (ip6_mcast_midchain_node, static) =
2008 {
2009   .function = ip6_mcast_midchain,
2010   .name = "ip6-mcast-midchain",
2011   .vector_size = sizeof (u32),
2012   .format_trace = format_ip6_rewrite_trace,
2013   .sibling_of = "ip6-rewrite",
2014 };
2015 /* *INDENT-ON* */
2016
2017 VLIB_NODE_FUNCTION_MULTIARCH (ip6_mcast_midchain_node, ip6_mcast_midchain);
2018
2019 /*
2020  * Hop-by-Hop handling
2021  */
2022 ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
2023
2024 #define foreach_ip6_hop_by_hop_error \
2025 _(PROCESSED, "pkts with ip6 hop-by-hop options") \
2026 _(FORMAT, "incorrectly formatted hop-by-hop options") \
2027 _(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options")
2028
2029 /* *INDENT-OFF* */
2030 typedef enum
2031 {
2032 #define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
2033   foreach_ip6_hop_by_hop_error
2034 #undef _
2035   IP6_HOP_BY_HOP_N_ERROR,
2036 } ip6_hop_by_hop_error_t;
2037 /* *INDENT-ON* */
2038
/*
 * Primary h-b-h handler trace support
 * We work pretty hard on the problem for obvious reasons
 *
 * One record is added per traced buffer by the ip6-hop-by-hop node and
 * rendered by format_ip6_hop_by_hop_trace().
 */
typedef struct
{
  /* Next index selected for the packet. */
  u32 next_index;
  /* Number of valid bytes in option_data (capped to its size). */
  u32 trace_len;
  /* Verbatim copy of the packet starting at the hop-by-hop header. */
  u8 option_data[256];
} ip6_hop_by_hop_trace_t;
2049
2050 vlib_node_registration_t ip6_hop_by_hop_node;
2051
2052 static char *ip6_hop_by_hop_error_strings[] = {
2053 #define _(sym,string) string,
2054   foreach_ip6_hop_by_hop_error
2055 #undef _
2056 };
2057
2058 u8 *
2059 format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args)
2060 {
2061   ip6_hop_by_hop_header_t *hbh0 = va_arg (*args, ip6_hop_by_hop_header_t *);
2062   int total_len = va_arg (*args, int);
2063   ip6_hop_by_hop_option_t *opt0, *limit0;
2064   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2065   u8 type0;
2066
2067   s = format (s, "IP6_HOP_BY_HOP: next protocol %d len %d total %d",
2068               hbh0->protocol, (hbh0->length + 1) << 3, total_len);
2069
2070   opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2071   limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 + total_len);
2072
2073   while (opt0 < limit0)
2074     {
2075       type0 = opt0->type;
2076       switch (type0)
2077         {
2078         case 0:         /* Pad, just stop */
2079           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1);
2080           break;
2081
2082         default:
2083           if (hm->trace[type0])
2084             {
2085               s = (*hm->trace[type0]) (s, opt0);
2086             }
2087           else
2088             {
2089               s =
2090                 format (s, "\n    unrecognized option %d length %d", type0,
2091                         opt0->length);
2092             }
2093           opt0 =
2094             (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2095                                          sizeof (ip6_hop_by_hop_option_t));
2096           break;
2097         }
2098     }
2099   return s;
2100 }
2101
2102 static u8 *
2103 format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
2104 {
2105   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
2106   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
2107   ip6_hop_by_hop_trace_t *t = va_arg (*args, ip6_hop_by_hop_trace_t *);
2108   ip6_hop_by_hop_header_t *hbh0;
2109   ip6_hop_by_hop_option_t *opt0, *limit0;
2110   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2111
2112   u8 type0;
2113
2114   hbh0 = (ip6_hop_by_hop_header_t *) t->option_data;
2115
2116   s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d",
2117               t->next_index, (hbh0->length + 1) << 3, t->trace_len);
2118
2119   opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2120   limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0) + t->trace_len;
2121
2122   while (opt0 < limit0)
2123     {
2124       type0 = opt0->type;
2125       switch (type0)
2126         {
2127         case 0:         /* Pad, just stop */
2128           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1;
2129           break;
2130
2131         default:
2132           if (hm->trace[type0])
2133             {
2134               s = (*hm->trace[type0]) (s, opt0);
2135             }
2136           else
2137             {
2138               s =
2139                 format (s, "\n    unrecognized option %d length %d", type0,
2140                         opt0->length);
2141             }
2142           opt0 =
2143             (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2144                                          sizeof (ip6_hop_by_hop_option_t));
2145           break;
2146         }
2147     }
2148   return s;
2149 }
2150
2151 always_inline u8
2152 ip6_scan_hbh_options (vlib_buffer_t * b0,
2153                       ip6_header_t * ip0,
2154                       ip6_hop_by_hop_header_t * hbh0,
2155                       ip6_hop_by_hop_option_t * opt0,
2156                       ip6_hop_by_hop_option_t * limit0, u32 * next0)
2157 {
2158   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2159   u8 type0;
2160   u8 error0 = 0;
2161
2162   while (opt0 < limit0)
2163     {
2164       type0 = opt0->type;
2165       switch (type0)
2166         {
2167         case 0:         /* Pad1 */
2168           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1;
2169           continue;
2170         case 1:         /* PadN */
2171           break;
2172         default:
2173           if (hm->options[type0])
2174             {
2175               if ((*hm->options[type0]) (b0, ip0, opt0) < 0)
2176                 {
2177                   error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2178                   return (error0);
2179                 }
2180             }
2181           else
2182             {
2183               /* Unrecognized mandatory option, check the two high order bits */
2184               switch (opt0->type & HBH_OPTION_TYPE_HIGH_ORDER_BITS)
2185                 {
2186                 case HBH_OPTION_TYPE_SKIP_UNKNOWN:
2187                   break;
2188                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN:
2189                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2190                   *next0 = IP_LOOKUP_NEXT_DROP;
2191                   break;
2192                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP:
2193                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2194                   *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
2195                   icmp6_error_set_vnet_buffer (b0, ICMP6_parameter_problem,
2196                                                ICMP6_parameter_problem_unrecognized_option,
2197                                                (u8 *) opt0 - (u8 *) ip0);
2198                   break;
2199                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST:
2200                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2201                   if (!ip6_address_is_multicast (&ip0->dst_address))
2202                     {
2203                       *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
2204                       icmp6_error_set_vnet_buffer (b0,
2205                                                    ICMP6_parameter_problem,
2206                                                    ICMP6_parameter_problem_unrecognized_option,
2207                                                    (u8 *) opt0 - (u8 *) ip0);
2208                     }
2209                   else
2210                     {
2211                       *next0 = IP_LOOKUP_NEXT_DROP;
2212                     }
2213                   break;
2214                 }
2215               return (error0);
2216             }
2217         }
2218       opt0 =
2219         (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
2220                                      sizeof (ip6_hop_by_hop_option_t));
2221     }
2222   return (error0);
2223 }
2224
2225 /*
2226  * Process the Hop-by-Hop Options header
2227  */
2228 static uword
2229 ip6_hop_by_hop (vlib_main_t * vm,
2230                 vlib_node_runtime_t * node, vlib_frame_t * frame)
2231 {
2232   vlib_node_runtime_t *error_node =
2233     vlib_node_get_runtime (vm, ip6_hop_by_hop_node.index);
2234   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2235   u32 n_left_from, *from, *to_next;
2236   ip_lookup_next_t next_index;
2237
2238   from = vlib_frame_vector_args (frame);
2239   n_left_from = frame->n_vectors;
2240   next_index = node->cached_next_index;
2241
2242   while (n_left_from > 0)
2243     {
2244       u32 n_left_to_next;
2245
2246       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2247
2248       while (n_left_from >= 4 && n_left_to_next >= 2)
2249         {
2250           u32 bi0, bi1;
2251           vlib_buffer_t *b0, *b1;
2252           u32 next0, next1;
2253           ip6_header_t *ip0, *ip1;
2254           ip6_hop_by_hop_header_t *hbh0, *hbh1;
2255           ip6_hop_by_hop_option_t *opt0, *limit0, *opt1, *limit1;
2256           u8 error0 = 0, error1 = 0;
2257
2258           /* Prefetch next iteration. */
2259           {
2260             vlib_buffer_t *p2, *p3;
2261
2262             p2 = vlib_get_buffer (vm, from[2]);
2263             p3 = vlib_get_buffer (vm, from[3]);
2264
2265             vlib_prefetch_buffer_header (p2, LOAD);
2266             vlib_prefetch_buffer_header (p3, LOAD);
2267
2268             CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
2269             CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
2270           }
2271
2272           /* Speculatively enqueue b0, b1 to the current next frame */
2273           to_next[0] = bi0 = from[0];
2274           to_next[1] = bi1 = from[1];
2275           from += 2;
2276           to_next += 2;
2277           n_left_from -= 2;
2278           n_left_to_next -= 2;
2279
2280           b0 = vlib_get_buffer (vm, bi0);
2281           b1 = vlib_get_buffer (vm, bi1);
2282
2283           /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2284           u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
2285           ip_adjacency_t *adj0 = adj_get (adj_index0);
2286           u32 adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
2287           ip_adjacency_t *adj1 = adj_get (adj_index1);
2288
2289           /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2290           next0 = adj0->lookup_next_index;
2291           next1 = adj1->lookup_next_index;
2292
2293           ip0 = vlib_buffer_get_current (b0);
2294           ip1 = vlib_buffer_get_current (b1);
2295           hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
2296           hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
2297           opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2298           opt1 = (ip6_hop_by_hop_option_t *) (hbh1 + 1);
2299           limit0 =
2300             (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
2301                                          ((hbh0->length + 1) << 3));
2302           limit1 =
2303             (ip6_hop_by_hop_option_t *) ((u8 *) hbh1 +
2304                                          ((hbh1->length + 1) << 3));
2305
2306           /*
2307            * Basic validity checks
2308            */
2309           if ((hbh0->length + 1) << 3 >
2310               clib_net_to_host_u16 (ip0->payload_length))
2311             {
2312               error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2313               next0 = IP_LOOKUP_NEXT_DROP;
2314               goto outdual;
2315             }
2316           /* Scan the set of h-b-h options, process ones that we understand */
2317           error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
2318
2319           if ((hbh1->length + 1) << 3 >
2320               clib_net_to_host_u16 (ip1->payload_length))
2321             {
2322               error1 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2323               next1 = IP_LOOKUP_NEXT_DROP;
2324               goto outdual;
2325             }
2326           /* Scan the set of h-b-h options, process ones that we understand */
2327           error1 = ip6_scan_hbh_options (b1, ip1, hbh1, opt1, limit1, &next1);
2328
2329         outdual:
2330           /* Has the classifier flagged this buffer for special treatment? */
2331           if (PREDICT_FALSE
2332               ((error0 == 0)
2333                && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
2334             next0 = hm->next_override;
2335
2336           /* Has the classifier flagged this buffer for special treatment? */
2337           if (PREDICT_FALSE
2338               ((error1 == 0)
2339                && (vnet_buffer (b1)->l2_classify.opaque_index & OI_DECAP)))
2340             next1 = hm->next_override;
2341
2342           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
2343             {
2344               if (b0->flags & VLIB_BUFFER_IS_TRACED)
2345                 {
2346                   ip6_hop_by_hop_trace_t *t =
2347                     vlib_add_trace (vm, node, b0, sizeof (*t));
2348                   u32 trace_len = (hbh0->length + 1) << 3;
2349                   t->next_index = next0;
2350                   /* Capture the h-b-h option verbatim */
2351                   trace_len =
2352                     trace_len <
2353                     ARRAY_LEN (t->option_data) ? trace_len :
2354                     ARRAY_LEN (t->option_data);
2355                   t->trace_len = trace_len;
2356                   clib_memcpy (t->option_data, hbh0, trace_len);
2357                 }
2358               if (b1->flags & VLIB_BUFFER_IS_TRACED)
2359                 {
2360                   ip6_hop_by_hop_trace_t *t =
2361                     vlib_add_trace (vm, node, b1, sizeof (*t));
2362                   u32 trace_len = (hbh1->length + 1) << 3;
2363                   t->next_index = next1;
2364                   /* Capture the h-b-h option verbatim */
2365                   trace_len =
2366                     trace_len <
2367                     ARRAY_LEN (t->option_data) ? trace_len :
2368                     ARRAY_LEN (t->option_data);
2369                   t->trace_len = trace_len;
2370                   clib_memcpy (t->option_data, hbh1, trace_len);
2371                 }
2372
2373             }
2374
2375           b0->error = error_node->errors[error0];
2376           b1->error = error_node->errors[error1];
2377
2378           /* verify speculative enqueue, maybe switch current next frame */
2379           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
2380                                            n_left_to_next, bi0, bi1, next0,
2381                                            next1);
2382         }
2383
2384       while (n_left_from > 0 && n_left_to_next > 0)
2385         {
2386           u32 bi0;
2387           vlib_buffer_t *b0;
2388           u32 next0;
2389           ip6_header_t *ip0;
2390           ip6_hop_by_hop_header_t *hbh0;
2391           ip6_hop_by_hop_option_t *opt0, *limit0;
2392           u8 error0 = 0;
2393
2394           /* Speculatively enqueue b0 to the current next frame */
2395           bi0 = from[0];
2396           to_next[0] = bi0;
2397           from += 1;
2398           to_next += 1;
2399           n_left_from -= 1;
2400           n_left_to_next -= 1;
2401
2402           b0 = vlib_get_buffer (vm, bi0);
2403           /*
2404            * Default use the next_index from the adjacency.
2405            * A HBH option rarely redirects to a different node
2406            */
2407           u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
2408           ip_adjacency_t *adj0 = adj_get (adj_index0);
2409           next0 = adj0->lookup_next_index;
2410
2411           ip0 = vlib_buffer_get_current (b0);
2412           hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
2413           opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
2414           limit0 =
2415             (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
2416                                          ((hbh0->length + 1) << 3));
2417
2418           /*
2419            * Basic validity checks
2420            */
2421           if ((hbh0->length + 1) << 3 >
2422               clib_net_to_host_u16 (ip0->payload_length))
2423             {
2424               error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2425               next0 = IP_LOOKUP_NEXT_DROP;
2426               goto out0;
2427             }
2428
2429           /* Scan the set of h-b-h options, process ones that we understand */
2430           error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
2431
2432         out0:
2433           /* Has the classifier flagged this buffer for special treatment? */
2434           if (PREDICT_FALSE
2435               ((error0 == 0)
2436                && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
2437             next0 = hm->next_override;
2438
2439           if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
2440             {
2441               ip6_hop_by_hop_trace_t *t =
2442                 vlib_add_trace (vm, node, b0, sizeof (*t));
2443               u32 trace_len = (hbh0->length + 1) << 3;
2444               t->next_index = next0;
2445               /* Capture the h-b-h option verbatim */
2446               trace_len =
2447                 trace_len <
2448                 ARRAY_LEN (t->option_data) ? trace_len :
2449                 ARRAY_LEN (t->option_data);
2450               t->trace_len = trace_len;
2451               clib_memcpy (t->option_data, hbh0, trace_len);
2452             }
2453
2454           b0->error = error_node->errors[error0];
2455
2456           /* verify speculative enqueue, maybe switch current next frame */
2457           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
2458                                            n_left_to_next, bi0, next0);
2459         }
2460       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2461     }
2462   return frame->n_vectors;
2463 }
2464
2465 /* *INDENT-OFF* */
2466 VLIB_REGISTER_NODE (ip6_hop_by_hop_node) =
2467 {
2468   .function = ip6_hop_by_hop,
2469   .name = "ip6-hop-by-hop",
2470   .sibling_of = "ip6-lookup",
2471   .vector_size = sizeof (u32),
2472   .format_trace = format_ip6_hop_by_hop_trace,
2473   .type = VLIB_NODE_TYPE_INTERNAL,
2474   .n_errors = ARRAY_LEN (ip6_hop_by_hop_error_strings),
2475   .error_strings = ip6_hop_by_hop_error_strings,
2476   .n_next_nodes = 0,
2477 };
2478 /* *INDENT-ON* */
2479
2480 VLIB_NODE_FUNCTION_MULTIARCH (ip6_hop_by_hop_node, ip6_hop_by_hop);
2481
2482 static clib_error_t *
2483 ip6_hop_by_hop_init (vlib_main_t * vm)
2484 {
2485   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2486   memset (hm->options, 0, sizeof (hm->options));
2487   memset (hm->trace, 0, sizeof (hm->trace));
2488   hm->next_override = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP;
2489   return (0);
2490 }
2491
2492 VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
2493
2494 void
2495 ip6_hbh_set_next_override (uword next)
2496 {
2497   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2498
2499   hm->next_override = next;
2500 }
2501
2502 int
2503 ip6_hbh_register_option (u8 option,
2504                          int options (vlib_buffer_t * b, ip6_header_t * ip,
2505                                       ip6_hop_by_hop_option_t * opt),
2506                          u8 * trace (u8 * s, ip6_hop_by_hop_option_t * opt))
2507 {
2508   ip6_main_t *im = &ip6_main;
2509   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2510
2511   ASSERT ((u32) option < ARRAY_LEN (hm->options));
2512
2513   /* Already registered */
2514   if (hm->options[option])
2515     return (-1);
2516
2517   hm->options[option] = options;
2518   hm->trace[option] = trace;
2519
2520   /* Set global variable */
2521   im->hbh_enabled = 1;
2522
2523   return (0);
2524 }
2525
2526 int
2527 ip6_hbh_unregister_option (u8 option)
2528 {
2529   ip6_main_t *im = &ip6_main;
2530   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2531
2532   ASSERT ((u32) option < ARRAY_LEN (hm->options));
2533
2534   /* Not registered */
2535   if (!hm->options[option])
2536     return (-1);
2537
2538   hm->options[option] = NULL;
2539   hm->trace[option] = NULL;
2540
2541   /* Disable global knob if this was the last option configured */
2542   int i;
2543   bool found = false;
2544   for (i = 0; i < 256; i++)
2545     {
2546       if (hm->options[option])
2547         {
2548           found = true;
2549           break;
2550         }
2551     }
2552   if (!found)
2553     im->hbh_enabled = 0;
2554
2555   return (0);
2556 }
2557
2558 /* Global IP6 main. */
2559 ip6_main_t ip6_main;
2560
2561 static clib_error_t *
2562 ip6_lookup_init (vlib_main_t * vm)
2563 {
2564   ip6_main_t *im = &ip6_main;
2565   clib_error_t *error;
2566   uword i;
2567
2568   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
2569     return error;
2570
2571   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
2572     {
2573       u32 j, i0, i1;
2574
2575       i0 = i / 32;
2576       i1 = i % 32;
2577
2578       for (j = 0; j < i0; j++)
2579         im->fib_masks[i].as_u32[j] = ~0;
2580
2581       if (i1)
2582         im->fib_masks[i].as_u32[i0] =
2583           clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
2584     }
2585
2586   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 1);
2587
2588   if (im->lookup_table_nbuckets == 0)
2589     im->lookup_table_nbuckets = IP6_FIB_DEFAULT_HASH_NUM_BUCKETS;
2590
2591   im->lookup_table_nbuckets = 1 << max_log2 (im->lookup_table_nbuckets);
2592
2593   if (im->lookup_table_size == 0)
2594     im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE;
2595
2596   BV (clib_bihash_init) (&(im->ip6_table[IP6_FIB_TABLE_FWDING].ip6_hash),
2597                          "ip6 FIB fwding table",
2598                          im->lookup_table_nbuckets, im->lookup_table_size);
2599   BV (clib_bihash_init) (&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash,
2600                          "ip6 FIB non-fwding table",
2601                          im->lookup_table_nbuckets, im->lookup_table_size);
2602
2603   /* Create FIB with index 0 and table id of 0. */
2604   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
2605                                      FIB_SOURCE_DEFAULT_ROUTE);
2606   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
2607                                       MFIB_SOURCE_DEFAULT_ROUTE);
2608
2609   {
2610     pg_node_t *pn;
2611     pn = pg_get_node (ip6_lookup_node.index);
2612     pn->unformat_edit = unformat_pg_ip6_header;
2613   }
2614
2615   /* Unless explicitly configured, don't process HBH options */
2616   im->hbh_enabled = 0;
2617
2618   {
2619     icmp6_neighbor_solicitation_header_t p;
2620
2621     memset (&p, 0, sizeof (p));
2622
2623     p.ip.ip_version_traffic_class_and_flow_label =
2624       clib_host_to_net_u32 (0x6 << 28);
2625     p.ip.payload_length =
2626       clib_host_to_net_u16 (sizeof (p) -
2627                             STRUCT_OFFSET_OF
2628                             (icmp6_neighbor_solicitation_header_t, neighbor));
2629     p.ip.protocol = IP_PROTOCOL_ICMP6;
2630     p.ip.hop_limit = 255;
2631     ip6_set_solicited_node_multicast_address (&p.ip.dst_address, 0);
2632
2633     p.neighbor.icmp.type = ICMP6_neighbor_solicitation;
2634
2635     p.link_layer_option.header.type =
2636       ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
2637     p.link_layer_option.header.n_data_u64s =
2638       sizeof (p.link_layer_option) / sizeof (u64);
2639
2640     vlib_packet_template_init (vm,
2641                                &im->discover_neighbor_packet_template,
2642                                &p, sizeof (p),
2643                                /* alloc chunk size */ 8,
2644                                "ip6 neighbor discovery");
2645   }
2646
2647   return error;
2648 }
2649
2650 VLIB_INIT_FUNCTION (ip6_lookup_init);
2651
2652 void
2653 ip6_link_local_address_from_ethernet_mac_address (ip6_address_t * ip,
2654                                                   u8 * mac)
2655 {
2656   ip->as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL);
2657   /* Invert the "u" bit */
2658   ip->as_u8[8] = mac[0] ^ (1 << 1);
2659   ip->as_u8[9] = mac[1];
2660   ip->as_u8[10] = mac[2];
2661   ip->as_u8[11] = 0xFF;
2662   ip->as_u8[12] = 0xFE;
2663   ip->as_u8[13] = mac[3];
2664   ip->as_u8[14] = mac[4];
2665   ip->as_u8[15] = mac[5];
2666 }
2667
2668 void
2669 ip6_ethernet_mac_address_from_link_local_address (u8 * mac,
2670                                                   ip6_address_t * ip)
2671 {
2672   /* Invert the previously inverted "u" bit */
2673   mac[0] = ip->as_u8[8] ^ (1 << 1);
2674   mac[1] = ip->as_u8[9];
2675   mac[2] = ip->as_u8[10];
2676   mac[3] = ip->as_u8[13];
2677   mac[4] = ip->as_u8[14];
2678   mac[5] = ip->as_u8[15];
2679 }
2680
2681 static clib_error_t *
2682 test_ip6_link_command_fn (vlib_main_t * vm,
2683                           unformat_input_t * input, vlib_cli_command_t * cmd)
2684 {
2685   u8 mac[6];
2686   ip6_address_t _a, *a = &_a;
2687
2688   if (unformat (input, "%U", unformat_ethernet_address, mac))
2689     {
2690       ip6_link_local_address_from_ethernet_mac_address (a, mac);
2691       vlib_cli_output (vm, "Link local address: %U", format_ip6_address, a);
2692       ip6_ethernet_mac_address_from_link_local_address (mac, a);
2693       vlib_cli_output (vm, "Original MAC address: %U",
2694                        format_ethernet_address, mac);
2695     }
2696
2697   return 0;
2698 }
2699
2700 /*?
2701  * This command converts the given MAC Address into an IPv6 link-local
2702  * address.
2703  *
2704  * @cliexpar
2705  * Example of how to create an IPv6 link-local address:
2706  * @cliexstart{test ip6 link 16:d9:e0:91:79:86}
2707  * Link local address: fe80::14d9:e0ff:fe91:7986
2708  * Original MAC address: 16:d9:e0:91:79:86
2709  * @cliexend
2710 ?*/
2711 /* *INDENT-OFF* */
2712 VLIB_CLI_COMMAND (test_link_command, static) =
2713 {
2714   .path = "test ip6 link",
2715   .function = test_ip6_link_command_fn,
2716   .short_help = "test ip6 link <mac-address>",
2717 };
2718 /* *INDENT-ON* */
2719
2720 int
2721 vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config)
2722 {
2723   u32 fib_index;
2724
2725   fib_index = fib_table_find (FIB_PROTOCOL_IP6, table_id);
2726
2727   if (~0 == fib_index)
2728     return VNET_API_ERROR_NO_SUCH_FIB;
2729
2730   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP6,
2731                                   flow_hash_config);
2732
2733   return 0;
2734 }
2735
2736 static clib_error_t *
2737 set_ip6_flow_hash_command_fn (vlib_main_t * vm,
2738                               unformat_input_t * input,
2739                               vlib_cli_command_t * cmd)
2740 {
2741   int matched = 0;
2742   u32 table_id = 0;
2743   u32 flow_hash_config = 0;
2744   int rv;
2745
2746   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2747     {
2748       if (unformat (input, "table %d", &table_id))
2749         matched = 1;
2750 #define _(a,v) \
2751     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2752       foreach_flow_hash_bit
2753 #undef _
2754         else
2755         break;
2756     }
2757
2758   if (matched == 0)
2759     return clib_error_return (0, "unknown input `%U'",
2760                               format_unformat_error, input);
2761
2762   rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config);
2763   switch (rv)
2764     {
2765     case 0:
2766       break;
2767
2768     case -1:
2769       return clib_error_return (0, "no such FIB table %d", table_id);
2770
2771     default:
2772       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2773       break;
2774     }
2775
2776   return 0;
2777 }
2778
2779 /*?
2780  * Configure the set of IPv6 fields used by the flow hash.
2781  *
2782  * @cliexpar
2783  * @parblock
2784  * Example of how to set the flow hash on a given table:
2785  * @cliexcmd{set ip6 flow-hash table 8 dst sport dport proto}
2786  *
2787  * Example of how to display the configured flow hash:
2788  * @cliexstart{show ip6 fib}
2789  * ipv6-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2790  * @::/0
2791  *   unicast-ip6-chain
2792  *   [@0]: dpo-load-balance: [index:5 buckets:1 uRPF:5 to:[0:0]]
2793  *     [0] [@0]: dpo-drop ip6
2794  * fe80::/10
2795  *   unicast-ip6-chain
2796  *   [@0]: dpo-load-balance: [index:10 buckets:1 uRPF:10 to:[0:0]]
2797  *     [0] [@2]: dpo-receive
2798  * ff02::1/128
2799  *   unicast-ip6-chain
2800  *   [@0]: dpo-load-balance: [index:8 buckets:1 uRPF:8 to:[0:0]]
2801  *     [0] [@2]: dpo-receive
2802  * ff02::2/128
2803  *   unicast-ip6-chain
2804  *   [@0]: dpo-load-balance: [index:7 buckets:1 uRPF:7 to:[0:0]]
2805  *     [0] [@2]: dpo-receive
2806  * ff02::16/128
2807  *   unicast-ip6-chain
2808  *   [@0]: dpo-load-balance: [index:9 buckets:1 uRPF:9 to:[0:0]]
2809  *     [0] [@2]: dpo-receive
2810  * ff02::1:ff00:0/104
2811  *   unicast-ip6-chain
2812  *   [@0]: dpo-load-balance: [index:6 buckets:1 uRPF:6 to:[0:0]]
2813  *     [0] [@2]: dpo-receive
2814  * ipv6-VRF:8, fib_index 1, flow hash: dst sport dport proto
2815  * @::/0
2816  *   unicast-ip6-chain
2817  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2818  *     [0] [@0]: dpo-drop ip6
2819  * @::a:1:1:0:4/126
2820  *   unicast-ip6-chain
2821  *   [@0]: dpo-load-balance: [index:27 buckets:1 uRPF:26 to:[0:0]]
2822  *     [0] [@4]: ipv6-glean: af_packet0
2823  * @::a:1:1:0:7/128
2824  *   unicast-ip6-chain
2825  *   [@0]: dpo-load-balance: [index:28 buckets:1 uRPF:27 to:[0:0]]
2826  *     [0] [@2]: dpo-receive: @::a:1:1:0:7 on af_packet0
2827  * fe80::/10
2828  *   unicast-ip6-chain
2829  *   [@0]: dpo-load-balance: [index:26 buckets:1 uRPF:25 to:[0:0]]
2830  *     [0] [@2]: dpo-receive
2831  * fe80::fe:3eff:fe3e:9222/128
2832  *   unicast-ip6-chain
2833  *   [@0]: dpo-load-balance: [index:29 buckets:1 uRPF:28 to:[0:0]]
2834  *     [0] [@2]: dpo-receive: fe80::fe:3eff:fe3e:9222 on af_packet0
2835  * ff02::1/128
2836  *   unicast-ip6-chain
2837  *   [@0]: dpo-load-balance: [index:24 buckets:1 uRPF:23 to:[0:0]]
2838  *     [0] [@2]: dpo-receive
2839  * ff02::2/128
2840  *   unicast-ip6-chain
2841  *   [@0]: dpo-load-balance: [index:23 buckets:1 uRPF:22 to:[0:0]]
2842  *     [0] [@2]: dpo-receive
2843  * ff02::16/128
2844  *   unicast-ip6-chain
2845  *   [@0]: dpo-load-balance: [index:25 buckets:1 uRPF:24 to:[0:0]]
2846  *     [0] [@2]: dpo-receive
2847  * ff02::1:ff00:0/104
2848  *   unicast-ip6-chain
2849  *   [@0]: dpo-load-balance: [index:22 buckets:1 uRPF:21 to:[0:0]]
2850  *     [0] [@2]: dpo-receive
2851  * @cliexend
2852  * @endparblock
2853 ?*/
2854 /* *INDENT-OFF* */
2855 VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) =
2856 {
2857   .path = "set ip6 flow-hash",
2858   .short_help =
2859   "set ip6 flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2860   .function = set_ip6_flow_hash_command_fn,
2861 };
2862 /* *INDENT-ON* */
2863
2864 static clib_error_t *
2865 show_ip6_local_command_fn (vlib_main_t * vm,
2866                            unformat_input_t * input, vlib_cli_command_t * cmd)
2867 {
2868   ip6_main_t *im = &ip6_main;
2869   ip_lookup_main_t *lm = &im->lookup_main;
2870   int i;
2871
2872   vlib_cli_output (vm, "Protocols handled by ip6_local");
2873   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
2874     {
2875       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2876         {
2877
2878           u32 node_index = vlib_get_node (vm,
2879                                           ip6_local_node.index)->
2880             next_nodes[lm->local_next_by_ip_protocol[i]];
2881           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
2882                            node_index);
2883         }
2884     }
2885   return 0;
2886 }
2887
2888
2889
2890 /*?
2891  * Display the set of protocols handled by the local IPv6 stack.
2892  *
2893  * @cliexpar
2894  * Example of how to display local protocol table:
2895  * @cliexstart{show ip6 local}
2896  * Protocols handled by ip6_local
2897  * 17
2898  * 43
2899  * 58
2900  * 115
2901  * @cliexend
2902 ?*/
2903 /* *INDENT-OFF* */
2904 VLIB_CLI_COMMAND (show_ip6_local, static) =
2905 {
2906   .path = "show ip6 local",
2907   .function = show_ip6_local_command_fn,
2908   .short_help = "show ip6 local",
2909 };
2910 /* *INDENT-ON* */
2911
2912 int
2913 vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2914                              u32 table_index)
2915 {
2916   vnet_main_t *vnm = vnet_get_main ();
2917   vnet_interface_main_t *im = &vnm->interface_main;
2918   ip6_main_t *ipm = &ip6_main;
2919   ip_lookup_main_t *lm = &ipm->lookup_main;
2920   vnet_classify_main_t *cm = &vnet_classify_main;
2921   ip6_address_t *if_addr;
2922
2923   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2924     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2925
2926   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2927     return VNET_API_ERROR_NO_SUCH_ENTRY;
2928
2929   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2930   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2931
2932   if_addr = ip6_interface_first_address (ipm, sw_if_index);
2933
2934   if (NULL != if_addr)
2935     {
2936       fib_prefix_t pfx = {
2937         .fp_len = 128,
2938         .fp_proto = FIB_PROTOCOL_IP6,
2939         .fp_addr.ip6 = *if_addr,
2940       };
2941       u32 fib_index;
2942
2943       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2944                                                        sw_if_index);
2945
2946
2947       if (table_index != (u32) ~ 0)
2948         {
2949           dpo_id_t dpo = DPO_INVALID;
2950
2951           dpo_set (&dpo,
2952                    DPO_CLASSIFY,
2953                    DPO_PROTO_IP6,
2954                    classify_dpo_create (DPO_PROTO_IP6, table_index));
2955
2956           fib_table_entry_special_dpo_add (fib_index,
2957                                            &pfx,
2958                                            FIB_SOURCE_CLASSIFY,
2959                                            FIB_ENTRY_FLAG_NONE, &dpo);
2960           dpo_reset (&dpo);
2961         }
2962       else
2963         {
2964           fib_table_entry_special_remove (fib_index,
2965                                           &pfx, FIB_SOURCE_CLASSIFY);
2966         }
2967     }
2968
2969   return 0;
2970 }
2971
2972 static clib_error_t *
2973 set_ip6_classify_command_fn (vlib_main_t * vm,
2974                              unformat_input_t * input,
2975                              vlib_cli_command_t * cmd)
2976 {
2977   u32 table_index = ~0;
2978   int table_index_set = 0;
2979   u32 sw_if_index = ~0;
2980   int rv;
2981
2982   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2983     {
2984       if (unformat (input, "table-index %d", &table_index))
2985         table_index_set = 1;
2986       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2987                          vnet_get_main (), &sw_if_index))
2988         ;
2989       else
2990         break;
2991     }
2992
2993   if (table_index_set == 0)
2994     return clib_error_return (0, "classify table-index must be specified");
2995
2996   if (sw_if_index == ~0)
2997     return clib_error_return (0, "interface / subif must be specified");
2998
2999   rv = vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index);
3000
3001   switch (rv)
3002     {
3003     case 0:
3004       break;
3005
3006     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3007       return clib_error_return (0, "No such interface");
3008
3009     case VNET_API_ERROR_NO_SUCH_ENTRY:
3010       return clib_error_return (0, "No such classifier table");
3011     }
3012   return 0;
3013 }
3014
3015 /*?
3016  * Assign a classification table to an interface. The classification
3017  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3018  * commands. Once the table is created, use this command to filter packets
3019  * on an interface.
3020  *
3021  * @cliexpar
3022  * Example of how to assign a classification table to an interface:
3023  * @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1}
3024 ?*/
3025 /* *INDENT-OFF* */
3026 VLIB_CLI_COMMAND (set_ip6_classify_command, static) =
3027 {
3028   .path = "set ip6 classify",
3029   .short_help =
3030   "set ip6 classify intfc <interface> table-index <classify-idx>",
3031   .function = set_ip6_classify_command_fn,
3032 };
3033 /* *INDENT-ON* */
3034
3035 static clib_error_t *
3036 ip6_config (vlib_main_t * vm, unformat_input_t * input)
3037 {
3038   ip6_main_t *im = &ip6_main;
3039   uword heapsize = 0;
3040   u32 tmp;
3041   u32 nbuckets = 0;
3042
3043   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3044     {
3045       if (unformat (input, "hash-buckets %d", &tmp))
3046         nbuckets = tmp;
3047       else if (unformat (input, "heap-size %U",
3048                          unformat_memory_size, &heapsize))
3049         ;
3050       else
3051         return clib_error_return (0, "unknown input '%U'",
3052                                   format_unformat_error, input);
3053     }
3054
3055   im->lookup_table_nbuckets = nbuckets;
3056   im->lookup_table_size = heapsize;
3057
3058   return 0;
3059 }
3060
3061 VLIB_EARLY_CONFIG_FUNCTION (ip6_config, "ip6");
3062
3063 /*
3064  * fd.io coding-style-patch-verification: ON
3065  *
3066  * Local Variables:
3067  * eval: (c-set-style "gnu")
3068  * End:
3069  */