325ef9b7b67140989b96c70e2e1cd25599318624
[vpp.git] / vnet / vnet / ip / ip6_forward.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip6_forward.c: IP v6 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
44 #include <vppinfra/cache.h>
45 #include <vnet/fib/fib_table.h>
46 #include <vnet/fib/ip6_fib.h>
47 #include <vnet/dpo/load_balance.h>
48 #include <vnet/dpo/classify_dpo.h>
49
50 #include <vppinfra/bihash_template.c>
51
52 /**
53  * @file
54  * @brief IPv6 Forwarding.
55  *
56  * This file contains the source code for IPv6 forwarding.
57  */
58
59 void
60 ip6_forward_next_trace (vlib_main_t * vm,
61                         vlib_node_runtime_t * node,
62                         vlib_frame_t * frame,
63                         vlib_rx_or_tx_t which_adj_index);
64
65 always_inline uword
66 ip6_lookup_inline (vlib_main_t * vm,
67                    vlib_node_runtime_t * node,
68                    vlib_frame_t * frame)
69 {
70   ip6_main_t * im = &ip6_main;
71   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
72   u32 n_left_from, n_left_to_next, * from, * to_next;
73   ip_lookup_next_t next;
74   u32 cpu_index = os_get_cpu_number();
75
76   from = vlib_frame_vector_args (frame);
77   n_left_from = frame->n_vectors;
78   next = node->cached_next_index;
79
80   while (n_left_from > 0)
81     {
82       vlib_get_next_frame (vm, node, next,
83                            to_next, n_left_to_next);
84
85       while (n_left_from >= 4 && n_left_to_next >= 2)
86         {
87           vlib_buffer_t * p0, * p1;
88           u32 pi0, pi1, lbi0, lbi1, wrong_next;
89           ip_lookup_next_t next0, next1;
90           ip6_header_t * ip0, * ip1;
91           ip6_address_t * dst_addr0, * dst_addr1;
92           u32 fib_index0, fib_index1;
93           u32 flow_hash_config0, flow_hash_config1;
94           const dpo_id_t *dpo0, *dpo1;
95           const load_balance_t *lb0, *lb1;
96
97           /* Prefetch next iteration. */
98           {
99             vlib_buffer_t * p2, * p3;
100
101             p2 = vlib_get_buffer (vm, from[2]);
102             p3 = vlib_get_buffer (vm, from[3]);
103
104             vlib_prefetch_buffer_header (p2, LOAD);
105             vlib_prefetch_buffer_header (p3, LOAD);
106             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
107             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
108           }
109
110           pi0 = to_next[0] = from[0];
111           pi1 = to_next[1] = from[1];
112
113           p0 = vlib_get_buffer (vm, pi0);
114           p1 = vlib_get_buffer (vm, pi1);
115
116           ip0 = vlib_buffer_get_current (p0);
117           ip1 = vlib_buffer_get_current (p1);
118
119           dst_addr0 = &ip0->dst_address;
120           dst_addr1 = &ip1->dst_address;
121
122           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
123           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
124
125           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
126             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
127           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
128             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
129
130           lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0);
131           lbi1 = ip6_fib_table_fwding_lookup (im, fib_index1, dst_addr1);
132
133           lb0 = load_balance_get (lbi0);
134           lb1 = load_balance_get (lbi1);
135
136           vnet_buffer (p0)->ip.flow_hash =
137             vnet_buffer(p1)->ip.flow_hash = 0;
138
139           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
140             {
141               flow_hash_config0 = lb0->lb_hash_config;
142               vnet_buffer (p0)->ip.flow_hash =
143                 ip6_compute_flow_hash (ip0, flow_hash_config0);
144             }
145           if (PREDICT_FALSE(lb1->lb_n_buckets > 1))
146             {
147               flow_hash_config1 = lb1->lb_hash_config;
148               vnet_buffer (p1)->ip.flow_hash =
149                 ip6_compute_flow_hash (ip1, flow_hash_config1);
150             }
151
152           ASSERT (lb0->lb_n_buckets > 0);
153           ASSERT (lb1->lb_n_buckets > 0);
154           ASSERT (is_pow2 (lb0->lb_n_buckets));
155           ASSERT (is_pow2 (lb1->lb_n_buckets));
156           dpo0 = load_balance_get_bucket_i(lb0,
157                                            (vnet_buffer (p0)->ip.flow_hash &
158                                             lb0->lb_n_buckets_minus_1));
159           dpo1 = load_balance_get_bucket_i(lb1,
160                                            (vnet_buffer (p1)->ip.flow_hash &
161                                             lb1->lb_n_buckets_minus_1));
162
163           next0 = dpo0->dpoi_next_node;
164           next1 = dpo1->dpoi_next_node;
165
166           /* Only process the HBH Option Header if explicitly configured to do so */
167           if (PREDICT_FALSE(ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
168             {
169               next0 = (dpo_is_adj(dpo0) && im->hbh_enabled) ?
170                 (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
171             }
172           if (PREDICT_FALSE(ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
173             {
174               next1 = (dpo_is_adj(dpo1) && im->hbh_enabled) ?
175                 (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next1;
176             }
177           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
178           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
179
180           vlib_increment_combined_counter
181               (cm, cpu_index, lbi0, 1,
182                vlib_buffer_length_in_chain (vm, p0));
183           vlib_increment_combined_counter
184               (cm, cpu_index, lbi1, 1,
185                vlib_buffer_length_in_chain (vm, p1));
186
187           from += 2;
188           to_next += 2;
189           n_left_to_next -= 2;
190           n_left_from -= 2;
191
192           wrong_next = (next0 != next) + 2*(next1 != next);
193           if (PREDICT_FALSE (wrong_next != 0))
194             {
195               switch (wrong_next)
196                 {
197                 case 1:
198                   /* A B A */
199                   to_next[-2] = pi1;
200                   to_next -= 1;
201                   n_left_to_next += 1;
202                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
203                   break;
204
205                 case 2:
206                   /* A A B */
207                   to_next -= 1;
208                   n_left_to_next += 1;
209                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
210                   break;
211
212                 case 3:
213                   /* A B C */
214                   to_next -= 2;
215                   n_left_to_next += 2;
216                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
217                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
218                   if (next0 == next1)
219                     {
220                       /* A B B */
221                       vlib_put_next_frame (vm, node, next, n_left_to_next);
222                       next = next1;
223                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
224                     }
225                 }
226             }
227         }
228
229       while (n_left_from > 0 && n_left_to_next > 0)
230         {
231           vlib_buffer_t * p0;
232           ip6_header_t * ip0;
233           u32 pi0, lbi0;
234           ip_lookup_next_t next0;
235           load_balance_t * lb0;
236           ip6_address_t * dst_addr0;
237           u32 fib_index0, flow_hash_config0;
238           const dpo_id_t *dpo0;
239
240           pi0 = from[0];
241           to_next[0] = pi0;
242
243           p0 = vlib_get_buffer (vm, pi0);
244
245           ip0 = vlib_buffer_get_current (p0);
246
247           dst_addr0 = &ip0->dst_address;
248
249           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
250           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
251             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
252
253           flow_hash_config0 =
254               ip6_fib_get (fib_index0)->flow_hash_config;
255
256           lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0);
257
258           lb0 = load_balance_get (lbi0);
259
260           vnet_buffer (p0)->ip.flow_hash = 0;
261
262           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
263             {
264               flow_hash_config0 = lb0->lb_hash_config;
265               vnet_buffer (p0)->ip.flow_hash =
266                 ip6_compute_flow_hash (ip0, flow_hash_config0);
267             }
268
269           ASSERT (lb0->lb_n_buckets > 0);
270           ASSERT (is_pow2 (lb0->lb_n_buckets));
271           dpo0 = load_balance_get_bucket_i(lb0,
272                                            (vnet_buffer (p0)->ip.flow_hash &
273                                             lb0->lb_n_buckets_minus_1));
274           next0 = dpo0->dpoi_next_node;
275
276           /* Only process the HBH Option Header if explicitly configured to do so */
277           if (PREDICT_FALSE(ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
278             {
279               next0 = (dpo_is_adj(dpo0) && im->hbh_enabled) ?
280                 (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
281             }
282           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
283
284           vlib_increment_combined_counter
285               (cm, cpu_index, lbi0, 1,
286                vlib_buffer_length_in_chain (vm, p0));
287
288           from += 1;
289           to_next += 1;
290           n_left_to_next -= 1;
291           n_left_from -= 1;
292
293           if (PREDICT_FALSE (next0 != next))
294             {
295               n_left_to_next += 1;
296               vlib_put_next_frame (vm, node, next, n_left_to_next);
297               next = next0;
298               vlib_get_next_frame (vm, node, next,
299                                    to_next, n_left_to_next);
300               to_next[0] = pi0;
301               to_next += 1;
302               n_left_to_next -= 1;
303             }
304         }
305
306       vlib_put_next_frame (vm, node, next, n_left_to_next);
307     }
308
309   if (node->flags & VLIB_NODE_FLAG_TRACE)
310     ip6_forward_next_trace(vm, node, frame, VLIB_TX);
311
312   return frame->n_vectors;
313 }
314
315 static void
316 ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
317                           ip6_main_t * im, u32 fib_index,
318                           ip_interface_address_t * a)
319 {
320   ip_lookup_main_t * lm = &im->lookup_main;
321   ip6_address_t * address = ip_interface_address_get_address (lm, a);
322   fib_prefix_t pfx = {
323       .fp_len = a->address_length,
324       .fp_proto = FIB_PROTOCOL_IP6,
325       .fp_addr.ip6 = *address,
326   };
327
328   a->neighbor_probe_adj_index = ~0;
329   if (a->address_length < 128)
330   {
331       fib_node_index_t fei;
332
333       fei = fib_table_entry_update_one_path(fib_index,
334                                             &pfx,
335                                             FIB_SOURCE_INTERFACE,
336                                             (FIB_ENTRY_FLAG_CONNECTED |
337                                              FIB_ENTRY_FLAG_ATTACHED),
338                                             FIB_PROTOCOL_IP6,
339                                             NULL, /* No next-hop address */
340                                             sw_if_index,
341                                             ~0, // invalid FIB index
342                                             1,
343                                             MPLS_LABEL_INVALID,
344                                             FIB_ROUTE_PATH_FLAG_NONE);
345       a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
346   }
347
348   pfx.fp_len = 128;
349   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
350   {
351       u32 classify_table_index =
352           lm->classify_table_index_by_sw_if_index [sw_if_index];
353       if (classify_table_index != (u32) ~0)
354       {
355           dpo_id_t dpo = DPO_INVALID;
356
357           dpo_set(&dpo,
358                   DPO_CLASSIFY,
359                   DPO_PROTO_IP6,
360                   classify_dpo_create(DPO_PROTO_IP6, classify_table_index));
361
362           fib_table_entry_special_dpo_add(fib_index,
363                                           &pfx,
364                                           FIB_SOURCE_CLASSIFY,
365                                           FIB_ENTRY_FLAG_NONE,
366                                           &dpo);
367           dpo_reset(&dpo);
368       }
369   }
370
371   fib_table_entry_update_one_path(fib_index,
372                                   &pfx,
373                                   FIB_SOURCE_INTERFACE,
374                                   (FIB_ENTRY_FLAG_CONNECTED |
375                                    FIB_ENTRY_FLAG_LOCAL),
376                                   FIB_PROTOCOL_IP6,
377                                   &pfx.fp_addr,
378                                   sw_if_index,
379                                   ~0, // invalid FIB index
380                                   1,
381                                   MPLS_LABEL_INVALID,
382                                   FIB_ROUTE_PATH_FLAG_NONE);
383 }
384
385 static void
386 ip6_del_interface_routes (ip6_main_t * im,
387                           u32 fib_index,
388                           ip6_address_t * address,
389                           u32 address_length)
390 {
391     fib_prefix_t pfx = {
392         .fp_len = address_length,
393         .fp_proto = FIB_PROTOCOL_IP6,
394         .fp_addr.ip6 = *address,
395     };
396
397     if (pfx.fp_len < 128)
398     {
399         fib_table_entry_delete(fib_index,
400                                &pfx,
401                                FIB_SOURCE_INTERFACE);
402
403     }
404
405     pfx.fp_len = 128;
406     fib_table_entry_delete(fib_index,
407                            &pfx,
408                            FIB_SOURCE_INTERFACE);
409 }
410
411 void
412 ip6_sw_interface_enable_disable (u32 sw_if_index,
413                                  u32 is_enable)
414 {
415   ip6_main_t * im = &ip6_main;
416
417   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
418
419   /*
420    * enable/disable only on the 1<->0 transition
421    */
422   if (is_enable)
423     {
424       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
425         return;
426     }
427   else
428     {
429       ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
430       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
431         return;
432     }
433
434   vnet_feature_enable_disable ("ip6-unicast", "ip6-lookup", sw_if_index,
435                                is_enable, 0, 0);
436
437   vnet_feature_enable_disable ("ip6-multicast", "ip6-lookup", sw_if_index,
438                                is_enable, 0, 0);
439
440 }
441
442 /* get first interface address */
443 ip6_address_t *
444 ip6_interface_first_address (ip6_main_t * im,
445                              u32 sw_if_index,
446                              ip_interface_address_t ** result_ia)
447 {
448   ip_lookup_main_t * lm = &im->lookup_main;
449   ip_interface_address_t * ia = 0;
450   ip6_address_t * result = 0;
451
452   foreach_ip_interface_address (lm, ia, sw_if_index,
453                                 1 /* honor unnumbered */,
454   ({
455     ip6_address_t * a = ip_interface_address_get_address (lm, ia);
456     result = a;
457     break;
458   }));
459   if (result_ia)
460     *result_ia = result ? ia : 0;
461   return result;
462 }
463
464 clib_error_t *
465 ip6_add_del_interface_address (vlib_main_t * vm,
466                                u32 sw_if_index,
467                                ip6_address_t * address,
468                                u32 address_length,
469                                u32 is_del)
470 {
471   vnet_main_t * vnm = vnet_get_main();
472   ip6_main_t * im = &ip6_main;
473   ip_lookup_main_t * lm = &im->lookup_main;
474   clib_error_t * error;
475   u32 if_address_index;
476   ip6_address_fib_t ip6_af, * addr_fib = 0;
477
478   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
479   ip6_addr_fib_init (&ip6_af, address,
480                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
481   vec_add1 (addr_fib, ip6_af);
482
483   {
484     uword elts_before = pool_elts (lm->if_address_pool);
485
486     error = ip_interface_address_add_del
487       (lm,
488        sw_if_index,
489        addr_fib,
490        address_length,
491        is_del,
492        &if_address_index);
493     if (error)
494       goto done;
495
496     /* Pool did not grow: add duplicate address. */
497     if (elts_before == pool_elts (lm->if_address_pool))
498       goto done;
499   }
500
501   if (is_del)
502       ip6_del_interface_routes (im, ip6_af.fib_index, address,
503                                 address_length);
504   else
505       ip6_add_interface_routes (vnm, sw_if_index,
506                                 im, ip6_af.fib_index,
507                                 pool_elt_at_index (lm->if_address_pool, if_address_index));
508
509   {
510     ip6_add_del_interface_address_callback_t * cb;
511     vec_foreach (cb, im->add_del_interface_address_callbacks)
512       cb->function (im, cb->function_opaque, sw_if_index,
513                     address, address_length,
514                     if_address_index,
515                     is_del);
516   }
517
518  done:
519   vec_free (addr_fib);
520   return error;
521 }
522
523 clib_error_t *
524 ip6_sw_interface_admin_up_down (vnet_main_t * vnm,
525                                 u32 sw_if_index,
526                                 u32 flags)
527 {
528   ip6_main_t * im = &ip6_main;
529   ip_interface_address_t * ia;
530   ip6_address_t * a;
531   u32 is_admin_up, fib_index;
532
533   /* Fill in lookup tables with default table (0). */
534   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
535
536   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
537
538   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
539
540   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
541
542   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
543                                 0 /* honor unnumbered */,
544   ({
545     a = ip_interface_address_get_address (&im->lookup_main, ia);
546     if (is_admin_up)
547       ip6_add_interface_routes (vnm, sw_if_index,
548                                 im, fib_index,
549                                 ia);
550     else
551       ip6_del_interface_routes (im, fib_index,
552                                 a, ia->address_length);
553   }));
554
555   return 0;
556 }
557
558 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
559
560 /* Built-in ip6 unicast rx feature path definition */
561 VNET_FEATURE_ARC_INIT (ip6_unicast, static) =
562 {
563   .arc_name  = "ip6-unicast",
564   .start_nodes = VNET_FEATURES ("ip6-input"),
565   .arc_index_ptr = &ip6_main.lookup_main.ucast_feature_arc_index,
566 };
567
568 VNET_FEATURE_INIT (ip6_flow_classify, static) = {
569   .arc_name = "ip6-unicast",
570   .node_name = "ip6-flow-classify",
571   .runs_before = VNET_FEATURES ("ip6-inacl"),
572 };
573
574 VNET_FEATURE_INIT (ip6_inacl, static) = {
575   .arc_name = "ip6-unicast",
576   .node_name = "ip6-inacl",
577   .runs_before = VNET_FEATURES ("ip6-policer-classify"),
578 };
579
580 VNET_FEATURE_INIT (ip6_policer_classify, static) = {
581   .arc_name = "ip6-unicast",
582   .node_name = "ip6-policer-classify",
583   .runs_before = VNET_FEATURES ("ipsec-input-ip6"),
584 };
585
586 VNET_FEATURE_INIT (ip6_ipsec, static) = {
587   .arc_name = "ip6-unicast",
588   .node_name = "ipsec-input-ip6",
589   .runs_before = VNET_FEATURES ("l2tp-decap"),
590 };
591
592 VNET_FEATURE_INIT (ip6_l2tp, static) = {
593   .arc_name = "ip6-unicast",
594   .node_name = "l2tp-decap",
595   .runs_before = VNET_FEATURES ("vpath-input-ip6"),
596 };
597
598 VNET_FEATURE_INIT (ip6_vpath, static) = {
599   .arc_name = "ip6-unicast",
600   .node_name = "vpath-input-ip6",
601   .runs_before = VNET_FEATURES ("ip6-lookup"),
602 };
603
604 VNET_FEATURE_INIT (ip6_lookup, static) = {
605   .arc_name = "ip6-unicast",
606   .node_name = "ip6-lookup",
607   .runs_before = VNET_FEATURES ("ip6-drop"),
608 };
609
610 VNET_FEATURE_INIT (ip6_drop, static) = {
611   .arc_name = "ip6-unicast",
612   .node_name = "ip6-drop",
613   .runs_before = 0,  /*last feature*/
614 };
615
616 /* Built-in ip6 multicast rx feature path definition (none now) */
617 VNET_FEATURE_ARC_INIT (ip6_multicast, static) =
618 {
619   .arc_name  = "ip6-multicast",
620   .start_nodes = VNET_FEATURES ("ip6-input"),
621   .arc_index_ptr = &ip6_main.lookup_main.mcast_feature_arc_index,
622 };
623
624 VNET_FEATURE_INIT (ip6_vpath_mc, static) = {
625   .arc_name = "ip6-multicast",
626   .node_name = "vpath-input-ip6",
627   .runs_before = VNET_FEATURES ("ip6-lookup"),
628 };
629
630 VNET_FEATURE_INIT (ip6_mc_lookup, static) = {
631   .arc_name = "ip6-multicast",
632   .node_name = "ip6-lookup",
633   .runs_before = VNET_FEATURES ("ip6-drop"),
634 };
635
636 VNET_FEATURE_INIT (ip6_drop_mc, static) = {
637   .arc_name = "ip6-multicast",
638   .node_name = "ip6-drop",
639   .runs_before = 0, /* last feature */
640 };
641
642 /* Built-in ip4 tx feature path definition */
643 VNET_FEATURE_ARC_INIT (ip6_output, static) =
644 {
645   .arc_name  = "ip6-output",
646   .start_nodes = VNET_FEATURES ("ip6-rewrite", "ip6-midchain"),
647   .arc_index_ptr = &ip6_main.lookup_main.output_feature_arc_index,
648 };
649
650 VNET_FEATURE_INIT (ip6_ipsec_output, static) = {
651   .arc_name = "ip6-output",
652   .node_name = "ipsec-output-ip6",
653   .runs_before = VNET_FEATURES ("interface-output"),
654 };
655
656 VNET_FEATURE_INIT (ip6_interface_output, static) = {
657   .arc_name = "ip6-output",
658   .node_name = "interface-output",
659   .runs_before = 0, /* not before any other features */
660 };
661
662 clib_error_t *
663 ip6_sw_interface_add_del (vnet_main_t * vnm,
664                           u32 sw_if_index,
665                           u32 is_add)
666 {
667   vnet_feature_enable_disable ("ip6-unicast", "ip6-drop", sw_if_index,
668                                is_add, 0, 0);
669
670   vnet_feature_enable_disable ("ip6-multicast", "ip6-drop", sw_if_index,
671                                is_add, 0, 0);
672
673   vnet_feature_enable_disable ("ip6-output", "interface-output", sw_if_index,
674                                is_add, 0, 0);
675
676   return /* no error */ 0;
677 }
678
679 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del);
680
681 static uword
682 ip6_lookup (vlib_main_t * vm,
683             vlib_node_runtime_t * node,
684             vlib_frame_t * frame)
685 {
686   return ip6_lookup_inline (vm, node, frame);
687 }
688
689 static u8 * format_ip6_lookup_trace (u8 * s, va_list * args);
690
691 VLIB_REGISTER_NODE (ip6_lookup_node) = {
692   .function = ip6_lookup,
693   .name = "ip6-lookup",
694   .vector_size = sizeof (u32),
695
696   .format_trace = format_ip6_lookup_trace,
697
698   .n_next_nodes = IP6_LOOKUP_N_NEXT,
699   .next_nodes = IP6_LOOKUP_NEXT_NODES,
700 };
701
702 VLIB_NODE_FUNCTION_MULTIARCH (ip6_lookup_node, ip6_lookup)
703
704 always_inline uword
705 ip6_load_balance (vlib_main_t * vm,
706                   vlib_node_runtime_t * node,
707                   vlib_frame_t * frame)
708 {
709   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
710   u32 n_left_from, n_left_to_next, * from, * to_next;
711   ip_lookup_next_t next;
712   u32 cpu_index = os_get_cpu_number();
713   ip6_main_t * im = &ip6_main;
714
715   from = vlib_frame_vector_args (frame);
716   n_left_from = frame->n_vectors;
717   next = node->cached_next_index;
718
719   if (node->flags & VLIB_NODE_FLAG_TRACE)
720       ip6_forward_next_trace(vm, node, frame, VLIB_TX);
721
722   while (n_left_from > 0)
723     {
724       vlib_get_next_frame (vm, node, next,
725                            to_next, n_left_to_next);
726
727
728       while (n_left_from > 0 && n_left_to_next > 0)
729         {
730           ip_lookup_next_t next0;
731           const load_balance_t *lb0;
732           vlib_buffer_t * p0;
733           u32 pi0, lbi0, hc0;
734           const ip6_header_t *ip0;
735           const dpo_id_t *dpo0;
736
737           pi0 = from[0];
738           to_next[0] = pi0;
739
740           p0 = vlib_get_buffer (vm, pi0);
741
742           ip0 = vlib_buffer_get_current (p0);
743           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
744
745           lb0 = load_balance_get(lbi0);
746           hc0 = lb0->lb_hash_config;
747           vnet_buffer(p0)->ip.flow_hash = ip6_compute_flow_hash(ip0, hc0);
748
749           dpo0 = load_balance_get_bucket_i(lb0,
750                                            vnet_buffer(p0)->ip.flow_hash &
751                                            (lb0->lb_n_buckets - 1));
752           next0 = dpo0->dpoi_next_node;
753           /* Only process the HBH Option Header if explicitly configured to do so */
754           if (PREDICT_FALSE(ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
755             {
756               next0 = (dpo_is_adj(dpo0) && im->hbh_enabled) ?
757                 (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
758             }
759           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
760
761           vlib_increment_combined_counter
762               (cm, cpu_index, lbi0, 1,
763                vlib_buffer_length_in_chain (vm, p0));
764
765           from += 1;
766           to_next += 1;
767           n_left_to_next -= 1;
768           n_left_from -= 1;
769
770           if (PREDICT_FALSE (next0 != next))
771             {
772               n_left_to_next += 1;
773               vlib_put_next_frame (vm, node, next, n_left_to_next);
774               next = next0;
775               vlib_get_next_frame (vm, node, next,
776                                    to_next, n_left_to_next);
777               to_next[0] = pi0;
778               to_next += 1;
779               n_left_to_next -= 1;
780             }
781         }
782
783       vlib_put_next_frame (vm, node, next, n_left_to_next);
784     }
785
786   return frame->n_vectors;
787 }
788
789 VLIB_REGISTER_NODE (ip6_load_balance_node) = {
790   .function = ip6_load_balance,
791   .name = "ip6-load-balance",
792   .vector_size = sizeof (u32),
793   .sibling_of = "ip6-lookup",
794   .format_trace = format_ip6_lookup_trace,
795   .n_next_nodes = 0,
796 };
797
798 VLIB_NODE_FUNCTION_MULTIARCH (ip6_load_balance_node, ip6_load_balance)
799
800 typedef struct {
801   /* Adjacency taken. */
802   u32 adj_index;
803   u32 flow_hash;
804   u32 fib_index;
805
806   /* Packet data, possibly *after* rewrite. */
807   u8 packet_data[128 - 1*sizeof(u32)];
808 } ip6_forward_next_trace_t;
809
810 static u8 * format_ip6_forward_next_trace (u8 * s, va_list * args)
811 {
812   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
813   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
814   ip6_forward_next_trace_t * t = va_arg (*args, ip6_forward_next_trace_t *);
815   uword indent = format_get_indent (s);
816
817   s = format(s, "%U%U",
818              format_white_space, indent,
819              format_ip6_header, t->packet_data, sizeof (t->packet_data));
820   return s;
821 }
822
823 static u8 * format_ip6_lookup_trace (u8 * s, va_list * args)
824 {
825   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
826   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
827   ip6_forward_next_trace_t * t = va_arg (*args, ip6_forward_next_trace_t *);
828   uword indent = format_get_indent (s);
829
830   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
831               t->fib_index, t->adj_index, t->flow_hash);
832   s = format(s, "\n%U%U",
833              format_white_space, indent,
834              format_ip6_header, t->packet_data, sizeof (t->packet_data));
835   return s;
836 }
837
838
839 static u8 * format_ip6_rewrite_trace (u8 * s, va_list * args)
840 {
841   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
842   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
843   ip6_forward_next_trace_t * t = va_arg (*args, ip6_forward_next_trace_t *);
844   vnet_main_t * vnm = vnet_get_main();
845   uword indent = format_get_indent (s);
846
847   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
848               t->fib_index, t->adj_index, format_ip_adjacency,
849               t->adj_index, FORMAT_IP_ADJACENCY_NONE,
850               t->flow_hash);
851   s = format (s, "\n%U%U",
852               format_white_space, indent,
853               format_ip_adjacency_packet_data,
854               vnm, t->adj_index,
855               t->packet_data, sizeof (t->packet_data));
856   return s;
857 }
858
859 /* Common trace function for all ip6-forward next nodes. */
860 void
861 ip6_forward_next_trace (vlib_main_t * vm,
862                         vlib_node_runtime_t * node,
863                         vlib_frame_t * frame,
864                         vlib_rx_or_tx_t which_adj_index)
865 {
866   u32 * from, n_left;
867   ip6_main_t * im = &ip6_main;
868
869   n_left = frame->n_vectors;
870   from = vlib_frame_vector_args (frame);
871
872   while (n_left >= 4)
873     {
874       u32 bi0, bi1;
875       vlib_buffer_t * b0, * b1;
876       ip6_forward_next_trace_t * t0, * t1;
877
878       /* Prefetch next iteration. */
879       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
880       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
881
882       bi0 = from[0];
883       bi1 = from[1];
884
885       b0 = vlib_get_buffer (vm, bi0);
886       b1 = vlib_get_buffer (vm, bi1);
887
888       if (b0->flags & VLIB_BUFFER_IS_TRACED)
889         {
890           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
891           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
892           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
893           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
894               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
895               vec_elt (im->fib_index_by_sw_if_index,
896                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
897
898           clib_memcpy (t0->packet_data,
899                   vlib_buffer_get_current (b0),
900                   sizeof (t0->packet_data));
901         }
902       if (b1->flags & VLIB_BUFFER_IS_TRACED)
903         {
904           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
905           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
906           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
907           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
908               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
909               vec_elt (im->fib_index_by_sw_if_index,
910                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
911
912           clib_memcpy (t1->packet_data,
913                   vlib_buffer_get_current (b1),
914                   sizeof (t1->packet_data));
915         }
916       from += 2;
917       n_left -= 2;
918     }
919
920   while (n_left >= 1)
921     {
922       u32 bi0;
923       vlib_buffer_t * b0;
924       ip6_forward_next_trace_t * t0;
925
926       bi0 = from[0];
927
928       b0 = vlib_get_buffer (vm, bi0);
929
930       if (b0->flags & VLIB_BUFFER_IS_TRACED)
931         {
932           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
933           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
934           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
935           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
936               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
937               vec_elt (im->fib_index_by_sw_if_index,
938                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
939
940           clib_memcpy (t0->packet_data,
941                   vlib_buffer_get_current (b0),
942                   sizeof (t0->packet_data));
943         }
944       from += 1;
945       n_left -= 1;
946     }
947 }
948
949 static uword
950 ip6_drop_or_punt (vlib_main_t * vm,
951                   vlib_node_runtime_t * node,
952                   vlib_frame_t * frame,
953                   ip6_error_t error_code)
954 {
955   u32 * buffers = vlib_frame_vector_args (frame);
956   uword n_packets = frame->n_vectors;
957
958   vlib_error_drop_buffers (vm, node,
959                            buffers,
960                            /* stride */ 1,
961                            n_packets,
962                            /* next */ 0,
963                            ip6_input_node.index,
964                            error_code);
965
966   if (node->flags & VLIB_NODE_FLAG_TRACE)
967     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
968
969   return n_packets;
970 }
971
972 static uword
973 ip6_drop (vlib_main_t * vm,
974           vlib_node_runtime_t * node,
975           vlib_frame_t * frame)
976 { return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_DROP); }
977
978 static uword
979 ip6_punt (vlib_main_t * vm,
980           vlib_node_runtime_t * node,
981           vlib_frame_t * frame)
982 { return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_PUNT); }
983
984 VLIB_REGISTER_NODE (ip6_drop_node,static) = {
985   .function = ip6_drop,
986   .name = "ip6-drop",
987   .vector_size = sizeof (u32),
988
989   .format_trace = format_ip6_forward_next_trace,
990
991   .n_next_nodes = 1,
992   .next_nodes = {
993     [0] = "error-drop",
994   },
995 };
996
997 VLIB_NODE_FUNCTION_MULTIARCH (ip6_drop_node, ip6_drop)
998
999 VLIB_REGISTER_NODE (ip6_punt_node,static) = {
1000   .function = ip6_punt,
1001   .name = "ip6-punt",
1002   .vector_size = sizeof (u32),
1003
1004   .format_trace = format_ip6_forward_next_trace,
1005
1006   .n_next_nodes = 1,
1007   .next_nodes = {
1008     [0] = "error-punt",
1009   },
1010 };
1011
1012 VLIB_NODE_FUNCTION_MULTIARCH (ip6_punt_node, ip6_punt)
1013
1014 VLIB_REGISTER_NODE (ip6_multicast_node,static) = {
1015   .function = ip6_drop,
1016   .name = "ip6-multicast",
1017   .vector_size = sizeof (u32),
1018
1019   .format_trace = format_ip6_forward_next_trace,
1020
1021   .n_next_nodes = 1,
1022   .next_nodes = {
1023     [0] = "error-drop",
1024   },
1025 };
1026
1027 /* Compute TCP/UDP/ICMP6 checksum in software. */
1028 u16 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, ip6_header_t * ip0, int *bogus_lengthp)
1029 {
1030   ip_csum_t sum0;
1031   u16 sum16, payload_length_host_byte_order;
1032   u32 i, n_this_buffer, n_bytes_left;
1033   u32 headers_size = sizeof(ip0[0]);
1034   void * data_this_buffer;
1035
1036   ASSERT(bogus_lengthp);
1037   *bogus_lengthp = 0;
1038
1039   /* Initialize checksum with ip header. */
1040   sum0 = ip0->payload_length + clib_host_to_net_u16 (ip0->protocol);
1041   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
1042   data_this_buffer = (void *) (ip0 + 1);
1043
1044   for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++)
1045     {
1046       sum0 = ip_csum_with_carry (sum0,
1047                                  clib_mem_unaligned (&ip0->src_address.as_uword[i], uword));
1048       sum0 = ip_csum_with_carry (sum0,
1049                                  clib_mem_unaligned (&ip0->dst_address.as_uword[i], uword));
1050     }
1051
1052   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
1053   if (PREDICT_FALSE (ip0->protocol ==  IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
1054     {
1055       u32  skip_bytes;
1056       ip6_hop_by_hop_ext_t *ext_hdr = (ip6_hop_by_hop_ext_t  *)data_this_buffer;
1057
1058       /* validate really icmp6 next */
1059       ASSERT(ext_hdr->next_hdr == IP_PROTOCOL_ICMP6);
1060
1061       skip_bytes = 8* (1 + ext_hdr->n_data_u64s);
1062       data_this_buffer  = (void *)((u8 *)data_this_buffer + skip_bytes);
1063
1064       payload_length_host_byte_order  -= skip_bytes;
1065       headers_size += skip_bytes;
1066    }
1067
1068   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1069 #if DPDK > 0
1070   if (p0 && n_this_buffer + headers_size  > p0->current_length)
1071   {
1072     struct rte_mbuf *mb = rte_mbuf_from_vlib_buffer(p0);
1073     u8 nb_segs = mb->nb_segs;
1074
1075     n_this_buffer = (p0->current_length > headers_size ?
1076                      p0->current_length - headers_size : 0);
1077     while (n_bytes_left)
1078       {
1079         sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1080         n_bytes_left -= n_this_buffer;
1081
1082         mb = mb->next;
1083         nb_segs--;
1084         if ((nb_segs == 0) || (mb == 0))
1085           break;
1086
1087         data_this_buffer = rte_ctrlmbuf_data(mb);
1088         n_this_buffer = mb->data_len;
1089       }
1090     if (n_bytes_left || nb_segs)
1091       {
1092         *bogus_lengthp = 1;
1093         return 0xfefe;
1094       }
1095   }
1096   else sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1097 #else
1098   if (p0 && n_this_buffer + headers_size  > p0->current_length)
1099     n_this_buffer = p0->current_length > headers_size  ? p0->current_length - headers_size  : 0;
1100   while (1)
1101     {
1102       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1103       n_bytes_left -= n_this_buffer;
1104       if (n_bytes_left == 0)
1105         break;
1106
1107       if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
1108         {
1109           *bogus_lengthp = 1;
1110           return 0xfefe;
1111         }
1112       p0 = vlib_get_buffer (vm, p0->next_buffer);
1113       data_this_buffer = vlib_buffer_get_current (p0);
1114       n_this_buffer = p0->current_length;
1115     }
1116 #endif /* DPDK */
1117
1118   sum16 = ~ ip_csum_fold (sum0);
1119
1120   return sum16;
1121 }
1122
1123 u32 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1124 {
1125   ip6_header_t * ip0 = vlib_buffer_get_current (p0);
1126   udp_header_t * udp0;
1127   u16 sum16;
1128   int bogus_length;
1129
1130   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
1131   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1132           || ip0->protocol == IP_PROTOCOL_ICMP6
1133           || ip0->protocol == IP_PROTOCOL_UDP
1134           || ip0->protocol ==  IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS);
1135
1136   udp0 = (void *) (ip0 + 1);
1137   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1138     {
1139       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1140                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1141       return p0->flags;
1142     }
1143
1144   sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, &bogus_length);
1145
1146   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1147                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1148
1149   return p0->flags;
1150 }
1151
1152 static uword
1153 ip6_local (vlib_main_t * vm,
1154            vlib_node_runtime_t * node,
1155            vlib_frame_t * frame)
1156 {
1157   ip6_main_t * im = &ip6_main;
1158   ip_lookup_main_t * lm = &im->lookup_main;
1159   ip_local_next_t next_index;
1160   u32 * from, * to_next, n_left_from, n_left_to_next;
1161   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_input_node.index);
1162
1163   from = vlib_frame_vector_args (frame);
1164   n_left_from = frame->n_vectors;
1165   next_index = node->cached_next_index;
1166
1167   if (node->flags & VLIB_NODE_FLAG_TRACE)
1168     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1169
1170   while (n_left_from > 0)
1171     {
1172       vlib_get_next_frame (vm, node, next_index,
1173                            to_next, n_left_to_next);
1174
1175       while (n_left_from >= 4 && n_left_to_next >= 2)
1176         {
1177           vlib_buffer_t * p0, * p1;
1178           ip6_header_t * ip0, * ip1;
1179           udp_header_t * udp0, * udp1;
1180           u32 pi0, ip_len0, udp_len0, flags0, next0;
1181           u32 pi1, ip_len1, udp_len1, flags1, next1;
1182           i32 len_diff0, len_diff1;
1183           u8 error0, type0, good_l4_checksum0;
1184           u8 error1, type1, good_l4_checksum1;
1185
1186           pi0 = to_next[0] = from[0];
1187           pi1 = to_next[1] = from[1];
1188           from += 2;
1189           n_left_from -= 2;
1190           to_next += 2;
1191           n_left_to_next -= 2;
1192
1193           p0 = vlib_get_buffer (vm, pi0);
1194           p1 = vlib_get_buffer (vm, pi1);
1195
1196           ip0 = vlib_buffer_get_current (p0);
1197           ip1 = vlib_buffer_get_current (p1);
1198
1199           type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
1200           type1 = lm->builtin_protocol_by_ip_protocol[ip1->protocol];
1201
1202           next0 = lm->local_next_by_ip_protocol[ip0->protocol];
1203           next1 = lm->local_next_by_ip_protocol[ip1->protocol];
1204
1205           flags0 = p0->flags;
1206           flags1 = p1->flags;
1207
1208           good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1209           good_l4_checksum1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1210
1211           udp0 = ip6_next_header (ip0);
1212           udp1 = ip6_next_header (ip1);
1213
1214           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1215           good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0;
1216           good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UDP && udp1->checksum == 0;
1217
1218           good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
1219           good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UNKNOWN;
1220
1221           /* Verify UDP length. */
1222           ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
1223           ip_len1 = clib_net_to_host_u16 (ip1->payload_length);
1224           udp_len0 = clib_net_to_host_u16 (udp0->length);
1225           udp_len1 = clib_net_to_host_u16 (udp1->length);
1226
1227           len_diff0 = ip_len0 - udp_len0;
1228           len_diff1 = ip_len1 - udp_len1;
1229
1230           len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
1231           len_diff1 = type1 == IP_BUILTIN_PROTOCOL_UDP ? len_diff1 : 0;
1232
1233           if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
1234                              && ! good_l4_checksum0
1235                              && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED)))
1236             {
1237               flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0);
1238               good_l4_checksum0 =
1239                 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1240             }
1241           if (PREDICT_FALSE (type1 != IP_BUILTIN_PROTOCOL_UNKNOWN
1242                              && ! good_l4_checksum1
1243                              && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED)))
1244             {
1245               flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, p1);
1246               good_l4_checksum1 =
1247                 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1248             }
1249
1250           error0 = error1 = IP6_ERROR_UNKNOWN_PROTOCOL;
1251
1252           error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0;
1253           error1 = len_diff1 < 0 ? IP6_ERROR_UDP_LENGTH : error1;
1254
1255           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP == IP6_ERROR_UDP_CHECKSUM);
1256           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP == IP6_ERROR_ICMP_CHECKSUM);
1257           error0 = (! good_l4_checksum0
1258                     ? IP6_ERROR_UDP_CHECKSUM + type0
1259                     : error0);
1260           error1 = (! good_l4_checksum1
1261                     ? IP6_ERROR_UDP_CHECKSUM + type1
1262                     : error1);
1263
1264           /* Drop packets from unroutable hosts. */
1265           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1266           if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL &&
1267               type0 != IP_BUILTIN_PROTOCOL_ICMP &&
1268               !ip6_address_is_link_local_unicast(&ip0->src_address))
1269             {
1270               u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
1271               error0 = (ADJ_INDEX_INVALID == src_adj_index0
1272                         ? IP6_ERROR_SRC_LOOKUP_MISS
1273                         : error0);
1274             }
1275           if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL &&
1276               type1 != IP_BUILTIN_PROTOCOL_ICMP &&
1277               !ip6_address_is_link_local_unicast(&ip1->src_address))
1278             {
1279               u32 src_adj_index1 = ip6_src_lookup_for_packet (im, p1, ip1);
1280               error1 = (ADJ_INDEX_INVALID == src_adj_index1
1281                         ? IP6_ERROR_SRC_LOOKUP_MISS
1282                         : error1);
1283             }
1284
1285           next0 = error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1286           next1 = error1 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1287
1288           p0->error = error_node->errors[error0];
1289           p1->error = error_node->errors[error1];
1290
1291           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1292                                            to_next, n_left_to_next,
1293                                            pi0, pi1, next0, next1);
1294         }
1295
1296       while (n_left_from > 0 && n_left_to_next > 0)
1297         {
1298           vlib_buffer_t * p0;
1299           ip6_header_t * ip0;
1300           udp_header_t * udp0;
1301           u32 pi0, ip_len0, udp_len0, flags0, next0;
1302           i32 len_diff0;
1303           u8 error0, type0, good_l4_checksum0;
1304
1305           pi0 = to_next[0] = from[0];
1306           from += 1;
1307           n_left_from -= 1;
1308           to_next += 1;
1309           n_left_to_next -= 1;
1310
1311           p0 = vlib_get_buffer (vm, pi0);
1312
1313           ip0 = vlib_buffer_get_current (p0);
1314
1315           type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
1316           next0 = lm->local_next_by_ip_protocol[ip0->protocol];
1317
1318           flags0 = p0->flags;
1319
1320           good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1321
1322           udp0 = ip6_next_header (ip0);
1323
1324           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1325           good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0;
1326
1327           good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
1328
1329           /* Verify UDP length. */
1330           ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
1331           udp_len0 = clib_net_to_host_u16 (udp0->length);
1332
1333           len_diff0 = ip_len0 - udp_len0;
1334
1335           len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
1336
1337           if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
1338                              && ! good_l4_checksum0
1339                              && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED)))
1340             {
1341               flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0);
1342               good_l4_checksum0 =
1343                 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1344             }
1345
1346           error0 = IP6_ERROR_UNKNOWN_PROTOCOL;
1347
1348           error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0;
1349
1350           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP == IP6_ERROR_UDP_CHECKSUM);
1351           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP == IP6_ERROR_ICMP_CHECKSUM);
1352           error0 = (! good_l4_checksum0
1353                     ? IP6_ERROR_UDP_CHECKSUM + type0
1354                     : error0);
1355
1356           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1357           if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL &&
1358               type0 != IP_BUILTIN_PROTOCOL_ICMP &&
1359               !ip6_address_is_link_local_unicast(&ip0->src_address))
1360             {
1361               u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
1362               error0 = (ADJ_INDEX_INVALID == src_adj_index0
1363                         ? IP6_ERROR_SRC_LOOKUP_MISS
1364                         : error0);
1365             }
1366
1367           next0 = error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1368
1369           p0->error = error_node->errors[error0];
1370
1371           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1372                                            to_next, n_left_to_next,
1373                                            pi0, next0);
1374         }
1375
1376       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1377     }
1378
1379   return frame->n_vectors;
1380 }
1381
1382 VLIB_REGISTER_NODE (ip6_local_node,static) = {
1383   .function = ip6_local,
1384   .name = "ip6-local",
1385   .vector_size = sizeof (u32),
1386
1387   .format_trace = format_ip6_forward_next_trace,
1388
1389   .n_next_nodes = IP_LOCAL_N_NEXT,
1390   .next_nodes = {
1391     [IP_LOCAL_NEXT_DROP] = "error-drop",
1392     [IP_LOCAL_NEXT_PUNT] = "error-punt",
1393     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
1394     [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input",
1395   },
1396 };
1397
1398 VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_node, ip6_local)
1399
1400 void ip6_register_protocol (u32 protocol, u32 node_index)
1401 {
1402   vlib_main_t * vm = vlib_get_main();
1403   ip6_main_t * im = &ip6_main;
1404   ip_lookup_main_t * lm = &im->lookup_main;
1405
1406   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1407   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip6_local_node.index, node_index);
1408 }
1409
/* Next-node indices shared by the ip6-discover-neighbor and ip6-glean nodes. */
typedef enum
{
  IP6_DISCOVER_NEIGHBOR_NEXT_DROP = 0,  /* where the triggering packet goes */
  IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX,  /* where the solicitation goes */
  IP6_DISCOVER_NEIGHBOR_N_NEXT,         /* number of next nodes */
} ip6_discover_neighbor_next_t;
1415
/*
 * Error/counter indices for the neighbor discovery nodes; they index
 * ip6_discover_neighbor_error_strings.
 */
typedef enum
{
  IP6_DISCOVER_NEIGHBOR_ERROR_DROP = 0,
  IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT,
  IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS,
} ip6_discover_neighbor_error_t;
1421
1422 static uword
1423 ip6_discover_neighbor_inline (vlib_main_t * vm,
1424                               vlib_node_runtime_t * node,
1425                               vlib_frame_t * frame,
1426                               int is_glean)
1427 {
1428   vnet_main_t * vnm = vnet_get_main();
1429   ip6_main_t * im = &ip6_main;
1430   ip_lookup_main_t * lm = &im->lookup_main;
1431   u32 * from, * to_next_drop;
1432   uword n_left_from, n_left_to_next_drop;
1433   static f64 time_last_seed_change = -1e100;
1434   static u32 hash_seeds[3];
1435   static uword hash_bitmap[256 / BITS (uword)];
1436   f64 time_now;
1437   int bogus_length;
1438
1439   if (node->flags & VLIB_NODE_FLAG_TRACE)
1440     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1441
1442   time_now = vlib_time_now (vm);
1443   if (time_now - time_last_seed_change > 1e-3)
1444     {
1445       uword i;
1446       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1447                                              sizeof (hash_seeds));
1448       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1449         hash_seeds[i] = r[i];
1450
1451       /* Mark all hash keys as been not-seen before. */
1452       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1453         hash_bitmap[i] = 0;
1454
1455       time_last_seed_change = time_now;
1456     }
1457
1458   from = vlib_frame_vector_args (frame);
1459   n_left_from = frame->n_vectors;
1460
1461   while (n_left_from > 0)
1462     {
1463       vlib_get_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP,
1464                            to_next_drop, n_left_to_next_drop);
1465
1466       while (n_left_from > 0 && n_left_to_next_drop > 0)
1467         {
1468           vlib_buffer_t * p0;
1469           ip6_header_t * ip0;
1470           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1471           uword bm0;
1472           ip_adjacency_t * adj0;
1473           vnet_hw_interface_t * hw_if0;
1474           u32 next0;
1475
1476           pi0 = from[0];
1477
1478           p0 = vlib_get_buffer (vm, pi0);
1479
1480           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1481
1482           ip0 = vlib_buffer_get_current (p0);
1483
1484           adj0 = ip_get_adjacency (lm, adj_index0);
1485
1486           if (!is_glean)
1487             {
1488               ip0->dst_address.as_u64[0] = adj0->sub_type.nbr.next_hop.ip6.as_u64[0];
1489               ip0->dst_address.as_u64[1] = adj0->sub_type.nbr.next_hop.ip6.as_u64[1];
1490             }
1491
1492           a0 = hash_seeds[0];
1493           b0 = hash_seeds[1];
1494           c0 = hash_seeds[2];
1495
1496           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1497           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1498
1499           a0 ^= sw_if_index0;
1500           b0 ^= ip0->dst_address.as_u32[0];
1501           c0 ^= ip0->dst_address.as_u32[1];
1502
1503           hash_v3_mix32 (a0, b0, c0);
1504
1505           b0 ^= ip0->dst_address.as_u32[2];
1506           c0 ^= ip0->dst_address.as_u32[3];
1507
1508           hash_v3_finalize32 (a0, b0, c0);
1509
1510           c0 &= BITS (hash_bitmap) - 1;
1511           c0 = c0 / BITS (uword);
1512           m0 = (uword) 1 << (c0 % BITS (uword));
1513
1514           bm0 = hash_bitmap[c0];
1515           drop0 = (bm0 & m0) != 0;
1516
1517           /* Mark it as seen. */
1518           hash_bitmap[c0] = bm0 | m0;
1519
1520           from += 1;
1521           n_left_from -= 1;
1522           to_next_drop[0] = pi0;
1523           to_next_drop += 1;
1524           n_left_to_next_drop -= 1;
1525
1526           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1527
1528           /* If the interface is link-down, drop the pkt */
1529           if (!(hw_if0->flags & VNET_HW_INTERFACE_FLAG_LINK_UP))
1530             drop0 = 1;
1531
1532           p0->error =
1533             node->errors[drop0 ? IP6_DISCOVER_NEIGHBOR_ERROR_DROP
1534                          : IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT];
1535           if (drop0)
1536             continue;
1537
1538           /*
1539            * the adj has been updated to a rewrite but the node the DPO that got
1540            * us here hasn't - yet. no big deal. we'll drop while we wait.
1541            */
1542           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1543             continue;
1544
1545           {
1546             u32 bi0 = 0;
1547             icmp6_neighbor_solicitation_header_t * h0;
1548             vlib_buffer_t * b0;
1549
1550             h0 = vlib_packet_template_get_packet
1551               (vm, &im->discover_neighbor_packet_template, &bi0);
1552
1553             /*
1554              * Build ethernet header.
1555              * Choose source address based on destination lookup
1556              * adjacency.
1557              */
1558             if (ip6_src_address_for_packet (lm,
1559                                             sw_if_index0,
1560                                             &h0->ip.src_address))
1561               {
1562                 /* There is no address on the interface */
1563                 p0->error = node->errors[IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS];
1564                 vlib_buffer_free(vm, &bi0, 1);
1565                 continue;
1566               }
1567
1568             /*
1569              * Destination address is a solicited node multicast address.
1570              * We need to fill in
1571              * the low 24 bits with low 24 bits of target's address.
1572              */
1573             h0->ip.dst_address.as_u8[13] = ip0->dst_address.as_u8[13];
1574             h0->ip.dst_address.as_u8[14] = ip0->dst_address.as_u8[14];
1575             h0->ip.dst_address.as_u8[15] = ip0->dst_address.as_u8[15];
1576
1577             h0->neighbor.target_address = ip0->dst_address;
1578
1579             clib_memcpy (h0->link_layer_option.ethernet_address,
1580                     hw_if0->hw_address, vec_len (hw_if0->hw_address));
1581
1582             /* $$$$ appears we need this; why is the checksum non-zero? */
1583             h0->neighbor.icmp.checksum = 0;
1584             h0->neighbor.icmp.checksum =
1585               ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h0->ip,
1586                                                  &bogus_length);
1587
1588             ASSERT (bogus_length == 0);
1589
1590             vlib_buffer_copy_trace_flag (vm, p0, bi0);
1591             b0 = vlib_get_buffer (vm, bi0);
1592             vnet_buffer (b0)->sw_if_index[VLIB_TX]
1593               = vnet_buffer (p0)->sw_if_index[VLIB_TX];
1594
1595             /* Add rewrite/encap string. */
1596             vnet_rewrite_one_header (adj0[0], h0,
1597                                      sizeof (ethernet_header_t));
1598             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1599
1600             next0 = IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX;
1601
1602             vlib_set_next_frame_buffer (vm, node, next0, bi0);
1603           }
1604         }
1605
1606       vlib_put_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP,
1607                            n_left_to_next_drop);
1608     }
1609
1610   return frame->n_vectors;
1611 }
1612
1613 static uword
1614 ip6_discover_neighbor (vlib_main_t * vm,
1615                        vlib_node_runtime_t * node,
1616                        vlib_frame_t * frame)
1617 {
1618     return (ip6_discover_neighbor_inline(vm, node, frame, 0));
1619 }
1620
1621 static uword
1622 ip6_glean (vlib_main_t * vm,
1623            vlib_node_runtime_t * node,
1624            vlib_frame_t * frame)
1625 {
1626     return (ip6_discover_neighbor_inline(vm, node, frame, 1));
1627 }
1628
1629 static char * ip6_discover_neighbor_error_strings[] = {
1630   [IP6_DISCOVER_NEIGHBOR_ERROR_DROP] = "address overflow drops",
1631   [IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT]
1632   = "neighbor solicitations sent",
1633   [IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS]
1634     = "no source address for ND solicitation",
1635 };
1636
1637 VLIB_REGISTER_NODE (ip6_discover_neighbor_node) = {
1638   .function = ip6_discover_neighbor,
1639   .name = "ip6-discover-neighbor",
1640   .vector_size = sizeof (u32),
1641
1642   .format_trace = format_ip6_forward_next_trace,
1643
1644   .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings),
1645   .error_strings = ip6_discover_neighbor_error_strings,
1646
1647   .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT,
1648   .next_nodes = {
1649     [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop",
1650     [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output",
1651   },
1652 };
1653
1654 VLIB_REGISTER_NODE (ip6_glean_node) = {
1655   .function = ip6_glean,
1656   .name = "ip6-glean",
1657   .vector_size = sizeof (u32),
1658
1659   .format_trace = format_ip6_forward_next_trace,
1660
1661   .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings),
1662   .error_strings = ip6_discover_neighbor_error_strings,
1663
1664   .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT,
1665   .next_nodes = {
1666     [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop",
1667     [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output",
1668   },
1669 };
1670
1671 clib_error_t *
1672 ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index)
1673 {
1674   vnet_main_t * vnm = vnet_get_main();
1675   ip6_main_t * im = &ip6_main;
1676   icmp6_neighbor_solicitation_header_t * h;
1677   ip6_address_t * src;
1678   ip_interface_address_t * ia;
1679   ip_adjacency_t * adj;
1680   vnet_hw_interface_t * hi;
1681   vnet_sw_interface_t * si;
1682   vlib_buffer_t * b;
1683   u32 bi = 0;
1684   int bogus_length;
1685
1686   si = vnet_get_sw_interface (vnm, sw_if_index);
1687
1688   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
1689     {
1690       return clib_error_return (0, "%U: interface %U down",
1691                                 format_ip6_address, dst,
1692                                 format_vnet_sw_if_index_name, vnm,
1693                                 sw_if_index);
1694     }
1695
1696   src = ip6_interface_address_matching_destination (im, dst, sw_if_index, &ia);
1697   if (! src)
1698     {
1699       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
1700       return clib_error_return
1701         (0, "no matching interface address for destination %U (interface %U)",
1702          format_ip6_address, dst,
1703          format_vnet_sw_if_index_name, vnm, sw_if_index);
1704     }
1705
1706   h = vlib_packet_template_get_packet (vm, &im->discover_neighbor_packet_template, &bi);
1707
1708   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
1709
1710   /* Destination address is a solicited node multicast address.  We need to fill in
1711      the low 24 bits with low 24 bits of target's address. */
1712   h->ip.dst_address.as_u8[13] = dst->as_u8[13];
1713   h->ip.dst_address.as_u8[14] = dst->as_u8[14];
1714   h->ip.dst_address.as_u8[15] = dst->as_u8[15];
1715
1716   h->ip.src_address = src[0];
1717   h->neighbor.target_address = dst[0];
1718
1719   clib_memcpy (h->link_layer_option.ethernet_address, hi->hw_address, vec_len (hi->hw_address));
1720
1721   h->neighbor.icmp.checksum =
1722     ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length);
1723   ASSERT(bogus_length == 0);
1724
1725   b = vlib_get_buffer (vm, bi);
1726   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
1727
1728   /* Add encapsulation string for software interface (e.g. ethernet header). */
1729   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
1730   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
1731   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
1732
1733   {
1734     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
1735     u32 * to_next = vlib_frame_vector_args (f);
1736     to_next[0] = bi;
1737     f->n_vectors = 1;
1738     vlib_put_frame_to_node (vm, hi->output_node_index, f);
1739   }
1740
1741   return /* no error */ 0;
1742 }
1743
/* Next-node slots used by the ip6-rewrite node and its siblings. */
typedef enum {
  IP6_REWRITE_NEXT_DROP = 0,        /* registered as "error-drop" */
  IP6_REWRITE_NEXT_ICMP_ERROR = 1,  /* registered as "ip6-icmp-error" */
} ip6_rewrite_next_t;
1748
1749 always_inline uword
1750 ip6_rewrite_inline (vlib_main_t * vm,
1751                     vlib_node_runtime_t * node,
1752                     vlib_frame_t * frame,
1753                     int rewrite_for_locally_received_packets,
1754                     int is_midchain)
1755 {
1756   ip_lookup_main_t * lm = &ip6_main.lookup_main;
1757   u32 * from = vlib_frame_vector_args (frame);
1758   u32 n_left_from, n_left_to_next, * to_next, next_index;
1759   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_input_node.index);
1760   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
1761
1762   n_left_from = frame->n_vectors;
1763   next_index = node->cached_next_index;
1764   u32 cpu_index = os_get_cpu_number();
1765
1766   while (n_left_from > 0)
1767     {
1768       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1769
1770       while (n_left_from >= 4 && n_left_to_next >= 2)
1771         {
1772           ip_adjacency_t * adj0, * adj1;
1773           vlib_buffer_t * p0, * p1;
1774           ip6_header_t * ip0, * ip1;
1775           u32 pi0, rw_len0, next0, error0, adj_index0;
1776           u32 pi1, rw_len1, next1, error1, adj_index1;
1777           u32 tx_sw_if_index0, tx_sw_if_index1;
1778
1779           /* Prefetch next iteration. */
1780           {
1781             vlib_buffer_t * p2, * p3;
1782
1783             p2 = vlib_get_buffer (vm, from[2]);
1784             p3 = vlib_get_buffer (vm, from[3]);
1785
1786             vlib_prefetch_buffer_header (p2, LOAD);
1787             vlib_prefetch_buffer_header (p3, LOAD);
1788
1789             CLIB_PREFETCH (p2->pre_data, 32, STORE);
1790             CLIB_PREFETCH (p3->pre_data, 32, STORE);
1791
1792             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
1793             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
1794           }
1795
1796           pi0 = to_next[0] = from[0];
1797           pi1 = to_next[1] = from[1];
1798
1799           from += 2;
1800           n_left_from -= 2;
1801           to_next += 2;
1802           n_left_to_next -= 2;
1803
1804           p0 = vlib_get_buffer (vm, pi0);
1805           p1 = vlib_get_buffer (vm, pi1);
1806
1807           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
1808           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
1809
1810           /* We should never rewrite a pkt using the MISS adjacency */
1811           ASSERT(adj_index0 && adj_index1);
1812
1813           ip0 = vlib_buffer_get_current (p0);
1814           ip1 = vlib_buffer_get_current (p1);
1815
1816           error0 = error1 = IP6_ERROR_NONE;
1817           next0 = next1 = IP6_REWRITE_NEXT_DROP;
1818
1819           if (! rewrite_for_locally_received_packets)
1820             {
1821               i32 hop_limit0 = ip0->hop_limit, hop_limit1 = ip1->hop_limit;
1822
1823               /* Input node should have reject packets with hop limit 0. */
1824               ASSERT (ip0->hop_limit > 0);
1825               ASSERT (ip1->hop_limit > 0);
1826
1827               hop_limit0 -= 1;
1828               hop_limit1 -= 1;
1829
1830               ip0->hop_limit = hop_limit0;
1831               ip1->hop_limit = hop_limit1;
1832
1833               /*
1834                * If the hop count drops below 1 when forwarding, generate
1835                * an ICMP response.
1836                */
1837               if (PREDICT_FALSE(hop_limit0 <= 0))
1838                 {
1839                   error0 = IP6_ERROR_TIME_EXPIRED;
1840                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
1841                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
1842                   icmp6_error_set_vnet_buffer(p0, ICMP6_time_exceeded,
1843                         ICMP6_time_exceeded_ttl_exceeded_in_transit, 0);
1844                 }
1845               if (PREDICT_FALSE(hop_limit1 <= 0))
1846                 {
1847                   error1 = IP6_ERROR_TIME_EXPIRED;
1848                   next1 = IP6_REWRITE_NEXT_ICMP_ERROR;
1849                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
1850                   icmp6_error_set_vnet_buffer(p1, ICMP6_time_exceeded,
1851                         ICMP6_time_exceeded_ttl_exceeded_in_transit, 0);
1852                 }
1853             }
1854
1855           adj0 = ip_get_adjacency (lm, adj_index0);
1856           adj1 = ip_get_adjacency (lm, adj_index1);
1857
1858           rw_len0 = adj0[0].rewrite_header.data_bytes;
1859           rw_len1 = adj1[0].rewrite_header.data_bytes;
1860           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
1861           vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
1862
1863           vlib_increment_combined_counter (&adjacency_counters,
1864                                            cpu_index,
1865                                            adj_index0,
1866                                            /* packet increment */ 0,
1867                                            /* byte increment */ rw_len0);
1868           vlib_increment_combined_counter (&adjacency_counters,
1869                                            cpu_index,
1870                                            adj_index1,
1871                                            /* packet increment */ 0,
1872                                            /* byte increment */ rw_len1);
1873
1874           /* Check MTU of outgoing interface. */
1875           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
1876                     ? IP6_ERROR_MTU_EXCEEDED
1877                     : error0);
1878           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
1879                     ? IP6_ERROR_MTU_EXCEEDED
1880                     : error1);
1881
1882           /* Don't adjust the buffer for hop count issue; icmp-error node
1883            * wants to see the IP headerr */
1884           if (PREDICT_TRUE(error0 == IP6_ERROR_NONE))
1885             {
1886               p0->current_data -= rw_len0;
1887               p0->current_length += rw_len0;
1888
1889               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
1890               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
1891                   tx_sw_if_index0;
1892               next0 = adj0[0].rewrite_header.next_index;
1893
1894               vnet_feature_arc_start(lm->output_feature_arc_index,
1895                                      tx_sw_if_index0, &next0, p0);
1896             }
1897           if (PREDICT_TRUE(error1 == IP6_ERROR_NONE))
1898             {
1899               p1->current_data -= rw_len1;
1900               p1->current_length += rw_len1;
1901
1902               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
1903               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
1904                   tx_sw_if_index1;
1905               next1 = adj1[0].rewrite_header.next_index;
1906
1907               vnet_feature_arc_start(lm->output_feature_arc_index,
1908                                      tx_sw_if_index1, &next1, p1);
1909             }
1910
1911           /* Guess we are only writing on simple Ethernet header. */
1912           vnet_rewrite_two_headers (adj0[0], adj1[0],
1913                                     ip0, ip1,
1914                                     sizeof (ethernet_header_t));
1915
1916           if (is_midchain)
1917           {
1918               adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
1919               adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
1920           }
1921
1922           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1923                                            to_next, n_left_to_next,
1924                                            pi0, pi1, next0, next1);
1925         }
1926
1927       while (n_left_from > 0 && n_left_to_next > 0)
1928         {
1929           ip_adjacency_t * adj0;
1930           vlib_buffer_t * p0;
1931           ip6_header_t * ip0;
1932           u32 pi0, rw_len0;
1933           u32 adj_index0, next0, error0;
1934           u32 tx_sw_if_index0;
1935
1936           pi0 = to_next[0] = from[0];
1937
1938           p0 = vlib_get_buffer (vm, pi0);
1939
1940           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
1941
1942           /* We should never rewrite a pkt using the MISS adjacency */
1943           ASSERT(adj_index0);
1944
1945           adj0 = ip_get_adjacency (lm, adj_index0);
1946
1947           ip0 = vlib_buffer_get_current (p0);
1948
1949           error0 = IP6_ERROR_NONE;
1950           next0 = IP6_REWRITE_NEXT_DROP;
1951
1952           /* Check hop limit */
1953           if (! rewrite_for_locally_received_packets)
1954             {
1955               i32 hop_limit0 = ip0->hop_limit;
1956
1957               ASSERT (ip0->hop_limit > 0);
1958
1959               hop_limit0 -= 1;
1960
1961               ip0->hop_limit = hop_limit0;
1962
1963               if (PREDICT_FALSE(hop_limit0 <= 0))
1964                 {
1965                   /*
1966                    * If the hop count drops below 1 when forwarding, generate
1967                    * an ICMP response.
1968                    */
1969                   error0 = IP6_ERROR_TIME_EXPIRED;
1970                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
1971                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
1972                   icmp6_error_set_vnet_buffer(p0, ICMP6_time_exceeded,
1973                         ICMP6_time_exceeded_ttl_exceeded_in_transit, 0);
1974                 }
1975             }
1976
1977           /* Guess we are only writing on simple Ethernet header. */
1978           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
1979
1980           /* Update packet buffer attributes/set output interface. */
1981           rw_len0 = adj0[0].rewrite_header.data_bytes;
1982           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
1983
1984           vlib_increment_combined_counter (&adjacency_counters,
1985                                            cpu_index,
1986                                            adj_index0,
1987                                            /* packet increment */ 0,
1988                                            /* byte increment */ rw_len0);
1989
1990           /* Check MTU of outgoing interface. */
1991           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
1992                     ? IP6_ERROR_MTU_EXCEEDED
1993                     : error0);
1994
1995           /* Don't adjust the buffer for hop count issue; icmp-error node
1996            * wants to see the IP headerr */
1997           if (PREDICT_TRUE(error0 == IP6_ERROR_NONE))
1998             {
1999               p0->current_data -= rw_len0;
2000               p0->current_length += rw_len0;
2001
2002               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2003
2004               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2005               next0 = adj0[0].rewrite_header.next_index;
2006
2007               vnet_feature_arc_start(lm->output_feature_arc_index,
2008                                      tx_sw_if_index0, &next0, p0);
2009             }
2010
2011           if (is_midchain)
2012           {
2013               adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2014           }
2015
2016           p0->error = error_node->errors[error0];
2017
2018           from += 1;
2019           n_left_from -= 1;
2020           to_next += 1;
2021           n_left_to_next -= 1;
2022
2023           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2024                                            to_next, n_left_to_next,
2025                                            pi0, next0);
2026         }
2027
2028       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2029     }
2030
2031   /* Need to do trace after rewrites to pick up new packet data. */
2032   if (node->flags & VLIB_NODE_FLAG_TRACE)
2033     ip6_forward_next_trace (vm, node, frame, adj_rx_tx);
2034
2035   return frame->n_vectors;
2036 }
2037
2038 static uword
2039 ip6_rewrite_transit (vlib_main_t * vm,
2040                      vlib_node_runtime_t * node,
2041                      vlib_frame_t * frame)
2042 {
2043   return ip6_rewrite_inline (vm, node, frame,
2044                              /* rewrite_for_locally_received_packets */ 0,
2045                              /* midchain */ 0);
2046 }
2047
2048 static uword
2049 ip6_rewrite_local (vlib_main_t * vm,
2050                    vlib_node_runtime_t * node,
2051                    vlib_frame_t * frame)
2052 {
2053   return ip6_rewrite_inline (vm, node, frame,
2054                              /* rewrite_for_locally_received_packets */ 1,
2055                              /* midchain */ 0);
2056 }
2057
2058 static uword
2059 ip6_midchain (vlib_main_t * vm,
2060               vlib_node_runtime_t * node,
2061               vlib_frame_t * frame)
2062 {
2063   return ip6_rewrite_inline (vm, node, frame,
2064                              /* rewrite_for_locally_received_packets */ 0,
2065                              /* midchain */ 1);
2066 }
2067
2068 VLIB_REGISTER_NODE (ip6_midchain_node) = {
2069   .function = ip6_midchain,
2070   .name = "ip6-midchain",
2071   .vector_size = sizeof (u32),
2072
2073   .format_trace = format_ip6_forward_next_trace,
2074
2075   .sibling_of = "ip6-rewrite",
2076 };
2077
2078 VLIB_NODE_FUNCTION_MULTIARCH (ip6_midchain_node, ip6_midchain)
2079
2080 VLIB_REGISTER_NODE (ip6_rewrite_node) = {
2081   .function = ip6_rewrite_transit,
2082   .name = "ip6-rewrite",
2083   .vector_size = sizeof (u32),
2084
2085   .format_trace = format_ip6_rewrite_trace,
2086
2087   .n_next_nodes = 2,
2088   .next_nodes = {
2089     [IP6_REWRITE_NEXT_DROP] = "error-drop",
2090     [IP6_REWRITE_NEXT_ICMP_ERROR] = "ip6-icmp-error",
2091   },
2092 };
2093
2094 VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite_transit);
2095
2096 VLIB_REGISTER_NODE (ip6_rewrite_local_node) = {
2097   .function = ip6_rewrite_local,
2098   .name = "ip6-rewrite-local",
2099   .vector_size = sizeof (u32),
2100
2101   .sibling_of = "ip6-rewrite",
2102
2103   .format_trace = format_ip6_rewrite_trace,
2104
2105   .n_next_nodes = 0,
2106 };
2107
2108 VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_local_node, ip6_rewrite_local);
2109
2110 /*
2111  * Hop-by-Hop handling
2112  */
2113
/* Hop-by-hop state used in this file: per-option-type handler (options[]) and
   trace (trace[]) callbacks, plus the next_override next-node index. */
ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
2115
/* X-macro list of hop-by-hop node errors: _(symbol suffix, counter description). */
#define foreach_ip6_hop_by_hop_error                            \
  _(PROCESSED, "pkts with ip6 hop-by-hop options")              \
  _(FORMAT, "incorrectly formatted hop-by-hop options")         \
  _(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options")

/* One enumerator per list entry, followed by the entry count. */
typedef enum {
#define _(name,text) IP6_HOP_BY_HOP_ERROR_##name,
  foreach_ip6_hop_by_hop_error
#undef _
  IP6_HOP_BY_HOP_N_ERROR,
} ip6_hop_by_hop_error_t;
2127
2128 /*
2129  * Primary h-b-h handler trace support
2130  * We work pretty hard on the problem for obvious reasons
2131  */
typedef struct {
  /* Next-node index chosen for the traced packet. */
  u32 next_index;
  /* Number of bytes of option_data that were captured. */
  u32 trace_len;
  /* Verbatim copy of the hop-by-hop header, truncated to the size of this array. */
  u8 option_data[256];
} ip6_hop_by_hop_trace_t;
2137
2138 vlib_node_registration_t ip6_hop_by_hop_node;
2139
2140 static char * ip6_hop_by_hop_error_strings[] = {
2141 #define _(sym,string) string,
2142   foreach_ip6_hop_by_hop_error
2143 #undef _
2144 };
2145
2146 static u8 *
2147 format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
2148 {
2149   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
2150   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
2151   ip6_hop_by_hop_trace_t * t = va_arg (*args, ip6_hop_by_hop_trace_t *);
2152   ip6_hop_by_hop_header_t *hbh0;
2153   ip6_hop_by_hop_option_t *opt0, *limit0;
2154   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2155
2156   u8 type0;
2157
2158   hbh0 = (ip6_hop_by_hop_header_t *)t->option_data;
2159
2160   s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d",
2161               t->next_index, (hbh0->length+1)<<3, t->trace_len);
2162
2163   opt0 = (ip6_hop_by_hop_option_t *) (hbh0+1);
2164   limit0 = (ip6_hop_by_hop_option_t *) ((u8 *)hbh0) + t->trace_len;
2165
2166   while (opt0 < limit0) {
2167     type0 = opt0->type;
2168     switch (type0) {
2169     case 0: /* Pad, just stop */
2170       opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1;
2171       break;
2172
2173     default:
2174       if (hm->trace[type0]) {
2175         s = (*hm->trace[type0])(s, opt0);
2176       } else {
2177         s = format (s, "\n    unrecognized option %d length %d", type0, opt0->length);
2178       }
2179       opt0 = (ip6_hop_by_hop_option_t *) (((u8 *)opt0) + opt0->length + sizeof (ip6_hop_by_hop_option_t));
2180       break;
2181     }
2182   }
2183   return s;
2184 }
2185
2186 always_inline u8 ip6_scan_hbh_options (
2187                                        vlib_buffer_t * b0,
2188                                        ip6_header_t *ip0,
2189                                        ip6_hop_by_hop_header_t *hbh0,
2190                                        ip6_hop_by_hop_option_t *opt0,
2191                                        ip6_hop_by_hop_option_t *limit0,
2192                                        u32 *next0)
2193 {
2194   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2195   u8 type0;
2196   u8 error0 = 0;
2197
2198   while (opt0 < limit0)
2199     {
2200       type0 = opt0->type;
2201       switch (type0)
2202         {
2203         case 0: /* Pad1 */
2204           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1;
2205           continue;
2206         case 1: /* PadN */
2207           break;
2208         default:
2209           if (hm->options[type0])
2210             {
2211               if ((*hm->options[type0])(b0, ip0, opt0) < 0)
2212                 {
2213                   error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2214                   return(error0);
2215                 }
2216             }
2217           else
2218             {
2219               /* Unrecognized mandatory option, check the two high order bits */
2220               switch (opt0->type & HBH_OPTION_TYPE_HIGH_ORDER_BITS)
2221                 {
2222                 case HBH_OPTION_TYPE_SKIP_UNKNOWN:
2223                   break;
2224                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN:
2225                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2226                   *next0 = IP_LOOKUP_NEXT_DROP;
2227                   break;
2228                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP:
2229                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2230                   *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
2231                   icmp6_error_set_vnet_buffer(b0, ICMP6_parameter_problem,
2232                                               ICMP6_parameter_problem_unrecognized_option, (u8 *)opt0 - (u8 *)ip0);
2233                   break;
2234                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST:
2235                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
2236                   if (!ip6_address_is_multicast(&ip0->dst_address))
2237                     {
2238                       *next0 =  IP_LOOKUP_NEXT_ICMP_ERROR;
2239                       icmp6_error_set_vnet_buffer(b0, ICMP6_parameter_problem,
2240                                                   ICMP6_parameter_problem_unrecognized_option, (u8 *)opt0 - (u8 *)ip0);
2241                     }
2242                   else
2243                     {
2244                       *next0 =  IP_LOOKUP_NEXT_DROP;
2245                     }
2246                   break;
2247                 }
2248               return(error0);
2249             }
2250         }
2251       opt0 = (ip6_hop_by_hop_option_t *) (((u8 *)opt0) + opt0->length + sizeof (ip6_hop_by_hop_option_t));
2252     }
2253   return(error0);
2254 }
2255
2256 /*
2257  * Process the Hop-by-Hop Options header
2258  */
2259 static uword
2260 ip6_hop_by_hop (vlib_main_t * vm,
2261                 vlib_node_runtime_t * node,
2262                 vlib_frame_t * frame)
2263 {
2264   vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_hop_by_hop_node.index);
2265   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
2266   u32 n_left_from, *from, *to_next;
2267   ip_lookup_next_t next_index;
2268   ip6_main_t * im = &ip6_main;
2269   ip_lookup_main_t *lm = &im->lookup_main;
2270
2271   from = vlib_frame_vector_args (frame);
2272   n_left_from = frame->n_vectors;
2273   next_index = node->cached_next_index;
2274
2275   while (n_left_from > 0) {
2276     u32 n_left_to_next;
2277
2278     vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2279
2280     while (n_left_from >= 4 && n_left_to_next >= 2) {
2281       u32 bi0, bi1;
2282       vlib_buffer_t * b0, *b1;
2283       u32 next0, next1;
2284       ip6_header_t * ip0, *ip1;
2285       ip6_hop_by_hop_header_t *hbh0, *hbh1;
2286       ip6_hop_by_hop_option_t *opt0, *limit0, *opt1, *limit1;
2287       u8 error0 = 0, error1 = 0;
2288
2289       /* Prefetch next iteration. */
2290       {
2291         vlib_buffer_t * p2, * p3;
2292
2293         p2 = vlib_get_buffer (vm, from[2]);
2294         p3 = vlib_get_buffer (vm, from[3]);
2295
2296         vlib_prefetch_buffer_header (p2, LOAD);
2297         vlib_prefetch_buffer_header (p3, LOAD);
2298
2299         CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
2300         CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
2301       }
2302
2303       /* Speculatively enqueue b0, b1 to the current next frame */
2304       to_next[0] = bi0 = from[0];
2305       to_next[1] = bi1 = from[1];
2306       from += 2;
2307       to_next += 2;
2308       n_left_from -= 2;
2309       n_left_to_next -= 2;
2310
2311       b0 = vlib_get_buffer (vm, bi0);
2312       b1 = vlib_get_buffer (vm, bi1);
2313
2314       /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2315       u32 adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
2316       ip_adjacency_t *adj0 = ip_get_adjacency(lm, adj_index0);
2317       u32 adj_index1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
2318       ip_adjacency_t *adj1 = ip_get_adjacency(lm, adj_index1);
2319
2320       /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
2321       next0 = adj0->lookup_next_index;
2322       next1 = adj1->lookup_next_index;
2323
2324       ip0 = vlib_buffer_get_current (b0);
2325       ip1 = vlib_buffer_get_current (b1);
2326       hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1);
2327       hbh1 = (ip6_hop_by_hop_header_t *)(ip1+1);
2328       opt0 = (ip6_hop_by_hop_option_t *)(hbh0+1);
2329       opt1 = (ip6_hop_by_hop_option_t *)(hbh1+1);
2330       limit0 = (ip6_hop_by_hop_option_t *)((u8 *)hbh0 + ((hbh0->length + 1) << 3));
2331       limit1 = (ip6_hop_by_hop_option_t *)((u8 *)hbh1 + ((hbh1->length + 1) << 3));
2332
2333       /*
2334        * Basic validity checks
2335        */
2336       if ((hbh0->length + 1) << 3 > clib_net_to_host_u16(ip0->payload_length)) {
2337         error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2338         next0 = IP_LOOKUP_NEXT_DROP;
2339         goto outdual;
2340       }
2341       /* Scan the set of h-b-h options, process ones that we understand */
2342       error0 = ip6_scan_hbh_options(b0, ip0, hbh0, opt0, limit0, &next0);
2343
2344       if ((hbh1->length + 1) << 3 > clib_net_to_host_u16(ip1->payload_length)) {
2345         error1 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2346         next1 = IP_LOOKUP_NEXT_DROP;
2347         goto outdual;
2348       }
2349       /* Scan the set of h-b-h options, process ones that we understand */
2350       error1 = ip6_scan_hbh_options(b1,ip1,hbh1,opt1,limit1, &next1);
2351
2352     outdual:
2353       /* Has the classifier flagged this buffer for special treatment? */
2354       if (PREDICT_FALSE((error0 == 0) && (vnet_buffer(b0)->l2_classify.opaque_index & OI_DECAP)))
2355         next0 = hm->next_override;
2356
2357       /* Has the classifier flagged this buffer for special treatment? */
2358       if (PREDICT_FALSE((error1 == 0) && (vnet_buffer(b1)->l2_classify.opaque_index & OI_DECAP)))
2359         next1 = hm->next_override;
2360
2361       if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
2362         {
2363           if (b0->flags & VLIB_BUFFER_IS_TRACED) {
2364             ip6_hop_by_hop_trace_t *t = vlib_add_trace(vm, node, b0, sizeof (*t));
2365             u32 trace_len = (hbh0->length + 1) << 3;
2366             t->next_index = next0;
2367             /* Capture the h-b-h option verbatim */
2368             trace_len = trace_len < ARRAY_LEN(t->option_data) ? trace_len : ARRAY_LEN(t->option_data);
2369             t->trace_len = trace_len;
2370             clib_memcpy(t->option_data, hbh0, trace_len);
2371           }
2372           if (b1->flags & VLIB_BUFFER_IS_TRACED) {
2373             ip6_hop_by_hop_trace_t *t = vlib_add_trace(vm, node, b1, sizeof (*t));
2374             u32 trace_len = (hbh1->length + 1) << 3;
2375             t->next_index = next1;
2376             /* Capture the h-b-h option verbatim */
2377             trace_len = trace_len < ARRAY_LEN(t->option_data) ? trace_len : ARRAY_LEN(t->option_data);
2378             t->trace_len = trace_len;
2379             clib_memcpy(t->option_data, hbh1, trace_len);
2380           }
2381
2382         }
2383
2384       b0->error = error_node->errors[error0];
2385       b1->error = error_node->errors[error1];
2386
2387       /* verify speculative enqueue, maybe switch current next frame */
2388       vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, n_left_to_next, bi0,
2389                                        bi1,next0, next1);
2390     }
2391
2392     while (n_left_from > 0 && n_left_to_next > 0) {
2393       u32 bi0;
2394       vlib_buffer_t * b0;
2395       u32 next0;
2396       ip6_header_t * ip0;
2397       ip6_hop_by_hop_header_t *hbh0;
2398       ip6_hop_by_hop_option_t *opt0, *limit0;
2399       u8 error0 = 0;
2400
2401       /* Speculatively enqueue b0 to the current next frame */
2402       bi0 = from[0];
2403       to_next[0] = bi0;
2404       from += 1;
2405       to_next += 1;
2406       n_left_from -= 1;
2407       n_left_to_next -= 1;
2408
2409       b0 = vlib_get_buffer (vm, bi0);
2410       /*
2411        * Default use the next_index from the adjacency.
2412        * A HBH option rarely redirects to a different node 
2413        */
2414       u32 adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
2415       ip_adjacency_t *adj0 = ip_get_adjacency(lm, adj_index0);
2416       next0 = adj0->lookup_next_index;
2417
2418       ip0 = vlib_buffer_get_current (b0);
2419       hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1);
2420       opt0 = (ip6_hop_by_hop_option_t *)(hbh0+1);
2421       limit0 = (ip6_hop_by_hop_option_t *)((u8 *)hbh0 + ((hbh0->length + 1) << 3));
2422
2423       /*
2424        * Basic validity checks
2425        */
2426       if ((hbh0->length + 1) << 3 > clib_net_to_host_u16(ip0->payload_length)) {
2427         error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
2428         next0 = IP_LOOKUP_NEXT_DROP;
2429         goto out0;
2430       }
2431
2432       /* Scan the set of h-b-h options, process ones that we understand */
2433       error0 = ip6_scan_hbh_options(b0, ip0, hbh0, opt0, limit0, &next0);
2434
2435     out0:
2436       /* Has the classifier flagged this buffer for special treatment? */
2437     if (PREDICT_FALSE((error0 == 0) && (vnet_buffer(b0)->l2_classify.opaque_index & OI_DECAP)))
2438         next0 = hm->next_override;
2439
2440       if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) {
2441         ip6_hop_by_hop_trace_t *t = vlib_add_trace(vm, node, b0, sizeof (*t));
2442         u32 trace_len = (hbh0->length + 1) << 3;
2443         t->next_index = next0;
2444         /* Capture the h-b-h option verbatim */
2445         trace_len = trace_len < ARRAY_LEN(t->option_data) ? trace_len : ARRAY_LEN(t->option_data);
2446         t->trace_len = trace_len;
2447         clib_memcpy(t->option_data, hbh0, trace_len);
2448       }
2449
2450       b0->error = error_node->errors[error0];
2451
2452       /* verify speculative enqueue, maybe switch current next frame */
2453       vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0);
2454     }
2455     vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2456   }
2457   return frame->n_vectors;
2458 }
2459
2460 VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = {
2461   .function = ip6_hop_by_hop,
2462   .name = "ip6-hop-by-hop",
2463   .sibling_of = "ip6-lookup",
2464   .vector_size = sizeof (u32),
2465   .format_trace = format_ip6_hop_by_hop_trace,
2466   .type = VLIB_NODE_TYPE_INTERNAL,
2467   .n_errors = ARRAY_LEN(ip6_hop_by_hop_error_strings),
2468   .error_strings = ip6_hop_by_hop_error_strings,
2469   .n_next_nodes = 0,
2470 };
2471
2472 VLIB_NODE_FUNCTION_MULTIARCH (ip6_hop_by_hop_node, ip6_hop_by_hop);
2473
2474 static clib_error_t *
2475 ip6_hop_by_hop_init (vlib_main_t * vm)
2476 {
2477   ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
2478   memset(hm->options, 0, sizeof(hm->options));
2479   memset(hm->trace, 0, sizeof(hm->trace));
2480   hm->next_override = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP;
2481   return (0);
2482 }
2483
2484 VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
2485
2486 void ip6_hbh_set_next_override (uword next)
2487 {
2488   ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
2489
2490   hm->next_override = next;
2491 }
2492
2493 int
2494 ip6_hbh_register_option (u8 option,
2495                          int options(vlib_buffer_t *b, ip6_header_t *ip, ip6_hop_by_hop_option_t *opt),
2496                          u8 *trace(u8 *s, ip6_hop_by_hop_option_t *opt))
2497 {
2498   ip6_main_t * im = &ip6_main;
2499   ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
2500
2501   ASSERT (option < ARRAY_LEN (hm->options));
2502
2503   /* Already registered */
2504   if (hm->options[option])
2505     return (-1);
2506
2507   hm->options[option] = options;
2508   hm->trace[option] = trace;
2509
2510   /* Set global variable */
2511   im->hbh_enabled = 1;
2512
2513   return (0);
2514 }
2515
2516 int
2517 ip6_hbh_unregister_option (u8 option)
2518 {
2519   ip6_main_t * im = &ip6_main;
2520   ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
2521
2522   ASSERT (option < ARRAY_LEN (hm->options));
2523
2524   /* Not registered */
2525   if (!hm->options[option])
2526     return (-1);
2527
2528   hm->options[option] = NULL;
2529   hm->trace[option] = NULL;
2530
2531   /* Disable global knob if this was the last option configured */
2532   int i;
2533   bool found = false;
2534   for (i = 0; i < 256; i++) {
2535     if (hm->options[option]) {
2536       found = true;
2537       break;
2538     }
2539   }
2540   if (!found)
2541     im->hbh_enabled = 0;
2542
2543   return (0);
2544 }
2545
2546 /* Global IP6 main. */
2547 ip6_main_t ip6_main;
2548
2549 static clib_error_t *
2550 ip6_lookup_init (vlib_main_t * vm)
2551 {
2552   ip6_main_t * im = &ip6_main;
2553   clib_error_t * error;
2554   uword i;
2555
2556   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
2557     return error;
2558
2559   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
2560     {
2561       u32 j, i0, i1;
2562
2563       i0 = i / 32;
2564       i1 = i % 32;
2565
2566       for (j = 0; j < i0; j++)
2567         im->fib_masks[i].as_u32[j] = ~0;
2568
2569       if (i1)
2570         im->fib_masks[i].as_u32[i0] = clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
2571     }
2572
2573   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 1);
2574
2575   if (im->lookup_table_nbuckets == 0)
2576     im->lookup_table_nbuckets = IP6_FIB_DEFAULT_HASH_NUM_BUCKETS;
2577
2578   im->lookup_table_nbuckets = 1<< max_log2 (im->lookup_table_nbuckets);
2579
2580   if (im->lookup_table_size == 0)
2581     im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE;
2582
2583   BV(clib_bihash_init) (&(im->ip6_table[IP6_FIB_TABLE_FWDING].ip6_hash),
2584                         "ip6 FIB fwding table",
2585                         im->lookup_table_nbuckets,
2586                         im->lookup_table_size);
2587   BV(clib_bihash_init) (&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash,
2588                         "ip6 FIB non-fwding table",
2589                         im->lookup_table_nbuckets,
2590                         im->lookup_table_size);
2591
2592   /* Create FIB with index 0 and table id of 0. */
2593   fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, 0);
2594
2595   {
2596     pg_node_t * pn;
2597     pn = pg_get_node (ip6_lookup_node.index);
2598     pn->unformat_edit = unformat_pg_ip6_header;
2599   }
2600
2601   /* Unless explicitly configured, don't process HBH options */
2602   im->hbh_enabled = 0;
2603
2604   {
2605     icmp6_neighbor_solicitation_header_t p;
2606
2607     memset (&p, 0, sizeof (p));
2608
2609     p.ip.ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6 << 28);
2610     p.ip.payload_length = clib_host_to_net_u16 (sizeof (p)
2611                                                 - STRUCT_OFFSET_OF (icmp6_neighbor_solicitation_header_t, neighbor));
2612     p.ip.protocol = IP_PROTOCOL_ICMP6;
2613     p.ip.hop_limit = 255;
2614     ip6_set_solicited_node_multicast_address (&p.ip.dst_address, 0);
2615
2616     p.neighbor.icmp.type = ICMP6_neighbor_solicitation;
2617
2618     p.link_layer_option.header.type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
2619     p.link_layer_option.header.n_data_u64s = sizeof (p.link_layer_option) / sizeof (u64);
2620
2621     vlib_packet_template_init (vm,
2622                                &im->discover_neighbor_packet_template,
2623                                &p, sizeof (p),
2624                                /* alloc chunk size */ 8,
2625                                "ip6 neighbor discovery");
2626   }
2627
2628   return error;
2629 }
2630
2631 VLIB_INIT_FUNCTION (ip6_lookup_init);
2632
2633 static clib_error_t *
2634 add_del_ip6_interface_table (vlib_main_t * vm,
2635                              unformat_input_t * input,
2636                              vlib_cli_command_t * cmd)
2637 {
2638   vnet_main_t * vnm = vnet_get_main();
2639   clib_error_t * error = 0;
2640   u32 sw_if_index, table_id;
2641
2642   sw_if_index = ~0;
2643
2644   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2645     {
2646       error = clib_error_return (0, "unknown interface `%U'",
2647                                  format_unformat_error, input);
2648       goto done;
2649     }
2650
2651   if (unformat (input, "%d", &table_id))
2652     ;
2653   else
2654     {
2655       error = clib_error_return (0, "expected table id `%U'",
2656                                  format_unformat_error, input);
2657       goto done;
2658     }
2659
2660   {
2661     u32 fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6,
2662                                                       table_id);
2663
2664     vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
2665     ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
2666   }
2667
2668
2669  done:
2670   return error;
2671 }
2672
2673 /*?
2674  * Place the indicated interface into the supplied IPv6 FIB table (also known
2675  * as a VRF). If the FIB table does not exist, this command creates it. To
2676  * display the current IPv6 FIB table, use the command '<em>show ip6 fib</em>'.
2677  * FIB table will only be displayed if a route has been added to the table, or
2678  * an IP Address is assigned to an interface in the table (which adds a route
2679  * automatically).
2680  *
2681  * @note IP addresses added after setting the interface IP table end up in
2682  * the indicated FIB table. If the IP address is added prior to adding the
2683  * interface to the FIB table, it will NOT be part of the FIB table. Predictable
2684  * but potentially counter-intuitive results occur if you provision interface
2685  * addresses in multiple FIBs. Upon RX, packets will be processed in the last
2686  * IP table ID provisioned. It might be marginally useful to evade source RPF
2687  * drops to put an interface address into multiple FIBs.
2688  *
2689  * @cliexpar
2690  * Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id):
2691  * @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2}
2692  ?*/
2693 /* *INDENT-OFF* */
2694 VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) = {
2695   .path = "set interface ip6 table",
2696   .function = add_del_ip6_interface_table,
2697   .short_help = "set interface ip6 table <interface> <table-id>"
2698 };
2699 /* *INDENT-ON* */
2700
2701 void
2702 ip6_link_local_address_from_ethernet_mac_address (ip6_address_t *ip,
2703                                                   u8 *mac)
2704 {
2705   ip->as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL);
2706   /* Invert the "u" bit */
2707   ip->as_u8 [8] = mac[0] ^ (1<<1);
2708   ip->as_u8 [9] = mac[1];
2709   ip->as_u8 [10] = mac[2];
2710   ip->as_u8 [11] = 0xFF;
2711   ip->as_u8 [12] = 0xFE;
2712   ip->as_u8 [13] = mac[3];
2713   ip->as_u8 [14] = mac[4];
2714   ip->as_u8 [15] = mac[5];
2715 }
2716
2717 void
2718 ip6_ethernet_mac_address_from_link_local_address (u8 *mac,
2719                                                   ip6_address_t *ip)
2720 {
2721   /* Invert the previously inverted "u" bit */
2722   mac[0] = ip->as_u8 [8] ^ (1<<1);
2723   mac[1] = ip->as_u8 [9];
2724   mac[2] = ip->as_u8 [10];
2725   mac[3] = ip->as_u8 [13];
2726   mac[4] = ip->as_u8 [14];
2727   mac[5] = ip->as_u8 [15];
2728 }
2729
2730 static clib_error_t *
2731 test_ip6_link_command_fn (vlib_main_t * vm,
2732                           unformat_input_t * input,
2733                           vlib_cli_command_t * cmd)
2734 {
2735   u8 mac[6];
2736   ip6_address_t _a, *a = &_a;
2737
2738   if (unformat (input, "%U", unformat_ethernet_address, mac))
2739     {
2740       ip6_link_local_address_from_ethernet_mac_address (a, mac);
2741       vlib_cli_output (vm, "Link local address: %U",
2742                        format_ip6_address, a);
2743       ip6_ethernet_mac_address_from_link_local_address (mac, a);
2744       vlib_cli_output (vm, "Original MAC address: %U",
2745                        format_ethernet_address, mac);
2746     }
2747
2748   return 0;
2749 }
2750
2751 /*?
2752  * This command converts the given MAC Address into an IPv6 link-local
2753  * address.
2754  *
2755  * @cliexpar
2756  * Example of how to create an IPv6 link-local address:
2757  * @cliexstart{test ip6 link 16:d9:e0:91:79:86}
2758  * Link local address: fe80::14d9:e0ff:fe91:7986
2759  * Original MAC address: 16:d9:e0:91:79:86
2760  * @cliexend
2761 ?*/
2762 /* *INDENT-OFF* */
2763 VLIB_CLI_COMMAND (test_link_command, static) = {
2764   .path = "test ip6 link",
2765   .function = test_ip6_link_command_fn,
2766   .short_help = "test ip6 link <mac-address>",
2767 };
2768 /* *INDENT-ON* */
2769
2770 int vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config)
2771 {
2772   ip6_main_t * im6 = &ip6_main;
2773   ip6_fib_t * fib;
2774   uword * p = hash_get (im6->fib_index_by_table_id, table_id);
2775
2776   if (p == 0)
2777     return -1;
2778
2779   fib = ip6_fib_get (p[0]);
2780
2781   fib->flow_hash_config = flow_hash_config;
2782   return 1;
2783 }
2784
2785 static clib_error_t *
2786 set_ip6_flow_hash_command_fn (vlib_main_t * vm,
2787                               unformat_input_t * input,
2788                               vlib_cli_command_t * cmd)
2789 {
2790   int matched = 0;
2791   u32 table_id = 0;
2792   u32 flow_hash_config = 0;
2793   int rv;
2794
2795   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2796     if (unformat (input, "table %d", &table_id))
2797       matched = 1;
2798 #define _(a,v) \
2799     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2800     foreach_flow_hash_bit
2801 #undef _
2802     else break;
2803   }
2804
2805   if (matched == 0)
2806     return clib_error_return (0, "unknown input `%U'",
2807                               format_unformat_error, input);
2808
2809   rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config);
2810   switch (rv)
2811     {
2812     case 1:
2813       break;
2814
2815     case -1:
2816       return clib_error_return (0, "no such FIB table %d", table_id);
2817
2818     default:
2819       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2820       break;
2821     }
2822
2823   return 0;
2824 }
2825
2826 /*?
2827  * Configure the set of IPv6 fields used by the flow hash.
2828  *
2829  * @cliexpar
2830  * @parblock
2831  * Example of how to set the flow hash on a given table:
2832  * @cliexcmd{set ip6 flow-hash table 8 dst sport dport proto}
2833  *
 * Example of how to display the configured flow hash:
2835  * @cliexstart{show ip6 fib}
2836  * ipv6-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2837  * @::/0
2838  *   unicast-ip6-chain
2839  *   [@0]: dpo-load-balance: [index:5 buckets:1 uRPF:5 to:[0:0]]
2840  *     [0] [@0]: dpo-drop ip6
2841  * fe80::/10
2842  *   unicast-ip6-chain
2843  *   [@0]: dpo-load-balance: [index:10 buckets:1 uRPF:10 to:[0:0]]
2844  *     [0] [@2]: dpo-receive
2845  * ff02::1/128
2846  *   unicast-ip6-chain
2847  *   [@0]: dpo-load-balance: [index:8 buckets:1 uRPF:8 to:[0:0]]
2848  *     [0] [@2]: dpo-receive
2849  * ff02::2/128
2850  *   unicast-ip6-chain
2851  *   [@0]: dpo-load-balance: [index:7 buckets:1 uRPF:7 to:[0:0]]
2852  *     [0] [@2]: dpo-receive
2853  * ff02::16/128
2854  *   unicast-ip6-chain
2855  *   [@0]: dpo-load-balance: [index:9 buckets:1 uRPF:9 to:[0:0]]
2856  *     [0] [@2]: dpo-receive
2857  * ff02::1:ff00:0/104
2858  *   unicast-ip6-chain
2859  *   [@0]: dpo-load-balance: [index:6 buckets:1 uRPF:6 to:[0:0]]
2860  *     [0] [@2]: dpo-receive
2861  * ipv6-VRF:8, fib_index 1, flow hash: dst sport dport proto
2862  * @::/0
2863  *   unicast-ip6-chain
2864  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2865  *     [0] [@0]: dpo-drop ip6
2866  * @::a:1:1:0:4/126
2867  *   unicast-ip6-chain
2868  *   [@0]: dpo-load-balance: [index:27 buckets:1 uRPF:26 to:[0:0]]
2869  *     [0] [@4]: ipv6-glean: af_packet0
2870  * @::a:1:1:0:7/128
2871  *   unicast-ip6-chain
2872  *   [@0]: dpo-load-balance: [index:28 buckets:1 uRPF:27 to:[0:0]]
2873  *     [0] [@2]: dpo-receive: @::a:1:1:0:7 on af_packet0
2874  * fe80::/10
2875  *   unicast-ip6-chain
2876  *   [@0]: dpo-load-balance: [index:26 buckets:1 uRPF:25 to:[0:0]]
2877  *     [0] [@2]: dpo-receive
2878  * fe80::fe:3eff:fe3e:9222/128
2879  *   unicast-ip6-chain
2880  *   [@0]: dpo-load-balance: [index:29 buckets:1 uRPF:28 to:[0:0]]
2881  *     [0] [@2]: dpo-receive: fe80::fe:3eff:fe3e:9222 on af_packet0
2882  * ff02::1/128
2883  *   unicast-ip6-chain
2884  *   [@0]: dpo-load-balance: [index:24 buckets:1 uRPF:23 to:[0:0]]
2885  *     [0] [@2]: dpo-receive
2886  * ff02::2/128
2887  *   unicast-ip6-chain
2888  *   [@0]: dpo-load-balance: [index:23 buckets:1 uRPF:22 to:[0:0]]
2889  *     [0] [@2]: dpo-receive
2890  * ff02::16/128
2891  *   unicast-ip6-chain
2892  *   [@0]: dpo-load-balance: [index:25 buckets:1 uRPF:24 to:[0:0]]
2893  *     [0] [@2]: dpo-receive
2894  * ff02::1:ff00:0/104
2895  *   unicast-ip6-chain
2896  *   [@0]: dpo-load-balance: [index:22 buckets:1 uRPF:21 to:[0:0]]
2897  *     [0] [@2]: dpo-receive
2898  * @cliexend
2899  * @endparblock
2900 ?*/
2901 /* *INDENT-OFF* */
2902 VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) = {
2903     .path = "set ip6 flow-hash",
2904     .short_help =
2905     "set ip6 flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2906     .function = set_ip6_flow_hash_command_fn,
2907 };
2908 /* *INDENT-ON* */
2909
2910 static clib_error_t *
2911 show_ip6_local_command_fn (vlib_main_t * vm,
2912                            unformat_input_t * input,
2913                            vlib_cli_command_t * cmd)
2914 {
2915   ip6_main_t * im = &ip6_main;
2916   ip_lookup_main_t * lm = &im->lookup_main;
2917   int i;
2918
2919   vlib_cli_output (vm, "Protocols handled by ip6_local");
2920   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2921     {
2922       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2923         vlib_cli_output (vm, "%d", i);
2924     }
2925   return 0;
2926 }
2927
2928
2929
2930 /*?
2931  * Display the set of protocols handled by the local IPv6 stack.
2932  *
2933  * @cliexpar
2934  * Example of how to display local protocol table:
2935  * @cliexstart{show ip6 local}
2936  * Protocols handled by ip6_local
2937  * 17
2938  * 43
2939  * 58
2940  * 115
2941  * @cliexend
2942 ?*/
2943 /* *INDENT-OFF* */
2944 VLIB_CLI_COMMAND (show_ip6_local, static) = {
2945   .path = "show ip6 local",
2946   .function = show_ip6_local_command_fn,
2947   .short_help = "show ip6 local",
2948 };
2949 /* *INDENT-ON* */
2950
2951 int vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2952                                  u32 table_index)
2953 {
2954   vnet_main_t * vnm = vnet_get_main();
2955   vnet_interface_main_t * im = &vnm->interface_main;
2956   ip6_main_t * ipm = &ip6_main;
2957   ip_lookup_main_t * lm = &ipm->lookup_main;
2958   vnet_classify_main_t * cm = &vnet_classify_main;
2959   ip6_address_t *if_addr;
2960
2961   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2962     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2963
2964   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2965     return VNET_API_ERROR_NO_SUCH_ENTRY;
2966
2967   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2968   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
2969
2970   if_addr = ip6_interface_first_address (ipm, sw_if_index, NULL);
2971
2972   if (NULL != if_addr)
2973   {
2974       fib_prefix_t pfx = {
2975           .fp_len = 128,
2976           .fp_proto = FIB_PROTOCOL_IP6,
2977           .fp_addr.ip6 = *if_addr,
2978       };
2979       u32 fib_index;
2980
2981       fib_index = fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
2982                                                       sw_if_index);
2983
2984
2985       if (table_index != (u32) ~0)
2986       {
2987           dpo_id_t dpo = DPO_INVALID;
2988
2989           dpo_set(&dpo,
2990                   DPO_CLASSIFY,
2991                   DPO_PROTO_IP6,
2992                   classify_dpo_create(DPO_PROTO_IP6,
2993                                       table_index));
2994
2995           fib_table_entry_special_dpo_add(fib_index,
2996                                           &pfx,
2997                                           FIB_SOURCE_CLASSIFY,
2998                                           FIB_ENTRY_FLAG_NONE,
2999                                           &dpo);
3000           dpo_reset(&dpo);
3001       }
3002       else
3003       {
3004           fib_table_entry_special_remove(fib_index,
3005                                          &pfx,
3006                                          FIB_SOURCE_CLASSIFY);
3007       }
3008   }
3009
3010   return 0;
3011 }
3012
3013 static clib_error_t *
3014 set_ip6_classify_command_fn (vlib_main_t * vm,
3015                              unformat_input_t * input,
3016                              vlib_cli_command_t * cmd)
3017 {
3018   u32 table_index = ~0;
3019   int table_index_set = 0;
3020   u32 sw_if_index = ~0;
3021   int rv;
3022
3023   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3024     if (unformat (input, "table-index %d", &table_index))
3025       table_index_set = 1;
3026     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3027                        vnet_get_main(), &sw_if_index))
3028         ;
3029     else
3030         break;
3031   }
3032
3033   if (table_index_set == 0)
3034       return clib_error_return (0, "classify table-index must be specified");
3035
3036   if (sw_if_index == ~0)
3037     return clib_error_return (0, "interface / subif must be specified");
3038
3039   rv = vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index);
3040
3041   switch (rv)
3042     {
3043     case 0:
3044       break;
3045
3046     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3047       return clib_error_return (0, "No such interface");
3048
3049     case VNET_API_ERROR_NO_SUCH_ENTRY:
3050       return clib_error_return (0, "No such classifier table");
3051     }
3052   return 0;
3053 }
3054
3055 /*?
3056  * Assign a classification table to an interface. The classification
3057  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
3059  * on an interface.
3060  *
3061  * @cliexpar
3062  * Example of how to assign a classification table to an interface:
3063  * @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1}
3064 ?*/
3065 /* *INDENT-OFF* */
3066 VLIB_CLI_COMMAND (set_ip6_classify_command, static) = {
3067     .path = "set ip6 classify",
3068     .short_help =
3069     "set ip6 classify intfc <interface> table-index <classify-idx>",
3070     .function = set_ip6_classify_command_fn,
3071 };
3072 /* *INDENT-ON* */
3073
3074 static clib_error_t *
3075 ip6_config (vlib_main_t * vm, unformat_input_t * input)
3076 {
3077   ip6_main_t * im = &ip6_main;
3078   uword heapsize = 0;
3079   u32 tmp;
3080   u32 nbuckets = 0;
3081
3082   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3083     if (unformat (input, "hash-buckets %d", &tmp))
3084       nbuckets = tmp;
3085     else if (unformat (input, "heap-size %dm", &tmp))
3086       heapsize = ((u64)tmp) << 20;
3087     else if (unformat (input, "heap-size %dM", &tmp))
3088       heapsize = ((u64)tmp) << 20;
3089     else if (unformat (input, "heap-size %dg", &tmp))
3090       heapsize = ((u64)tmp) << 30;
3091     else if (unformat (input, "heap-size %dG", &tmp))
3092       heapsize = ((u64)tmp) << 30;
3093     else
3094       return clib_error_return (0, "unknown input '%U'",
3095                                 format_unformat_error, input);
3096   }
3097
3098   im->lookup_table_nbuckets = nbuckets;
3099   im->lookup_table_size = heapsize;
3100
3101   return 0;
3102 }
3103
3104 VLIB_EARLY_CONFIG_FUNCTION (ip6_config, "ip6");