Initial commit of vpp code.
[vpp.git] / vnet / vnet / ip / ip6_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip6_forward.c: IP v6 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
44 #include <vppinfra/cache.h>
45
46 #include <vppinfra/bihash_template.c>
47
48 static void compute_prefix_lengths_in_search_order (ip6_main_t * im)
49 {
50   int i;
51   vec_reset_length (im->prefix_lengths_in_search_order);
52   /* Note: bitmap reversed so this is in fact a longest prefix match */
53   clib_bitmap_foreach (i, im->non_empty_dst_address_length_bitmap,
54   ({
55     int dst_address_length = 128 - i;
56     vec_add1 (im->prefix_lengths_in_search_order, dst_address_length);
57   }));
58 }
59
60 u32 
61 ip6_fib_lookup_with_table (ip6_main_t * im, u32 fib_index, ip6_address_t * dst)
62 {
63   ip_lookup_main_t * lm = &im->lookup_main;
64   int i, len;
65   int rv;
66   BVT(clib_bihash_kv) kv, value;
67
68   len = vec_len (im->prefix_lengths_in_search_order);
69
70   for (i = 0; i < len; i++)
71     {
72       int dst_address_length = im->prefix_lengths_in_search_order[i];
73       ip6_address_t * mask = &im->fib_masks[dst_address_length];
74       
75       ASSERT(dst_address_length >= 0 && dst_address_length <= 128);
76       
77       kv.key[0] = dst->as_u64[0] & mask->as_u64[0];
78       kv.key[1] = dst->as_u64[1] & mask->as_u64[1];
79       kv.key[2] = ((u64)((fib_index))<<32) | dst_address_length;
80       
81       rv = BV(clib_bihash_search_inline_2)(&im->ip6_lookup_table, &kv, &value);
82       if (rv == 0)
83         return value.value;
84     }
85
86   return lm->miss_adj_index;
87 }
88
89 u32 ip6_fib_lookup (ip6_main_t * im, u32 sw_if_index, ip6_address_t * dst)
90 {
91     u32 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
92     return ip6_fib_lookup_with_table (im, fib_index, dst);
93 }
94
95 void
96 vnet_ip6_fib_init (ip6_main_t * im, u32 fib_index)
97 {
98   ip_lookup_main_t * lm = &im->lookup_main;
99   ip6_add_del_route_args_t a;
100   ip_adjacency_t * adj;
101
102   memset(&a, 0x0, sizeof(ip6_add_del_route_args_t));
103
104   a.table_index_or_table_id = fib_index;
105   a.flags = (IP6_ROUTE_FLAG_ADD
106              | IP6_ROUTE_FLAG_FIB_INDEX
107              | IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY
108              | IP6_ROUTE_FLAG_NO_REDISTRIBUTE);
109
110   /* Add ff02::1:ff00:0/104 via local route for all tables.
111      This is required for neighbor discovery to work. */
112   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
113                           &a.adj_index);
114   adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
115   adj->if_address_index = ~0;
116   adj->rewrite_header.data_bytes = 0;
117
118   ip6_set_solicited_node_multicast_address (&a.dst_address, 0);
119
120   a.dst_address_length = 104;
121   ip6_add_del_route (im, &a);
122
123   /* Add all-routers multicast address via local route for all tables */
124   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
125                           &a.adj_index);
126   adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
127   adj->if_address_index = ~0;
128   adj->rewrite_header.data_bytes = 0;
129
130   ip6_set_reserved_multicast_address (&a.dst_address,
131                                       IP6_MULTICAST_SCOPE_link_local,
132                                       IP6_MULTICAST_GROUP_ID_all_routers);
133   
134   a.dst_address_length = 128;  
135   ip6_add_del_route (im, &a);
136
137   /* Add all-nodes multicast address via local route for all tables */
138   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
139                           &a.adj_index);
140   adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
141   adj->if_address_index = ~0;
142   adj->rewrite_header.data_bytes = 0;
143
144   ip6_set_reserved_multicast_address (&a.dst_address,
145                                       IP6_MULTICAST_SCOPE_link_local,
146                                       IP6_MULTICAST_GROUP_ID_all_hosts);
147
148   a.dst_address_length = 128;
149   ip6_add_del_route (im, &a);
150
151   /* Add all-mldv2  multicast address via local route for all tables */
152   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
153                           &a.adj_index);
154   adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
155   adj->if_address_index = ~0;
156   adj->rewrite_header.data_bytes = 0;
157   
158   ip6_set_reserved_multicast_address (&a.dst_address,
159                                       IP6_MULTICAST_SCOPE_link_local,
160                                       IP6_MULTICAST_GROUP_ID_mldv2_routers);
161
162   a.dst_address_length = 128;
163   ip6_add_del_route (im, &a);
164 }
165
166 static ip6_fib_t *
167 create_fib_with_table_id (ip6_main_t * im, u32 table_id)
168 {
169   ip6_fib_t * fib;
170   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
171   vec_add2 (im->fibs, fib, 1);
172   fib->table_id = table_id;
173   fib->index = fib - im->fibs;
174   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
175   vnet_ip6_fib_init (im, fib->index);
176   return fib;
177 }
178
179 ip6_fib_t *
180 find_ip6_fib_by_table_index_or_id (ip6_main_t * im, u32 table_index_or_id, u32 flags)
181 {
182   uword * p, fib_index;
183
184   fib_index = table_index_or_id;
185   if (! (flags & IP6_ROUTE_FLAG_FIB_INDEX))
186     {
187       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
188       if (! p)
189         return create_fib_with_table_id (im, table_index_or_id);
190       fib_index = p[0];
191     }
192   return vec_elt_at_index (im->fibs, fib_index);
193 }
194
195 void ip6_add_del_route (ip6_main_t * im, ip6_add_del_route_args_t * a)
196 {
197   ip_lookup_main_t * lm = &im->lookup_main;
198   ip6_fib_t * fib;
199   ip6_address_t dst_address;
200   u32 dst_address_length, adj_index;
201   uword is_del;
202   u32 old_adj_index = ~0;
203   BVT(clib_bihash_kv) kv, value;
204
205   vlib_smp_unsafe_warning();
206
207   is_del = (a->flags & IP6_ROUTE_FLAG_DEL) != 0;
208
209   /* Either create new adjacency or use given one depending on arguments. */
210   if (a->n_add_adj > 0)
211     {
212       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
213       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
214     }
215   else
216     adj_index = a->adj_index;
217
218   dst_address = a->dst_address;
219   dst_address_length = a->dst_address_length;
220   fib = find_ip6_fib_by_table_index_or_id (im, a->table_index_or_table_id, 
221                                            a->flags);
222
223   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
224   ip6_address_mask (&dst_address, &im->fib_masks[dst_address_length]);
225
226   /* refcount accounting */
227   if (is_del)
228     {
229       ASSERT (im->dst_address_length_refcounts[dst_address_length] > 0);
230       if (--im->dst_address_length_refcounts[dst_address_length] == 0)
231         {
232           im->non_empty_dst_address_length_bitmap =
233             clib_bitmap_set (im->non_empty_dst_address_length_bitmap, 
234                              128 - dst_address_length, 0);
235           compute_prefix_lengths_in_search_order (im);
236         }
237     }
238   else
239     {
240       im->dst_address_length_refcounts[dst_address_length]++;
241
242       im->non_empty_dst_address_length_bitmap =
243         clib_bitmap_set (im->non_empty_dst_address_length_bitmap, 
244                              128 - dst_address_length, 1);
245       compute_prefix_lengths_in_search_order (im);
246     }
247     
248   kv.key[0] = dst_address.as_u64[0];
249   kv.key[1] = dst_address.as_u64[1];
250   kv.key[2] = ((u64)((fib - im->fibs))<<32) | dst_address_length;
251
252   if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) == 0)
253     old_adj_index = value.value;
254
255   if (is_del)
256     BV(clib_bihash_add_del) (&im->ip6_lookup_table, &kv, 0 /* is_add */);
257   else
258     {
259       /* Make sure adj index is valid. */
260       if (CLIB_DEBUG > 0)
261         (void) ip_get_adjacency (lm, adj_index);
262
263       kv.value = adj_index;
264
265       BV(clib_bihash_add_del) (&im->ip6_lookup_table, &kv, 1 /* is_add */);
266     }
267
268   /* Delete old adjacency index if present and changed. */
269   {
270     if (! (a->flags & IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
271         && old_adj_index != ~0
272         && old_adj_index != adj_index)
273       ip_del_adjacency (lm, old_adj_index);
274   }
275 }
276
277 void
278 ip6_add_del_route_next_hop (ip6_main_t * im,
279                             u32 flags,
280                             ip6_address_t * dst_address,
281                             u32 dst_address_length,
282                             ip6_address_t * next_hop,
283                             u32 next_hop_sw_if_index,
284                             u32 next_hop_weight, u32 adj_index,
285                             u32 explicit_fib_index)
286 {
287   vnet_main_t * vnm = vnet_get_main();
288   ip_lookup_main_t * lm = &im->lookup_main;
289   u32 fib_index;
290   ip6_fib_t * fib;
291   ip6_address_t masked_dst_address;
292   u32 old_mp_adj_index, new_mp_adj_index;
293   u32 dst_adj_index, nh_adj_index;
294   int rv;
295   ip_adjacency_t * dst_adj;
296   ip_multipath_adjacency_t * old_mp, * new_mp;
297   int is_del = (flags & IP6_ROUTE_FLAG_DEL) != 0;
298   int is_interface_next_hop;
299   clib_error_t * error = 0;
300   uword * nh_result;
301   BVT(clib_bihash_kv) kv, value;
302
303   vlib_smp_unsafe_warning();
304
305   if (explicit_fib_index == (u32)~0)
306     fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
307   else
308     fib_index = explicit_fib_index;
309
310   fib = vec_elt_at_index (im->fibs, fib_index);
311
312   /* Lookup next hop to be added or deleted. */
313   is_interface_next_hop = ip6_address_is_zero (next_hop);
314   if (adj_index == (u32)~0)
315     {
316       if (is_interface_next_hop)
317         {
318           nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, 
319                                 next_hop_sw_if_index);
320           if (nh_result)
321             nh_adj_index = *nh_result;
322           else
323             {
324               ip_adjacency_t * adj;
325               adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
326                                       &nh_adj_index);
327               ip6_adjacency_set_interface_route (vnm, adj, 
328                                                  next_hop_sw_if_index, ~0);
329               ip_call_add_del_adjacency_callbacks 
330                 (lm, next_hop_sw_if_index, /* is_del */ 0);
331               hash_set (im->interface_route_adj_index_by_sw_if_index, 
332                         next_hop_sw_if_index, nh_adj_index);
333             }
334         }
335       else
336         {
337           /* Look for the interface /128 route */
338           kv.key[0] = next_hop->as_u64[0];
339           kv.key[1] = next_hop->as_u64[1];
340           kv.key[2] = ((u64)((fib - im->fibs))<<32) | 128;
341
342           if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) < 0)
343             {
344               vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
345               error = clib_error_return (0, "next-hop %U/128 not in FIB",
346                                          format_ip6_address, next_hop);
347               goto done;
348             }
349           
350           nh_adj_index = value.value;
351         }
352     }
353   else
354     {
355       /* Look for the interface /128 route */
356       kv.key[0] = next_hop->as_u64[0];
357       kv.key[1] = next_hop->as_u64[1];
358       kv.key[2] = ((u64)((fib - im->fibs))<<32) | 128;
359       
360       if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) < 0)
361         {
362           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
363           error = clib_error_return (0, "next-hop %U/128 not in FIB",
364                                      format_ip6_address, next_hop);
365           goto done;
366         }
367       
368       nh_adj_index = value.value;
369     }
370
371   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
372   masked_dst_address = dst_address[0];
373   ip6_address_mask (&masked_dst_address, &im->fib_masks[dst_address_length]);
374
375   kv.key[0] = masked_dst_address.as_u64[0];
376   kv.key[1] = masked_dst_address.as_u64[1];
377   kv.key[2] = ((u64)((fib - im->fibs))<<32) | dst_address_length;
378
379   rv = BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value);
380
381   if (rv == 0)
382     {
383       dst_adj_index = value.value;
384       dst_adj = ip_get_adjacency (lm, dst_adj_index);
385     }
386   else
387     {
388       /* For deletes destination must be known. */
389       if (is_del)
390         {
391           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
392           error = clib_error_return (0, "unknown destination %U/%d",
393                                      format_ip6_address, dst_address,
394                                      dst_address_length);
395           goto done;
396         }
397
398       dst_adj_index = ~0;
399       dst_adj = 0;
400     }
401
402   /* Ignore adds of X/128 with next hop of X. */
403   if (! is_del
404       && dst_address_length == 128
405       && ip6_address_is_equal (dst_address, next_hop))
406     {
407       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
408       error = clib_error_return (0, "prefix matches next hop %U/%d",
409                                  format_ip6_address, dst_address,
410                                  dst_address_length);
411       goto done;
412     }
413
414   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
415
416   if (! ip_multipath_adjacency_add_del_next_hop
417       (lm, is_del,
418        dst_adj ? dst_adj->heap_handle : ~0,
419        nh_adj_index,
420        next_hop_weight,
421        &new_mp_adj_index))
422     {
423       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
424       error = clib_error_return 
425         (0, "requested deleting next-hop %U not found in multi-path",
426          format_ip6_address, next_hop);
427       goto done;
428     }
429   
430   old_mp = new_mp = 0;
431   if (old_mp_adj_index != ~0)
432     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
433   if (new_mp_adj_index != ~0)
434     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
435
436   if (old_mp != new_mp)
437     {
438       ip6_add_del_route_args_t a;
439       a.table_index_or_table_id = fib_index;
440       a.flags = ((is_del ? IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD)
441                  | IP6_ROUTE_FLAG_FIB_INDEX
442                  | IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY
443                  | (flags & IP6_ROUTE_FLAG_NO_REDISTRIBUTE));
444       a.dst_address = dst_address[0];
445       a.dst_address_length = dst_address_length;
446       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
447       a.add_adj = 0;
448       a.n_add_adj = 0;
449
450       ip6_add_del_route (im, &a);
451     }
452
453  done:
454   if (error)
455     clib_error_report (error);
456 }
457
458 u32
459 ip6_get_route (ip6_main_t * im,
460                u32 table_index_or_table_id,
461                u32 flags,
462                ip6_address_t * address,
463                u32 address_length)
464 {
465   ip6_fib_t * fib = find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
466   ip6_address_t masked_address;
467   BVT(clib_bihash_kv) kv, value;
468
469   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
470   memcpy (&masked_address, address, sizeof (masked_address));
471   ip6_address_mask (&masked_address, &im->fib_masks[address_length]);
472
473   kv.key[0] = masked_address.as_u64[0];
474   kv.key[1] = masked_address.as_u64[1];
475   kv.key[2] = ((u64)((fib - im->fibs))<<32) | address_length;
476
477   if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) == 0)
478     return (value.value);
479   return 0;
480 }
481
482 void
483 ip6_foreach_matching_route (ip6_main_t * im,
484                             u32 table_index_or_table_id,
485                             u32 flags,
486                             ip6_address_t * dst_address,
487                             u32 address_length,
488                             ip6_address_t ** results,
489                             u8 ** result_lengths)
490 {
491   ip6_fib_t * fib = 
492     find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
493   BVT(clib_bihash) * h = &im->ip6_lookup_table;
494   BVT(clib_bihash_value) * v;
495   clib_bihash_bucket_t * b;
496   int i, j, k;
497   
498   if (*results)
499     _vec_len (*results) = 0;
500   if (*result_lengths)
501     _vec_len (*result_lengths) = 0;
502
503   /* Walk the table looking for routes which match the supplied address */
504   for (i = 0; i < h->nbuckets; i++)
505     {
506       b = &h->buckets [i];
507       if (b->offset == 0)
508           continue;
509
510       v = BV(clib_bihash_get_value) (h, b->offset);
511       for (j = 0; j < (1<<b->log2_pages); j++)
512         {
513           for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
514             {
515               if (BV(clib_bihash_is_free)(&v->kvp[k]))
516                 continue;
517               
518               if ((v->kvp[k].key[2] 
519                    == (((u64)((fib - im->fibs))<<32) | address_length))
520                   && ip6_destination_matches_route 
521                   (im, dst_address, (ip6_address_t *) &v->kvp[k], 
522                    address_length))
523                 {
524                   ip6_address_t * a;
525
526                   a = (ip6_address_t *)(&v->kvp[k]);
527
528                   vec_add1 (*results, a[0]);
529                   vec_add1 (*result_lengths, address_length);
530                 }
531             }
532           v++;
533         }
534     }
535 }
536
537 void ip6_maybe_remap_adjacencies (ip6_main_t * im,
538                                   u32 table_index_or_table_id,
539                                   u32 flags)
540 {
541 #if SOONE
542   ip6_fib_t * fib 
543     = find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
544 #endif
545   ip_lookup_main_t * lm = &im->lookup_main;
546
547   if (lm->n_adjacency_remaps == 0)
548     return;
549
550   clib_warning ("unimplemented, please report to vpp-dev@cisco.com");
551
552   /* All remaps have been performed. */
553   lm->n_adjacency_remaps = 0;
554 }
555
556 void ip6_delete_matching_routes (ip6_main_t * im,
557                                  u32 table_index_or_table_id,
558                                  u32 flags,
559                                  ip6_address_t * address,
560                                  u32 address_length)
561 {
562   /* $$$$ static may be OK - this should happen only on thread 0 */
563   static ip6_address_t * matching_addresses;
564   static u8 * matching_address_lengths;
565   u32 l, i;
566   ip6_add_del_route_args_t a;
567
568   vlib_smp_unsafe_warning();
569
570   a.flags = IP6_ROUTE_FLAG_DEL | IP6_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
571   a.table_index_or_table_id = table_index_or_table_id;
572   a.adj_index = ~0;
573   a.add_adj = 0;
574   a.n_add_adj = 0;
575
576   for (l = address_length + 1; l <= 128; l++)
577     {
578       ip6_foreach_matching_route (im, table_index_or_table_id, flags,
579                                   address,
580                                   l,
581                                   &matching_addresses,
582                                   &matching_address_lengths);
583       for (i = 0; i < vec_len (matching_addresses); i++)
584         {
585           a.dst_address = matching_addresses[i];
586           a.dst_address_length = matching_address_lengths[i];
587           ip6_add_del_route (im, &a);
588         }
589     }
590
591   ip6_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
592 }
593
594 static uword
595 ip6_lookup (vlib_main_t * vm,
596             vlib_node_runtime_t * node,
597             vlib_frame_t * frame)
598 {
599   ip6_main_t * im = &ip6_main;
600   ip_lookup_main_t * lm = &im->lookup_main;
601   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
602   u32 n_left_from, n_left_to_next, * from, * to_next;
603   ip_lookup_next_t next;
604   u32 cpu_index = os_get_cpu_number();
605
606   from = vlib_frame_vector_args (frame);
607   n_left_from = frame->n_vectors;
608   next = node->cached_next_index;
609
610   while (n_left_from > 0)
611     {
612       vlib_get_next_frame (vm, node, next,
613                            to_next, n_left_to_next);
614
615       while (n_left_from >= 4 && n_left_to_next >= 2)
616         {
617           vlib_buffer_t * p0, * p1;
618           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
619           ip_lookup_next_t next0, next1;
620           ip6_header_t * ip0, * ip1;
621           ip_adjacency_t * adj0, * adj1;
622           u32 fib_index0, fib_index1;
623           u32 flow_hash_config0, flow_hash_config1;
624
625           /* Prefetch next iteration. */
626           {
627             vlib_buffer_t * p2, * p3;
628
629             p2 = vlib_get_buffer (vm, from[2]);
630             p3 = vlib_get_buffer (vm, from[3]);
631
632             vlib_prefetch_buffer_header (p2, LOAD);
633             vlib_prefetch_buffer_header (p3, LOAD);
634             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
635             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
636           }
637
638           pi0 = to_next[0] = from[0];
639           pi1 = to_next[1] = from[1];
640
641           p0 = vlib_get_buffer (vm, pi0);
642           p1 = vlib_get_buffer (vm, pi1);
643
644           ip0 = vlib_buffer_get_current (p0);
645           ip1 = vlib_buffer_get_current (p1);
646
647           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
648           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
649
650           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
651             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
652           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
653             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
654
655           adj_index0 = ip6_fib_lookup_with_table (im, fib_index0, 
656                                                   &ip0->dst_address);
657           adj_index1 = ip6_fib_lookup_with_table (im, fib_index1, 
658                                                   &ip1->dst_address);
659
660           adj0 = ip_get_adjacency (lm, adj_index0);
661           adj1 = ip_get_adjacency (lm, adj_index1);
662
663           if (PREDICT_FALSE (adj0->explicit_fib_index != ~0))
664             {
665               adj_index0 = ip6_fib_lookup_with_table 
666                 (im, adj0->explicit_fib_index, &ip0->dst_address);
667               adj0 = ip_get_adjacency (lm, adj_index0);
668             }
669           if (PREDICT_FALSE (adj1->explicit_fib_index != ~0))
670             {
671               adj_index1 = ip6_fib_lookup_with_table 
672                 (im, adj1->explicit_fib_index, &ip1->dst_address);
673               adj1 = ip_get_adjacency (lm, adj_index1);
674             }
675
676           next0 = adj0->lookup_next_index;
677           next1 = adj1->lookup_next_index;
678
679           /* Process hop-by-hop options if present */
680           next0 = (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) ?
681               IP_LOOKUP_NEXT_HOP_BY_HOP : next0;
682           next1 = (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) ?
683               IP_LOOKUP_NEXT_HOP_BY_HOP : next1;
684
685           vnet_buffer (p0)->ip.flow_hash = 
686             vnet_buffer(p1)->ip.flow_hash = 0;
687
688           if (PREDICT_FALSE(adj0->n_adj > 1))
689             {
690               flow_hash_config0 = 
691                 vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config;
692               vnet_buffer (p0)->ip.flow_hash = 
693                 ip6_compute_flow_hash (ip0, flow_hash_config0);
694             }
695
696           if (PREDICT_FALSE(adj1->n_adj > 1))
697             {
698               flow_hash_config1 = 
699                 vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config;
700
701               vnet_buffer (p1)->ip.flow_hash = 
702                 ip6_compute_flow_hash (ip1, flow_hash_config1);
703             }
704
705           ASSERT (adj0->n_adj > 0);
706           ASSERT (adj1->n_adj > 0);
707           ASSERT (is_pow2 (adj0->n_adj));
708           ASSERT (is_pow2 (adj1->n_adj));
709           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
710           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
711
712           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
713           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
714
715           vlib_increment_combined_counter 
716               (cm, cpu_index, adj_index0, 1,
717                vlib_buffer_length_in_chain (vm, p0));
718           vlib_increment_combined_counter 
719               (cm, cpu_index, adj_index1, 1,
720                vlib_buffer_length_in_chain (vm, p1));
721
722           from += 2;
723           to_next += 2;
724           n_left_to_next -= 2;
725           n_left_from -= 2;
726
727           wrong_next = (next0 != next) + 2*(next1 != next);
728           if (PREDICT_FALSE (wrong_next != 0))
729             {
730               switch (wrong_next)
731                 {
732                 case 1:
733                   /* A B A */
734                   to_next[-2] = pi1;
735                   to_next -= 1;
736                   n_left_to_next += 1;
737                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
738                   break;
739
740                 case 2:
741                   /* A A B */
742                   to_next -= 1;
743                   n_left_to_next += 1;
744                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
745                   break;
746
747                 case 3:
748                   /* A B C */
749                   to_next -= 2;
750                   n_left_to_next += 2;
751                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
752                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
753                   if (next0 == next1)
754                     {
755                       /* A B B */
756                       vlib_put_next_frame (vm, node, next, n_left_to_next);
757                       next = next1;
758                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
759                     }
760                 }
761             }
762         }
763     
764       while (n_left_from > 0 && n_left_to_next > 0)
765         {
766           vlib_buffer_t * p0;
767           ip6_header_t * ip0;
768           u32 pi0, adj_index0;
769           ip_lookup_next_t next0;
770           ip_adjacency_t * adj0;
771           u32 fib_index0, flow_hash_config0;
772
773           pi0 = from[0];
774           to_next[0] = pi0;
775
776           p0 = vlib_get_buffer (vm, pi0);
777
778           ip0 = vlib_buffer_get_current (p0);
779
780           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
781           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
782             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
783
784           flow_hash_config0 = 
785               vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config;
786
787           adj_index0 = ip6_fib_lookup_with_table (im, fib_index0, 
788                                                   &ip0->dst_address);
789
790           adj0 = ip_get_adjacency (lm, adj_index0);
791
792           if (PREDICT_FALSE (adj0->explicit_fib_index != ~0))
793             {
794               adj_index0 = ip6_fib_lookup_with_table 
795                 (im, adj0->explicit_fib_index, &ip0->dst_address);
796               adj0 = ip_get_adjacency (lm, adj_index0);
797             }
798
799           next0 = adj0->lookup_next_index;
800           next0 = (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) ?
801               IP_LOOKUP_NEXT_HOP_BY_HOP : next0;
802
803           vnet_buffer (p0)->ip.flow_hash = 0;
804
805           if (PREDICT_FALSE(adj0->n_adj > 1))
806             {
807               flow_hash_config0 = 
808                 vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config;
809               vnet_buffer (p0)->ip.flow_hash = 
810                 ip6_compute_flow_hash (ip0, flow_hash_config0);
811             }
812
813           ASSERT (adj0->n_adj > 0);
814           ASSERT (is_pow2 (adj0->n_adj));
815           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
816
817           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
818
819           vlib_increment_combined_counter 
820               (cm, cpu_index, adj_index0, 1,
821                vlib_buffer_length_in_chain (vm, p0));
822
823           from += 1;
824           to_next += 1;
825           n_left_to_next -= 1;
826           n_left_from -= 1;
827
828           if (PREDICT_FALSE (next0 != next))
829             {
830               n_left_to_next += 1;
831               vlib_put_next_frame (vm, node, next, n_left_to_next);
832               next = next0;
833               vlib_get_next_frame (vm, node, next,
834                                    to_next, n_left_to_next);
835               to_next[0] = pi0;
836               to_next += 1;
837               n_left_to_next -= 1;
838             }
839         }
840
841       vlib_put_next_frame (vm, node, next, n_left_to_next);
842     }
843
844   return frame->n_vectors;
845 }
846
847 void ip6_adjacency_set_interface_route (vnet_main_t * vnm,
848                                         ip_adjacency_t * adj,
849                                         u32 sw_if_index,
850                                         u32 if_address_index)
851 {
852   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
853   ip_lookup_next_t n;
854   u32 node_index;
855
856   if (hw->hw_class_index == ethernet_hw_interface_class.index
857       || hw->hw_class_index == srp_hw_interface_class.index)
858     {
859       n = IP_LOOKUP_NEXT_ARP;
860       node_index = ip6_discover_neighbor_node.index;
861       adj->if_address_index = if_address_index;
862   }
863   else
864     {
865       n = IP_LOOKUP_NEXT_REWRITE;
866       node_index = ip6_rewrite_node.index;
867     }
868
869  adj->lookup_next_index = n;
870  adj->explicit_fib_index = ~0;
871
872  vnet_rewrite_for_sw_interface
873    (vnm,
874     VNET_L3_PACKET_TYPE_IP6,
875     sw_if_index,
876     node_index,
877     VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
878     &adj->rewrite_header,
879     sizeof (adj->rewrite_data));
880 }
881
882 static void
883 ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
884                           ip6_main_t * im, u32 fib_index,
885                           ip_interface_address_t * a)
886 {
887   ip_lookup_main_t * lm = &im->lookup_main;
888   ip_adjacency_t * adj;
889   ip6_address_t * address = ip_interface_address_get_address (lm, a);
890   ip6_add_del_route_args_t x;
891   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
892   u32 classify_table_index;
893
894   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
895   x.table_index_or_table_id = fib_index;
896   x.flags = (IP6_ROUTE_FLAG_ADD
897              | IP6_ROUTE_FLAG_FIB_INDEX
898              | IP6_ROUTE_FLAG_NO_REDISTRIBUTE);
899   x.dst_address = address[0];
900   x.dst_address_length = a->address_length;
901   x.n_add_adj = 0;
902   x.add_adj = 0;
903
904   a->neighbor_probe_adj_index = ~0;
905   if (a->address_length < 128)
906     {
907       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
908                               &x.adj_index);
909       ip6_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
910       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
911       ip6_add_del_route (im, &x);
912       a->neighbor_probe_adj_index = x.adj_index;
913     }
914
915   /* Add e.g. ::1/128 as local to this host. */
916   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
917                           &x.adj_index);
918
919   classify_table_index = ~0;
920   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
921     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
922   if (classify_table_index != (u32) ~0)
923     {
924       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
925       adj->classify_table_index = classify_table_index;
926     }
927   else
928     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
929   
930   adj->if_address_index = a - lm->if_address_pool;
931   adj->rewrite_header.sw_if_index = sw_if_index;
932   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
933   adj->rewrite_header.data_bytes = 0;
934   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
935   x.dst_address_length = 128;
936   ip6_add_del_route (im, &x);
937 }
938
939 static void
940 ip6_del_interface_routes (ip6_main_t * im, u32 fib_index,
941                           ip6_address_t * address, u32 address_length)
942 {
943   ip6_add_del_route_args_t x;
944
945   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
946   x.table_index_or_table_id = fib_index;
947   x.flags = (IP6_ROUTE_FLAG_DEL
948              | IP6_ROUTE_FLAG_FIB_INDEX
949              | IP6_ROUTE_FLAG_NO_REDISTRIBUTE);
950   x.dst_address = address[0];
951   x.dst_address_length = address_length;
952   x.adj_index = ~0;
953   x.n_add_adj = 0;
954   x.add_adj = 0;
955
956   if (address_length < 128)
957     {
958       /* Don't wipe out fe80::0/64 */
959       if (address_length != 64 || 
960           address[0].as_u64[0] != clib_net_to_host_u64(0xfe80000000000000ULL))
961         ip6_add_del_route (im, &x);
962     }
963
964   x.dst_address_length = 128;
965   ip6_add_del_route (im, &x);
966
967   ip6_delete_matching_routes (im,
968                               fib_index,
969                               IP6_ROUTE_FLAG_FIB_INDEX,
970                               address,
971                               address_length);
972 }
973
974 typedef struct {
975     u32 sw_if_index;
976     ip6_address_t address;
977     u32 length;
978 } ip6_interface_address_t;
979
980 static clib_error_t *
981 ip6_add_del_interface_address_internal (vlib_main_t * vm,
982                                         u32 sw_if_index,
983                                         ip6_address_t * new_address,
984                                         u32 new_length,
985                                         u32 redistribute,
986                                         u32 insert_routes,
987                                         u32 is_del);
988
989 static clib_error_t *
990 ip6_add_del_interface_address_internal (vlib_main_t * vm,
991                                         u32 sw_if_index,
992                                         ip6_address_t * address,
993                                         u32 address_length,
994                                         u32 redistribute,
995                                         u32 insert_routes,
996                                         u32 is_del)
997 {
998   vnet_main_t * vnm = vnet_get_main();
999   ip6_main_t * im = &ip6_main;
1000   ip_lookup_main_t * lm = &im->lookup_main;
1001   clib_error_t * error;
1002   u32 if_address_index;
1003   ip6_address_fib_t ip6_af, * addr_fib = 0;
1004
1005   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1006   ip6_addr_fib_init (&ip6_af, address,
1007                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1008   vec_add1 (addr_fib, ip6_af);
1009
1010   {
1011     uword elts_before = pool_elts (lm->if_address_pool);
1012
1013     error = ip_interface_address_add_del
1014       (lm,
1015        sw_if_index,
1016        addr_fib,
1017        address_length,
1018        is_del,
1019        &if_address_index);
1020     if (error)
1021       goto done;
1022
1023     /* Pool did not grow: add duplicate address. */
1024     if (elts_before == pool_elts (lm->if_address_pool))
1025       goto done;
1026   }
1027
1028   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1029     {
1030       if (is_del)
1031         ip6_del_interface_routes (im, ip6_af.fib_index, address,
1032                                   address_length);
1033
1034       else
1035         ip6_add_interface_routes (vnm, sw_if_index,
1036                                   im, ip6_af.fib_index,
1037                                   pool_elt_at_index (lm->if_address_pool, if_address_index));
1038     }
1039
1040   {
1041     ip6_add_del_interface_address_callback_t * cb;
1042     vec_foreach (cb, im->add_del_interface_address_callbacks)
1043       cb->function (im, cb->function_opaque, sw_if_index,
1044                     address, address_length,
1045                     if_address_index,
1046                     is_del);
1047   }
1048
1049  done:
1050   vec_free (addr_fib);
1051   return error;
1052 }
1053
1054 clib_error_t *
1055 ip6_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1056                                ip6_address_t * address, u32 address_length,
1057                                u32 is_del)
1058 {
1059   return ip6_add_del_interface_address_internal
1060     (vm, sw_if_index, address, address_length,
1061      /* redistribute */ 1,
1062      /* insert_routes */ 1,
1063      is_del);
1064 }
1065
1066 clib_error_t *
1067 ip6_sw_interface_admin_up_down (vnet_main_t * vnm,
1068                                 u32 sw_if_index,
1069                                 u32 flags)
1070 {
1071   ip6_main_t * im = &ip6_main;
1072   ip_interface_address_t * ia;
1073   ip6_address_t * a;
1074   u32 is_admin_up, fib_index;
1075
1076   /* Fill in lookup tables with default table (0). */
1077   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1078
1079   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1080
1081   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1082
1083   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1084
1085   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1086                                 0 /* honor unnumbered */,
1087   ({
1088     a = ip_interface_address_get_address (&im->lookup_main, ia);
1089     if (is_admin_up)
1090       ip6_add_interface_routes (vnm, sw_if_index,
1091                                 im, fib_index,
1092                                 ia);
1093     else
1094       ip6_del_interface_routes (im, fib_index,
1095                                 a, ia->address_length);
1096   }));
1097
1098   return 0;
1099 }
1100
1101 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
1102
1103 clib_error_t *
1104 ip6_sw_interface_add_del (vnet_main_t * vnm,
1105                           u32 sw_if_index,
1106                           u32 is_add)
1107 {
1108   vlib_main_t * vm = vnm->vlib_main;
1109   ip6_main_t * im = &ip6_main;
1110   ip_lookup_main_t * lm = &im->lookup_main;
1111   u32 ci, cast;
1112
1113   for (cast = 0; cast < VNET_N_CAST; cast++)
1114     {
1115       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1116       vnet_config_main_t * vcm = &cm->config_main;
1117
1118       /* FIXME multicast. */
1119       if (! vcm->node_index_by_feature_index)
1120         {
1121           char * start_nodes[] = { "ip6-input", };
1122           char * feature_nodes[] = {
1123             [IP6_RX_FEATURE_CHECK_ACCESS] = "ip6-inacl",
1124             [IP6_RX_FEATURE_IPSEC] = "ipsec-input-ip6",
1125             [IP6_RX_FEATURE_L2TPV3] = "l2tp-decap",
1126             [IP6_RX_FEATURE_VPATH]  = "vpath-input-ip6",
1127             [IP6_RX_FEATURE_LOOKUP] = "ip6-lookup",
1128           };
1129           vnet_config_init (vm, vcm,
1130                             start_nodes, ARRAY_LEN (start_nodes),
1131                             feature_nodes, ARRAY_LEN (feature_nodes));
1132         }
1133
1134       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1135       ci = cm->config_index_by_sw_if_index[sw_if_index];
1136
1137       if (is_add)
1138         ci = vnet_config_add_feature (vm, vcm,
1139                                       ci,
1140                                       IP6_RX_FEATURE_LOOKUP,
1141                                       /* config data */ 0,
1142                                       /* # bytes of config data */ 0);
1143       else
1144         ci = vnet_config_del_feature (vm, vcm,
1145                                       ci,
1146                                       IP6_RX_FEATURE_LOOKUP,
1147                                       /* config data */ 0,
1148                                       /* # bytes of config data */ 0);
1149
1150       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1151     }
1152   return /* no error */ 0;
1153 }
1154
1155 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del);
1156
1157 VLIB_REGISTER_NODE (ip6_lookup_node) = {
1158   .function = ip6_lookup,
1159   .name = "ip6-lookup",
1160   .vector_size = sizeof (u32),
1161
1162   .n_next_nodes = IP_LOOKUP_N_NEXT,
1163   .next_nodes = {
1164     [IP_LOOKUP_NEXT_MISS] = "ip6-miss",
1165     [IP_LOOKUP_NEXT_DROP] = "ip6-drop",
1166     [IP_LOOKUP_NEXT_PUNT] = "ip6-punt",
1167     [IP_LOOKUP_NEXT_LOCAL] = "ip6-local",
1168     [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor",
1169     [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite",
1170     [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify",
1171     [IP_LOOKUP_NEXT_MAP] = "ip6-map",
1172     [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t",
1173     [IP_LOOKUP_NEXT_SIXRD] = "ip6-sixrd",
1174     [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop",
1175     [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", 
1176     [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop", 
1177   },
1178 };
1179
1180 typedef struct {
1181   /* Adjacency taken. */
1182   u32 adj_index;
1183   u32 flow_hash;
1184
1185   /* Packet data, possibly *after* rewrite. */
1186   u8 packet_data[64 - 1*sizeof(u32)];
1187 } ip6_forward_next_trace_t;
1188
1189 static u8 * format_ip6_forward_next_trace (u8 * s, va_list * args)
1190 {
1191   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1192   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1193   ip6_forward_next_trace_t * t = va_arg (*args, ip6_forward_next_trace_t *);
1194   vnet_main_t * vnm = vnet_get_main();
1195   ip6_main_t * im = &ip6_main;
1196   ip_adjacency_t * adj;
1197   uword indent = format_get_indent (s);
1198
1199   adj = ip_get_adjacency (&im->lookup_main, t->adj_index);
1200   s = format (s, "adjacency: %U flow hash: 0x%08x",
1201               format_ip_adjacency,
1202               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1203   switch (adj->lookup_next_index)
1204     {
1205     case IP_LOOKUP_NEXT_REWRITE:
1206       s = format (s, "\n%U%U",
1207                   format_white_space, indent,
1208                   format_ip_adjacency_packet_data,
1209                   vnm, &im->lookup_main, t->adj_index,
1210                   t->packet_data, sizeof (t->packet_data));
1211       break;
1212
1213     default:
1214       break;
1215     }
1216
1217   return s;
1218 }
1219
1220 /* Common trace function for all ip6-forward next nodes. */
1221 void
1222 ip6_forward_next_trace (vlib_main_t * vm,
1223                         vlib_node_runtime_t * node,
1224                         vlib_frame_t * frame,
1225                         vlib_rx_or_tx_t which_adj_index)
1226 {
1227   u32 * from, n_left;
1228
1229   n_left = frame->n_vectors;
1230   from = vlib_frame_vector_args (frame);
1231   
1232   while (n_left >= 4)
1233     {
1234       u32 bi0, bi1;
1235       vlib_buffer_t * b0, * b1;
1236       ip6_forward_next_trace_t * t0, * t1;
1237
1238       /* Prefetch next iteration. */
1239       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1240       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1241
1242       bi0 = from[0];
1243       bi1 = from[1];
1244
1245       b0 = vlib_get_buffer (vm, bi0);
1246       b1 = vlib_get_buffer (vm, bi1);
1247
1248       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1249         {
1250           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1251           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1252           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1253           memcpy (t0->packet_data,
1254                   vlib_buffer_get_current (b0),
1255                   sizeof (t0->packet_data));
1256         }
1257       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1258         {
1259           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1260           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1261           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1262           memcpy (t1->packet_data,
1263                   vlib_buffer_get_current (b1),
1264                   sizeof (t1->packet_data));
1265         }
1266       from += 2;
1267       n_left -= 2;
1268     }
1269
1270   while (n_left >= 1)
1271     {
1272       u32 bi0;
1273       vlib_buffer_t * b0;
1274       ip6_forward_next_trace_t * t0;
1275
1276       bi0 = from[0];
1277
1278       b0 = vlib_get_buffer (vm, bi0);
1279
1280       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1281         {
1282           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1283           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1284           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1285           memcpy (t0->packet_data,
1286                   vlib_buffer_get_current (b0),
1287                   sizeof (t0->packet_data));
1288         }
1289       from += 1;
1290       n_left -= 1;
1291     }
1292 }
1293
1294 static uword
1295 ip6_drop_or_punt (vlib_main_t * vm,
1296                   vlib_node_runtime_t * node,
1297                   vlib_frame_t * frame,
1298                   ip6_error_t error_code)
1299 {
1300   u32 * buffers = vlib_frame_vector_args (frame);
1301   uword n_packets = frame->n_vectors;
1302
1303   vlib_error_drop_buffers (vm, node,
1304                            buffers,
1305                            /* stride */ 1,
1306                            n_packets,
1307                            /* next */ 0,
1308                            ip6_input_node.index,
1309                            error_code);
1310
1311   if (node->flags & VLIB_NODE_FLAG_TRACE)
1312     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1313
1314   return n_packets;
1315 }
1316
1317 static uword
1318 ip6_drop (vlib_main_t * vm,
1319           vlib_node_runtime_t * node,
1320           vlib_frame_t * frame)
1321 { return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_DROP); }
1322
1323 static uword
1324 ip6_punt (vlib_main_t * vm,
1325           vlib_node_runtime_t * node,
1326           vlib_frame_t * frame)
1327 { return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_PUNT); }
1328
1329 static uword
1330 ip6_miss (vlib_main_t * vm,
1331           vlib_node_runtime_t * node,
1332           vlib_frame_t * frame)
1333 { return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_DST_LOOKUP_MISS); }
1334
1335 VLIB_REGISTER_NODE (ip6_drop_node,static) = {
1336   .function = ip6_drop,
1337   .name = "ip6-drop",
1338   .vector_size = sizeof (u32),
1339
1340   .format_trace = format_ip6_forward_next_trace,
1341
1342   .n_next_nodes = 1,
1343   .next_nodes = {
1344     [0] = "error-drop",
1345   },
1346 };
1347
1348 VLIB_REGISTER_NODE (ip6_punt_node,static) = {
1349   .function = ip6_punt,
1350   .name = "ip6-punt",
1351   .vector_size = sizeof (u32),
1352
1353   .format_trace = format_ip6_forward_next_trace,
1354
1355   .n_next_nodes = 1,
1356   .next_nodes = {
1357     [0] = "error-punt",
1358   },
1359 };
1360
1361 VLIB_REGISTER_NODE (ip6_miss_node,static) = {
1362   .function = ip6_miss,
1363   .name = "ip6-miss",
1364   .vector_size = sizeof (u32),
1365
1366   .format_trace = format_ip6_forward_next_trace,
1367
1368   .n_next_nodes = 1,
1369   .next_nodes = {
1370     [0] = "error-drop",
1371   },
1372 };
1373
1374 VLIB_REGISTER_NODE (ip6_multicast_node,static) = {
1375   .function = ip6_drop,
1376   .name = "ip6-multicast",
1377   .vector_size = sizeof (u32),
1378
1379   .format_trace = format_ip6_forward_next_trace,
1380
1381   .n_next_nodes = 1,
1382   .next_nodes = {
1383     [0] = "error-drop",
1384   },
1385 };
1386
1387 /* Compute TCP/UDP/ICMP6 checksum in software. */
1388 u16 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, ip6_header_t * ip0, int *bogus_lengthp)
1389 {
1390   ip_csum_t sum0;
1391   u16 sum16, payload_length_host_byte_order;
1392   u32 i, n_this_buffer, n_bytes_left;
1393   u32 headers_size = sizeof(ip0[0]);
1394   void * data_this_buffer;
1395
1396   ASSERT(bogus_lengthp);
1397   *bogus_lengthp = 0;
1398
1399   /* Initialize checksum with ip header. */
1400   sum0 = ip0->payload_length + clib_host_to_net_u16 (ip0->protocol);
1401   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
1402   data_this_buffer = (void *) (ip0 + 1);
1403  
1404   for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++)
1405     {
1406       sum0 = ip_csum_with_carry (sum0,
1407                                  clib_mem_unaligned (&ip0->src_address.as_uword[i], uword));
1408       sum0 = ip_csum_with_carry (sum0,
1409                                  clib_mem_unaligned (&ip0->dst_address.as_uword[i], uword));
1410     }
1411
1412   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
1413   if (PREDICT_FALSE (ip0->protocol ==  IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
1414     {
1415       u32  skip_bytes;
1416       ip6_hop_by_hop_ext_t *ext_hdr = (ip6_hop_by_hop_ext_t  *)data_this_buffer;
1417
1418       /* validate really icmp6 next */
1419       ASSERT(ext_hdr->next_hdr == IP_PROTOCOL_ICMP6);
1420
1421       skip_bytes = 8* (1 + ext_hdr->n_data_u64s);
1422       data_this_buffer  = (void *)((u8 *)data_this_buffer + skip_bytes);
1423  
1424       payload_length_host_byte_order  -= skip_bytes;
1425       headers_size += skip_bytes;
1426    }
1427
1428   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1429 #if DPDK > 0
1430   if (p0) 
1431   {
1432     struct rte_mbuf *mb = ((struct rte_mbuf *)p0)-1;
1433     u8 nb_segs = mb->nb_segs;
1434
1435     n_this_buffer = (p0->current_length > headers_size ?
1436                      p0->current_length - headers_size : 0);
1437     while (n_bytes_left)
1438       {
1439         sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1440         n_bytes_left -= n_this_buffer;
1441
1442         mb = mb->next;
1443         nb_segs--;
1444         if ((nb_segs == 0) || (mb == 0))
1445           break;
1446
1447         data_this_buffer = rte_ctrlmbuf_data(mb);
1448         n_this_buffer = mb->data_len;
1449       }
1450     if (n_bytes_left || nb_segs)
1451       {
1452         *bogus_lengthp = 1;
1453         return 0xfefe;
1454       }
1455   } 
1456   else sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1457 #else
1458   if (p0 && n_this_buffer + headers_size  > p0->current_length)
1459     n_this_buffer = p0->current_length > headers_size  ? p0->current_length - headers_size  : 0;
1460   while (1)
1461     {
1462       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1463       n_bytes_left -= n_this_buffer;
1464       if (n_bytes_left == 0)
1465         break;
1466
1467       if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
1468         {
1469           *bogus_lengthp = 1;
1470           return 0xfefe;
1471         }
1472       p0 = vlib_get_buffer (vm, p0->next_buffer);
1473       data_this_buffer = vlib_buffer_get_current (p0);
1474       n_this_buffer = p0->current_length;
1475     }
1476 #endif /* DPDK */
1477
1478   sum16 = ~ ip_csum_fold (sum0);
1479
1480   return sum16;
1481 }
1482
1483 u32 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1484 {
1485   ip6_header_t * ip0 = vlib_buffer_get_current (p0);
1486   udp_header_t * udp0;
1487   u16 sum16;
1488   int bogus_length;
1489
1490   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
1491   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1492           || ip0->protocol == IP_PROTOCOL_ICMP6
1493           || ip0->protocol == IP_PROTOCOL_UDP
1494           || ip0->protocol ==  IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS);
1495
1496   udp0 = (void *) (ip0 + 1);
1497   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1498     {
1499       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1500                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1501       return p0->flags;
1502     }
1503
1504   sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, &bogus_length);
1505
1506   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1507                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1508
1509   return p0->flags;
1510 }
1511
/*
 * ip6-local node function: validates packets and dispatches them to the
 * next node registered for their IP protocol.
 *
 * For every packet in FRAME:
 *  - the builtin protocol type and the default next index are looked up
 *    from the lookup main tables, indexed by the IP protocol of the header;
 *  - the L4 checksum is accepted if the buffer is already flagged correct,
 *    if it is UDP with a zero checksum field, or if the protocol type is
 *    unknown; otherwise, unless already computed, it is validated in
 *    software;
 *  - for UDP, a UDP length larger than the IP payload length is an error;
 *  - packets that are error free so far and are not ICMP get a source
 *    address lookup; hitting the miss adjacency is an error.
 *
 * IP6_ERROR_UNKNOWN_PROTOCOL doubles as the "no error so far" value: only
 * when the final error differs from it is the packet redirected to
 * IP_LOCAL_NEXT_DROP.  A checksum error overrides a UDP length error.
 * Errors are taken from the ip6_input_node runtime.  Returns the number
 * of packets in FRAME.
 */
1512 static uword
1513 ip6_local (vlib_main_t * vm,
1514            vlib_node_runtime_t * node,
1515            vlib_frame_t * frame)
1516 {
1517   ip6_main_t * im = &ip6_main;
1518   ip_lookup_main_t * lm = &im->lookup_main;
1519   ip_local_next_t next_index;
1520   u32 * from, * to_next, n_left_from, n_left_to_next;
1521   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_input_node.index);
1522
1523   from = vlib_frame_vector_args (frame);
1524   n_left_from = frame->n_vectors;
1525   next_index = node->cached_next_index;
1526   
1527   if (node->flags & VLIB_NODE_FLAG_TRACE)
1528     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1529
1530   while (n_left_from > 0)
1531     {
1532       vlib_get_next_frame (vm, node, next_index,
1533                            to_next, n_left_to_next);
1534
           /* Dual loop: two packets per iteration. */
1535       while (n_left_from >= 4 && n_left_to_next >= 2)
1536         {
1537           vlib_buffer_t * p0, * p1;
1538           ip6_header_t * ip0, * ip1;
1539           udp_header_t * udp0, * udp1;
1540           u32 pi0, ip_len0, udp_len0, flags0, next0;
1541           u32 pi1, ip_len1, udp_len1, flags1, next1;
1542           i32 len_diff0, len_diff1;
1543           u8 error0, type0, good_l4_checksum0;
1544           u8 error1, type1, good_l4_checksum1;
1545       
               /* Speculatively enqueue both buffers to the current next frame. */
1546           pi0 = to_next[0] = from[0];
1547           pi1 = to_next[1] = from[1];
1548           from += 2;
1549           n_left_from -= 2;
1550           to_next += 2;
1551           n_left_to_next -= 2;
1552       
1553           p0 = vlib_get_buffer (vm, pi0);
1554           p1 = vlib_get_buffer (vm, pi1);
1555
1556           ip0 = vlib_buffer_get_current (p0);
1557           ip1 = vlib_buffer_get_current (p1);
1558
1559           type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
1560           type1 = lm->builtin_protocol_by_ip_protocol[ip1->protocol];
1561
1562           next0 = lm->local_next_by_ip_protocol[ip0->protocol];
1563           next1 = lm->local_next_by_ip_protocol[ip1->protocol];
1564
1565           flags0 = p0->flags;
1566           flags1 = p1->flags;
1567
1568           good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1569           good_l4_checksum1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1570
               /* The next header is read as UDP for every packet; the UDP
                  fields only take effect when the type is UDP. */
1571           udp0 = ip6_next_header (ip0);
1572           udp1 = ip6_next_header (ip1);
1573
1574           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1575           good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0;
1576           good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UDP && udp1->checksum == 0;
1577
               /* Unknown protocol types are never checksummed here. */
1578           good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
1579           good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UNKNOWN;
1580
1581           /* Verify UDP length. */
1582           ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
1583           ip_len1 = clib_net_to_host_u16 (ip1->payload_length);
1584           udp_len0 = clib_net_to_host_u16 (udp0->length);
1585           udp_len1 = clib_net_to_host_u16 (udp1->length);
1586
1587           len_diff0 = ip_len0 - udp_len0;
1588           len_diff1 = ip_len1 - udp_len1;
1589
1590           len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
1591           len_diff1 = type1 == IP_BUILTIN_PROTOCOL_UDP ? len_diff1 : 0;
1592
               /* Checksum neither known good nor computed yet: validate in software. */
1593           if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
1594                              && ! good_l4_checksum0
1595                              && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED)))
1596             {
1597               flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0);
1598               good_l4_checksum0 =
1599                 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1600             }
1601           if (PREDICT_FALSE (type1 != IP_BUILTIN_PROTOCOL_UNKNOWN
1602                              && ! good_l4_checksum1
1603                              && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED)))
1604             {
1605               flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, p1);
1606               good_l4_checksum1 =
1607                 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1608             }
1609
               /* IP6_ERROR_UNKNOWN_PROTOCOL acts as "no error so far". */
1610           error0 = error1 = IP6_ERROR_UNKNOWN_PROTOCOL;
1611
1612           error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0;
1613           error1 = len_diff1 < 0 ? IP6_ERROR_UDP_LENGTH : error1;
1614
               /* The checksum error codes must be laid out in builtin protocol
                  order so the error can be computed by adding the type. */
1615           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP == IP6_ERROR_UDP_CHECKSUM);
1616           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_TCP == IP6_ERROR_TCP_CHECKSUM);
1617           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP == IP6_ERROR_ICMP_CHECKSUM);
1618           error0 = (! good_l4_checksum0
1619                     ? IP6_ERROR_UDP_CHECKSUM + type0
1620                     : error0);
1621           error1 = (! good_l4_checksum1
1622                     ? IP6_ERROR_UDP_CHECKSUM + type1
1623                     : error1);
1624
1625           /* Drop packets from unroutable hosts. */
1626           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
               /* Note: the test below skips the source lookup for every packet
                  of builtin type ICMP, not only neighbor solicitations. */
1627           if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && type0 != IP_BUILTIN_PROTOCOL_ICMP)
1628             {
1629               u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
1630               error0 = (lm->miss_adj_index == src_adj_index0
1631                         ? IP6_ERROR_SRC_LOOKUP_MISS
1632                         : error0);
1633             }
1634           if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL && type1 != IP_BUILTIN_PROTOCOL_ICMP)
1635             {
1636               u32 src_adj_index1 = ip6_src_lookup_for_packet (im, p1, ip1);
1637               error1 = (lm->miss_adj_index == src_adj_index1
1638                         ? IP6_ERROR_SRC_LOOKUP_MISS
1639                         : error1);
1640             }
1641
               /* Any real error redirects the packet to the drop next. */
1642           next0 = error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1643           next1 = error1 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1644
1645           p0->error = error_node->errors[error0];
1646           p1->error = error_node->errors[error1];
1647
1648           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1649                                            to_next, n_left_to_next,
1650                                            pi0, pi1, next0, next1);
1651         }
1652
           /* Single loop: same processing as above, one packet at a time. */
1653       while (n_left_from > 0 && n_left_to_next > 0)
1654         {
1655           vlib_buffer_t * p0;
1656           ip6_header_t * ip0;
1657           udp_header_t * udp0;
1658           u32 pi0, ip_len0, udp_len0, flags0, next0;
1659           i32 len_diff0;
1660           u8 error0, type0, good_l4_checksum0;
1661       
1662           pi0 = to_next[0] = from[0];
1663           from += 1;
1664           n_left_from -= 1;
1665           to_next += 1;
1666           n_left_to_next -= 1;
1667       
1668           p0 = vlib_get_buffer (vm, pi0);
1669
1670           ip0 = vlib_buffer_get_current (p0);
1671
1672           type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
1673           next0 = lm->local_next_by_ip_protocol[ip0->protocol];
1674
1675           flags0 = p0->flags;
1676
1677           good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1678
1679           udp0 = ip6_next_header (ip0);
1680
1681           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1682           good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0;
1683
1684           good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
1685
1686           /* Verify UDP length. */
1687           ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
1688           udp_len0 = clib_net_to_host_u16 (udp0->length);
1689
1690           len_diff0 = ip_len0 - udp_len0;
1691
1692           len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
1693
1694           if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
1695                              && ! good_l4_checksum0
1696                              && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED)))
1697             {
1698               flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0);
1699               good_l4_checksum0 =
1700                 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1701             }
1702
               /* IP6_ERROR_UNKNOWN_PROTOCOL acts as "no error so far". */
1703           error0 = IP6_ERROR_UNKNOWN_PROTOCOL;
1704
1705           error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0;
1706
1707           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP == IP6_ERROR_UDP_CHECKSUM);
1708           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_TCP == IP6_ERROR_TCP_CHECKSUM);
1709           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP == IP6_ERROR_ICMP_CHECKSUM);
1710           error0 = (! good_l4_checksum0
1711                     ? IP6_ERROR_UDP_CHECKSUM + type0
1712                     : error0);
1713
1714           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1715           if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && type0 != IP_BUILTIN_PROTOCOL_ICMP)
1716             {
1717               u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
1718               error0 = (lm->miss_adj_index == src_adj_index0
1719                         ? IP6_ERROR_SRC_LOOKUP_MISS
1720                         : error0);
1721             }
1722
1723           next0 = error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1724
1725           p0->error = error_node->errors[error0];
1726
1727           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1728                                            to_next, n_left_to_next,
1729                                            pi0, next0);
1730         }
1731   
1732       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1733     }
1734
1735   return frame->n_vectors;
1736 }
1737
1738 VLIB_REGISTER_NODE (ip6_local_node,static) = {
1739   .function = ip6_local,
1740   .name = "ip6-local",
1741   .vector_size = sizeof (u32),
1742
1743   .format_trace = format_ip6_forward_next_trace,
1744
1745   .n_next_nodes = IP_LOCAL_N_NEXT,
1746   .next_nodes = {
1747     [IP_LOCAL_NEXT_DROP] = "error-drop",
1748     [IP_LOCAL_NEXT_PUNT] = "error-punt",
1749     // [IP_LOCAL_NEXT_TCP_LOOKUP] = "ip6-tcp-lookup",
1750     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
1751     [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input",
1752   },
1753 };
1754
1755 void ip6_register_protocol (u32 protocol, u32 node_index)
1756 {
1757   vlib_main_t * vm = vlib_get_main();
1758   ip6_main_t * im = &ip6_main;
1759   ip_lookup_main_t * lm = &im->lookup_main;
1760
1761   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1762   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip6_local_node.index, node_index);
1763 }
1764
/* Statically registered next-node arcs of ip6-discover-neighbor. */
typedef enum {
  IP6_DISCOVER_NEIGHBOR_NEXT_DROP = 0,
  IP6_DISCOVER_NEIGHBOR_N_NEXT,   /* number of arcs above */
} ip6_discover_neighbor_next_t;
1769
/* Error/counter indices of ip6-discover-neighbor; they index
   ip6_discover_neighbor_error_strings[]. */
typedef enum {
  IP6_DISCOVER_NEIGHBOR_ERROR_DROP = 0,
  IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT = 1,
} ip6_discover_neighbor_error_t;
1774
1775 static uword
1776 ip6_discover_neighbor (vlib_main_t * vm,
1777                        vlib_node_runtime_t * node,
1778                        vlib_frame_t * frame)
1779 {
1780   vnet_main_t * vnm = vnet_get_main();
1781   ip6_main_t * im = &ip6_main;
1782   ip_lookup_main_t * lm = &im->lookup_main;
1783   u32 * from, * to_next_drop;
1784   uword n_left_from, n_left_to_next_drop;
1785   static f64 time_last_seed_change = -1e100;
1786   static u32 hash_seeds[3];
1787   static uword hash_bitmap[256 / BITS (uword)]; 
1788   f64 time_now;
1789   int bogus_length;
1790
1791   if (node->flags & VLIB_NODE_FLAG_TRACE)
1792     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1793
1794   time_now = vlib_time_now (vm);
1795   if (time_now - time_last_seed_change > 1e-3)
1796     {
1797       uword i;
1798       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1799                                              sizeof (hash_seeds));
1800       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1801         hash_seeds[i] = r[i];
1802
1803       /* Mark all hash keys as been not-seen before. */
1804       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1805         hash_bitmap[i] = 0;
1806
1807       time_last_seed_change = time_now;
1808     }
1809
1810   from = vlib_frame_vector_args (frame);
1811   n_left_from = frame->n_vectors;
1812
1813   while (n_left_from > 0)
1814     {
1815       vlib_get_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP,
1816                            to_next_drop, n_left_to_next_drop);
1817
1818       while (n_left_from > 0 && n_left_to_next_drop > 0)
1819         {
1820           vlib_buffer_t * p0;
1821           ip6_header_t * ip0;
1822           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1823           uword bm0;
1824           ip_adjacency_t * adj0;
1825           vnet_hw_interface_t * hw_if0;
1826           u32 next0;
1827
1828           pi0 = from[0];
1829
1830           p0 = vlib_get_buffer (vm, pi0);
1831
1832           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1833
1834           ip0 = vlib_buffer_get_current (p0);
1835
1836           adj0 = ip_get_adjacency (lm, adj_index0);
1837
1838           a0 = hash_seeds[0];
1839           b0 = hash_seeds[1];
1840           c0 = hash_seeds[2];
1841
1842           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1843           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1844
1845           a0 ^= sw_if_index0;
1846           b0 ^= ip0->dst_address.as_u32[0];
1847           c0 ^= ip0->dst_address.as_u32[1];
1848
1849           hash_v3_mix32 (a0, b0, c0);
1850
1851           b0 ^= ip0->dst_address.as_u32[2];
1852           c0 ^= ip0->dst_address.as_u32[3];
1853
1854           hash_v3_finalize32 (a0, b0, c0);
1855
1856           c0 &= BITS (hash_bitmap) - 1;
1857           c0 = c0 / BITS (uword);
1858           m0 = (uword) 1 << (c0 % BITS (uword));
1859
1860           bm0 = hash_bitmap[c0];
1861           drop0 = (bm0 & m0) != 0;
1862
1863           /* Mark it as seen. */
1864           hash_bitmap[c0] = bm0 | m0;
1865
1866           from += 1;
1867           n_left_from -= 1;
1868           to_next_drop[0] = pi0;
1869           to_next_drop += 1;
1870           n_left_to_next_drop -= 1;
1871
1872           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1873
1874           /* If the interface is link-down, drop the pkt */
1875           if (!(hw_if0->flags & VNET_HW_INTERFACE_FLAG_LINK_UP))
1876             drop0 = 1;
1877
1878           p0->error = 
1879             node->errors[drop0 ? IP6_DISCOVER_NEIGHBOR_ERROR_DROP 
1880                          : IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT];
1881           if (drop0)
1882             continue;
1883
1884           {
1885             u32 bi0 = 0;
1886             icmp6_neighbor_solicitation_header_t * h0;
1887             vlib_buffer_t * b0;
1888
1889             h0 = vlib_packet_template_get_packet 
1890               (vm, &im->discover_neighbor_packet_template, &bi0);
1891
1892             /* 
1893              * Build ethernet header.
1894              * Choose source address based on destination lookup 
1895              * adjacency. 
1896              */
1897             ip6_src_address_for_packet (im, p0, &h0->ip.src_address, 
1898                                         sw_if_index0);
1899
1900             /* 
1901              * Destination address is a solicited node multicast address.  
1902              * We need to fill in
1903              * the low 24 bits with low 24 bits of target's address. 
1904              */
1905             h0->ip.dst_address.as_u8[13] = ip0->dst_address.as_u8[13];
1906             h0->ip.dst_address.as_u8[14] = ip0->dst_address.as_u8[14];
1907             h0->ip.dst_address.as_u8[15] = ip0->dst_address.as_u8[15];
1908
1909             h0->neighbor.target_address = ip0->dst_address;
1910
1911             memcpy (h0->link_layer_option.ethernet_address, 
1912                     hw_if0->hw_address, vec_len (hw_if0->hw_address));
1913
1914             /* $$$$ appears we need this; why is the checksum non-zero? */
1915             h0->neighbor.icmp.checksum = 0;
1916             h0->neighbor.icmp.checksum = 
1917               ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h0->ip, 
1918                                                  &bogus_length);
1919
1920             ASSERT (bogus_length == 0);
1921
1922             vlib_buffer_copy_trace_flag (vm, p0, bi0);
1923             b0 = vlib_get_buffer (vm, bi0);
1924             vnet_buffer (b0)->sw_if_index[VLIB_TX] 
1925               = vnet_buffer (p0)->sw_if_index[VLIB_TX];
1926
1927             /* Add rewrite/encap string. */
1928             vnet_rewrite_one_header (adj0[0], h0, 
1929                                      sizeof (ethernet_header_t));
1930             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1931
1932             /* $$$$ hack in case next0 == 0 */
1933             b0->error = node->errors[IP6_DISCOVER_NEIGHBOR_ERROR_DROP];
1934             next0 = 
1935               vec_elt (im->discover_neighbor_next_index_by_hw_if_index, 
1936                        hw_if0->hw_if_index);
1937
1938             vlib_set_next_frame_buffer (vm, node, next0, bi0);
1939           }
1940         }
1941
1942       vlib_put_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP, 
1943                            n_left_to_next_drop);
1944     }
1945
1946   return frame->n_vectors;
1947 }
1948
1949 static char * ip6_discover_neighbor_error_strings[] = {
1950   [IP6_DISCOVER_NEIGHBOR_ERROR_DROP] = "address overflow drops",
1951   [IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT] 
1952   = "neighbor solicitations sent",
1953 };
1954
1955 VLIB_REGISTER_NODE (ip6_discover_neighbor_node) = {
1956   .function = ip6_discover_neighbor,
1957   .name = "ip6-discover-neighbor",
1958   .vector_size = sizeof (u32),
1959
1960   .format_trace = format_ip6_forward_next_trace,
1961
1962   .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings),
1963   .error_strings = ip6_discover_neighbor_error_strings,
1964
1965   .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT,
1966   .next_nodes = {
1967     [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop",
1968   },
1969 };
1970
1971 clib_error_t *
1972 ip6_discover_neighbor_hw_interface_link_up_down (vnet_main_t * vnm,
1973                                                  u32 hw_if_index,
1974                                                  u32 flags)
1975 {
1976   vlib_main_t * vm = vnm->vlib_main;
1977   ip6_main_t * im = &ip6_main;
1978   vnet_hw_interface_t * hw_if;
1979
1980   hw_if = vnet_get_hw_interface (vnm, hw_if_index);
1981
1982   vec_validate_init_empty 
1983     (im->discover_neighbor_next_index_by_hw_if_index, hw_if_index, 0);
1984   im->discover_neighbor_next_index_by_hw_if_index[hw_if_index]
1985     = vlib_node_add_next (vm, ip6_discover_neighbor_node.index,
1986                           hw_if->output_node_index);
1987   return 0;
1988 }
1989
1990 VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION 
1991 (ip6_discover_neighbor_hw_interface_link_up_down);
1992
1993 clib_error_t *
1994 ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index)
1995 {
1996   vnet_main_t * vnm = vnet_get_main();
1997   ip6_main_t * im = &ip6_main;
1998   icmp6_neighbor_solicitation_header_t * h;
1999   ip6_address_t * src;
2000   ip_interface_address_t * ia;
2001   ip_adjacency_t * adj;
2002   vnet_hw_interface_t * hi;
2003   vnet_sw_interface_t * si;
2004   vlib_buffer_t * b;
2005   u32 bi = 0;
2006   int bogus_length;
2007
2008   si = vnet_get_sw_interface (vnm, sw_if_index);
2009
2010   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2011     {
2012       return clib_error_return (0, "%U: interface %U down",
2013                                 format_ip6_address, dst, 
2014                                 format_vnet_sw_if_index_name, vnm, 
2015                                 sw_if_index);
2016     }
2017
2018   src = ip6_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2019   if (! src)
2020     {
2021       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2022       return clib_error_return 
2023         (0, "no matching interface address for destination %U (interface %U)",
2024          format_ip6_address, dst,
2025          format_vnet_sw_if_index_name, vnm, sw_if_index);
2026     }
2027
2028   h = vlib_packet_template_get_packet (vm, &im->discover_neighbor_packet_template, &bi);
2029
2030   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2031
2032   /* Destination address is a solicited node multicast address.  We need to fill in
2033      the low 24 bits with low 24 bits of target's address. */
2034   h->ip.dst_address.as_u8[13] = dst->as_u8[13];
2035   h->ip.dst_address.as_u8[14] = dst->as_u8[14];
2036   h->ip.dst_address.as_u8[15] = dst->as_u8[15];
2037
2038   h->ip.src_address = src[0];
2039   h->neighbor.target_address = dst[0];
2040
2041   memcpy (h->link_layer_option.ethernet_address, hi->hw_address, vec_len (hi->hw_address));
2042
2043   h->neighbor.icmp.checksum = 
2044     ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length);
2045   ASSERT(bogus_length == 0);
2046
2047   b = vlib_get_buffer (vm, bi);
2048   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2049
2050   /* Add encapsulation string for software interface (e.g. ethernet header). */
2051   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2052   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2053   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2054
2055   {
2056     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2057     u32 * to_next = vlib_frame_vector_args (f);
2058     to_next[0] = bi;
2059     f->n_vectors = 1;
2060     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2061   }
2062
2063   return /* no error */ 0;
2064 }
2065
/* Statically registered next-node arcs of the ip6 rewrite nodes. */
typedef enum {
  IP6_REWRITE_NEXT_DROP = 0,
} ip6_rewrite_next_t;
2069
2070 always_inline uword
2071 ip6_rewrite_inline (vlib_main_t * vm,
2072                     vlib_node_runtime_t * node,
2073                     vlib_frame_t * frame,
2074                     int rewrite_for_locally_received_packets)
2075 {
2076   ip_lookup_main_t * lm = &ip6_main.lookup_main;
2077   u32 * from = vlib_frame_vector_args (frame);
2078   u32 n_left_from, n_left_to_next, * to_next, next_index;
2079   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_input_node.index);
2080   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2081
2082   n_left_from = frame->n_vectors;
2083   next_index = node->cached_next_index;
2084   u32 cpu_index = os_get_cpu_number();
2085   
2086   while (n_left_from > 0)
2087     {
2088       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2089
2090       while (n_left_from >= 4 && n_left_to_next >= 2)
2091         {
2092           ip_adjacency_t * adj0, * adj1;
2093           vlib_buffer_t * p0, * p1;
2094           ip6_header_t * ip0, * ip1;
2095           u32 pi0, rw_len0, next0, error0, adj_index0;
2096           u32 pi1, rw_len1, next1, error1, adj_index1;
2097       
2098           /* Prefetch next iteration. */
2099           {
2100             vlib_buffer_t * p2, * p3;
2101
2102             p2 = vlib_get_buffer (vm, from[2]);
2103             p3 = vlib_get_buffer (vm, from[3]);
2104
2105             vlib_prefetch_buffer_header (p2, LOAD);
2106             vlib_prefetch_buffer_header (p3, LOAD);
2107
2108             CLIB_PREFETCH (p2->pre_data, 32, STORE);
2109             CLIB_PREFETCH (p3->pre_data, 32, STORE);
2110
2111             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2112             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2113           }
2114
2115           pi0 = to_next[0] = from[0];
2116           pi1 = to_next[1] = from[1];
2117
2118           from += 2;
2119           n_left_from -= 2;
2120           to_next += 2;
2121           n_left_to_next -= 2;
2122       
2123           p0 = vlib_get_buffer (vm, pi0);
2124           p1 = vlib_get_buffer (vm, pi1);
2125
2126           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2127           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2128
2129           /* We should never rewrite a pkt using the MISS adjacency */
2130           ASSERT(adj_index0 && adj_index1);
2131
2132           ip0 = vlib_buffer_get_current (p0);
2133           ip1 = vlib_buffer_get_current (p1);
2134
2135           error0 = error1 = IP6_ERROR_NONE;
2136
2137           if (! rewrite_for_locally_received_packets)
2138             {
2139               i32 hop_limit0 = ip0->hop_limit, hop_limit1 = ip1->hop_limit;
2140
2141               /* Input node should have reject packets with hop limit 0. */
2142               ASSERT (ip0->hop_limit > 0);
2143               ASSERT (ip1->hop_limit > 0);
2144
2145               hop_limit0 -= 1;
2146               hop_limit1 -= 1;
2147
2148               ip0->hop_limit = hop_limit0;
2149               ip1->hop_limit = hop_limit1;
2150
2151               error0 = hop_limit0 <= 0 ? IP6_ERROR_TIME_EXPIRED : error0;
2152               error1 = hop_limit1 <= 0 ? IP6_ERROR_TIME_EXPIRED : error1;
2153             }
2154
2155           adj0 = ip_get_adjacency (lm, adj_index0);
2156           adj1 = ip_get_adjacency (lm, adj_index1);
2157
2158           if (rewrite_for_locally_received_packets)
2159             {
2160               /*
2161                * If someone sends e.g. an icmp6 w/ src = dst = interface addr,
2162                * we end up here with a local adjacency in hand
2163                */
2164               if (PREDICT_FALSE(adj0->lookup_next_index 
2165                                 == IP_LOOKUP_NEXT_LOCAL))
2166                 error0 = IP6_ERROR_SPOOFED_LOCAL_PACKETS;
2167               if (PREDICT_FALSE(adj1->lookup_next_index 
2168                                 == IP_LOOKUP_NEXT_LOCAL))
2169                 error1 = IP6_ERROR_SPOOFED_LOCAL_PACKETS;
2170             }
2171
2172           rw_len0 = adj0[0].rewrite_header.data_bytes;
2173           rw_len1 = adj1[0].rewrite_header.data_bytes;
2174
2175           vlib_increment_combined_counter (&lm->adjacency_counters,
2176                                            cpu_index, 
2177                                            adj_index0,
2178                                            /* packet increment */ 0,
2179                                            /* byte increment */ rw_len0);
2180           vlib_increment_combined_counter (&lm->adjacency_counters,
2181                                            cpu_index, 
2182                                            adj_index1,
2183                                            /* packet increment */ 0,
2184                                            /* byte increment */ rw_len1);
2185
2186           /* Check MTU of outgoing interface. */
2187           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2188                     ? IP6_ERROR_MTU_EXCEEDED
2189                     : error0);
2190           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2191                     ? IP6_ERROR_MTU_EXCEEDED
2192                     : error1);
2193
2194           p0->current_data -= rw_len0;
2195           p1->current_data -= rw_len1;
2196
2197           p0->current_length += rw_len0;
2198           p1->current_length += rw_len1;
2199
2200           vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
2201           vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index;
2202       
2203           next0 = (error0 == IP6_ERROR_NONE) ? 
2204             adj0[0].rewrite_header.next_index : IP6_REWRITE_NEXT_DROP;
2205           next1 = (error1 == IP6_ERROR_NONE) ? 
2206             adj1[0].rewrite_header.next_index : IP6_REWRITE_NEXT_DROP;
2207
2208           /* Guess we are only writing on simple Ethernet header. */
2209           vnet_rewrite_two_headers (adj0[0], adj1[0],
2210                                     ip0, ip1,
2211                                     sizeof (ethernet_header_t));
2212       
2213           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2214                                            to_next, n_left_to_next,
2215                                            pi0, pi1, next0, next1);
2216         }
2217
2218       while (n_left_from > 0 && n_left_to_next > 0)
2219         {
2220           ip_adjacency_t * adj0;
2221           vlib_buffer_t * p0;
2222           ip6_header_t * ip0;
2223           u32 pi0, rw_len0;
2224           u32 adj_index0, next0, error0;
2225       
2226           pi0 = to_next[0] = from[0];
2227
2228           p0 = vlib_get_buffer (vm, pi0);
2229
2230           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2231
2232           /* We should never rewrite a pkt using the MISS adjacency */
2233           ASSERT(adj_index0);
2234
2235           adj0 = ip_get_adjacency (lm, adj_index0);
2236       
2237           ip0 = vlib_buffer_get_current (p0);
2238
2239           error0 = IP6_ERROR_NONE;
2240
2241           /* Check hop limit */
2242           if (! rewrite_for_locally_received_packets)
2243             {
2244               i32 hop_limit0 = ip0->hop_limit;
2245
2246               ASSERT (ip0->hop_limit > 0);
2247
2248               hop_limit0 -= 1;
2249
2250               ip0->hop_limit = hop_limit0;
2251
2252               error0 = hop_limit0 <= 0 ? IP6_ERROR_TIME_EXPIRED : error0;
2253             }
2254
2255           if (rewrite_for_locally_received_packets)
2256             {
2257               if (PREDICT_FALSE(adj0->lookup_next_index 
2258                                 == IP_LOOKUP_NEXT_LOCAL))
2259                 error0 = IP6_ERROR_SPOOFED_LOCAL_PACKETS;
2260             }
2261
2262           /* Guess we are only writing on simple Ethernet header. */
2263           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2264       
2265           /* Update packet buffer attributes/set output interface. */
2266           rw_len0 = adj0[0].rewrite_header.data_bytes;
2267
2268           vlib_increment_combined_counter (&lm->adjacency_counters,
2269                                            cpu_index, 
2270                                            adj_index0,
2271                                            /* packet increment */ 0,
2272                                            /* byte increment */ rw_len0);
2273
2274           /* Check MTU of outgoing interface. */
2275           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2276                     ? IP6_ERROR_MTU_EXCEEDED
2277                     : error0);
2278
2279           p0->current_data -= rw_len0;
2280           p0->current_length += rw_len0;
2281           vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
2282       
2283           next0 = (error0 == IP6_ERROR_NONE) ?
2284             adj0[0].rewrite_header.next_index : IP6_REWRITE_NEXT_DROP;
2285
2286           p0->error = error_node->errors[error0];
2287
2288           from += 1;
2289           n_left_from -= 1;
2290           to_next += 1;
2291           n_left_to_next -= 1;
2292       
2293           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2294                                            to_next, n_left_to_next,
2295                                            pi0, next0);
2296         }
2297
2298       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2299     }
2300
2301   /* Need to do trace after rewrites to pick up new packet data. */
2302   if (node->flags & VLIB_NODE_FLAG_TRACE)
2303     ip6_forward_next_trace (vm, node, frame, adj_rx_tx);
2304
2305   return frame->n_vectors;
2306 }
2307
2308 static uword
2309 ip6_rewrite_transit (vlib_main_t * vm,
2310                      vlib_node_runtime_t * node,
2311                      vlib_frame_t * frame)
2312 {
2313   return ip6_rewrite_inline (vm, node, frame,
2314                              /* rewrite_for_locally_received_packets */ 0);
2315 }
2316
2317 static uword
2318 ip6_rewrite_local (vlib_main_t * vm,
2319                    vlib_node_runtime_t * node,
2320                    vlib_frame_t * frame)
2321 {
2322   return ip6_rewrite_inline (vm, node, frame,
2323                              /* rewrite_for_locally_received_packets */ 1);
2324 }
2325
2326 VLIB_REGISTER_NODE (ip6_rewrite_node) = {
2327   .function = ip6_rewrite_transit,
2328   .name = "ip6-rewrite",
2329   .vector_size = sizeof (u32),
2330
2331   .format_trace = format_ip6_forward_next_trace,
2332
2333   .n_next_nodes = 1,
2334   .next_nodes = {
2335     [IP6_REWRITE_NEXT_DROP] = "error-drop",
2336   },
2337 };
2338
2339 VLIB_REGISTER_NODE (ip6_rewrite_local_node,static) = {
2340   .function = ip6_rewrite_local,
2341   .name = "ip6-rewrite-local",
2342   .vector_size = sizeof (u32),
2343
2344   .sibling_of = "ip6-rewrite",
2345
2346   .format_trace = format_ip6_forward_next_trace,
2347
2348   .n_next_nodes = 1,
2349   .next_nodes = {
2350     [IP6_REWRITE_NEXT_DROP] = "error-drop",
2351   },
2352 };
2353
2354 /* Global IP6 main. */
2355 ip6_main_t ip6_main;
2356
2357 static clib_error_t *
2358 ip6_lookup_init (vlib_main_t * vm)
2359 {
2360   ip6_main_t * im = &ip6_main;
2361   uword i;
2362
2363   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
2364     {
2365       u32 j, i0, i1;
2366
2367       i0 = i / 32;
2368       i1 = i % 32;
2369
2370       for (j = 0; j < i0; j++)
2371         im->fib_masks[i].as_u32[j] = ~0;
2372
2373       if (i1)
2374         im->fib_masks[i].as_u32[i0] = clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
2375     }
2376
2377   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 1);
2378
2379   if (im->lookup_table_nbuckets == 0)
2380     im->lookup_table_nbuckets = IP6_FIB_DEFAULT_HASH_NUM_BUCKETS;
2381
2382   im->lookup_table_nbuckets = 1<< max_log2 (im->lookup_table_nbuckets);
2383
2384   if (im->lookup_table_size == 0)
2385     im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE;
2386   
2387   BV(clib_bihash_init) (&im->ip6_lookup_table, "ip6 lookup table",
2388                         im->lookup_table_nbuckets,
2389                         im->lookup_table_size);
2390   
2391   /* Create FIB with index 0 and table id of 0. */
2392   find_ip6_fib_by_table_index_or_id (im, /* table id */ 0, IP6_ROUTE_FLAG_TABLE_ID);
2393
2394   {
2395     pg_node_t * pn;
2396     pn = pg_get_node (ip6_lookup_node.index);
2397     pn->unformat_edit = unformat_pg_ip6_header;
2398   }
2399
2400   {
2401     icmp6_neighbor_solicitation_header_t p;
2402
2403     memset (&p, 0, sizeof (p));
2404
2405     p.ip.ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6 << 28);
2406     p.ip.payload_length = clib_host_to_net_u16 (sizeof (p)
2407                                                 - STRUCT_OFFSET_OF (icmp6_neighbor_solicitation_header_t, neighbor));
2408     p.ip.protocol = IP_PROTOCOL_ICMP6;
2409     p.ip.hop_limit = 255;
2410     ip6_set_solicited_node_multicast_address (&p.ip.dst_address, 0);
2411
2412     p.neighbor.icmp.type = ICMP6_neighbor_solicitation;
2413
2414     p.link_layer_option.header.type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
2415     p.link_layer_option.header.n_data_u64s = sizeof (p.link_layer_option) / sizeof (u64);
2416
2417     vlib_packet_template_init (vm,
2418                                &im->discover_neighbor_packet_template,
2419                                &p, sizeof (p),
2420                                /* alloc chunk size */ 8,
2421                                "ip6 neighbor discovery");
2422   }
2423
2424   return 0;
2425 }
2426
2427 VLIB_INIT_FUNCTION (ip6_lookup_init);
2428
2429 static clib_error_t *
2430 add_del_ip6_interface_table (vlib_main_t * vm,
2431                              unformat_input_t * input,
2432                              vlib_cli_command_t * cmd)
2433 {
2434   vnet_main_t * vnm = vnet_get_main();
2435   clib_error_t * error = 0;
2436   u32 sw_if_index, table_id;
2437
2438   sw_if_index = ~0;
2439
2440   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2441     {
2442       error = clib_error_return (0, "unknown interface `%U'",
2443                                  format_unformat_error, input);
2444       goto done;
2445     }
2446
2447   if (unformat (input, "%d", &table_id))
2448     ;
2449   else
2450     {
2451       error = clib_error_return (0, "expected table id `%U'",
2452                                  format_unformat_error, input);
2453       goto done;
2454     }
2455
2456   {
2457     ip6_main_t * im = &ip6_main;
2458     ip6_fib_t * fib = 
2459       find_ip6_fib_by_table_index_or_id (im, table_id, IP6_ROUTE_FLAG_TABLE_ID);
2460
2461     if (fib) 
2462       {
2463         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2464         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
2465     }
2466   }
2467
2468  done:
2469   return error;
2470 }
2471
2472 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2473   .path = "set interface ip6 table",
2474   .function = add_del_ip6_interface_table,
2475   .short_help = "set interface ip6 table <intfc> <table-id>"
2476 };
2477
2478 void 
2479 ip6_link_local_address_from_ethernet_mac_address (ip6_address_t *ip,
2480                                                   u8 *mac)
2481 {
2482   ip->as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL);
2483   /* Invert the "u" bit */
2484   ip->as_u8 [8] = mac[0] ^ (1<<1);
2485   ip->as_u8 [9] = mac[1];
2486   ip->as_u8 [10] = mac[2];
2487   ip->as_u8 [11] = 0xFF;
2488   ip->as_u8 [12] = 0xFE;
2489   ip->as_u8 [13] = mac[3];
2490   ip->as_u8 [14] = mac[4];
2491   ip->as_u8 [15] = mac[5];
2492 }
2493
2494 void 
2495 ip6_ethernet_mac_address_from_link_local_address (u8 *mac, 
2496                                                   ip6_address_t *ip)
2497 {
2498   /* Invert the previously inverted "u" bit */
2499   mac[0] = ip->as_u8 [8] ^ (1<<1);
2500   mac[1] = ip->as_u8 [9];
2501   mac[2] = ip->as_u8 [10];
2502   mac[3] = ip->as_u8 [13];
2503   mac[4] = ip->as_u8 [14];
2504   mac[5] = ip->as_u8 [15];
2505 }
2506
2507 static clib_error_t * 
2508 test_ip6_link_command_fn (vlib_main_t * vm,
2509                           unformat_input_t * input,
2510                           vlib_cli_command_t * cmd)
2511 {
2512   u8 mac[6];
2513   ip6_address_t _a, *a = &_a;
2514
2515   if (unformat (input, "%U", unformat_ethernet_address, mac))
2516     {
2517       ip6_link_local_address_from_ethernet_mac_address (a, mac);
2518       vlib_cli_output (vm, "Link local address: %U",
2519                        format_ip6_address, a);
2520       ip6_ethernet_mac_address_from_link_local_address (mac, a);
2521       vlib_cli_output (vm, "Original MAC address: %U",
2522                        format_ethernet_address, mac);
2523     }
2524                 
2525   return 0;
2526 }
2527
2528 VLIB_CLI_COMMAND (test_link_command, static) = {
2529   .path = "test ip6 link",
2530   .function = test_ip6_link_command_fn, 
2531   .short_help = "test ip6 link <mac-address>",
2532 };
2533
2534 int vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config)
2535 {
2536   ip6_main_t * im6 = &ip6_main;
2537   ip6_fib_t * fib;
2538   uword * p = hash_get (im6->fib_index_by_table_id, table_id);
2539
2540   if (p == 0)
2541     return -1;
2542
2543   fib = vec_elt_at_index (im6->fibs, p[0]);
2544
2545   fib->flow_hash_config = flow_hash_config;
2546   return 1;
2547 }
2548
2549 static clib_error_t *
2550 set_ip6_flow_hash_command_fn (vlib_main_t * vm,
2551                               unformat_input_t * input,
2552                               vlib_cli_command_t * cmd)
2553 {
2554   int matched = 0;
2555   u32 table_id = 0;
2556   u32 flow_hash_config = 0;
2557   int rv;
2558
2559   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2560     if (unformat (input, "table %d", &table_id))
2561       matched = 1;
2562 #define _(a,v) \
2563     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2564     foreach_flow_hash_bit
2565 #undef _
2566     else break;
2567   }
2568
2569   if (matched == 0)
2570     return clib_error_return (0, "unknown input `%U'",
2571                               format_unformat_error, input);
2572   
2573   rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config);
2574   switch (rv)
2575     {
2576     case 1:
2577       break;
2578
2579     case -1:
2580       return clib_error_return (0, "no such FIB table %d", table_id);
2581       
2582     default:
2583       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2584       break;
2585     }
2586   
2587   return 0;
2588 }
2589
2590 VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) = {
2591     .path = "set ip6 flow-hash",
2592     .short_help = 
2593     "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
2594     .function = set_ip6_flow_hash_command_fn,
2595 };
2596
2597 static clib_error_t *
2598 show_ip6_local_command_fn (vlib_main_t * vm,
2599                            unformat_input_t * input,
2600                            vlib_cli_command_t * cmd)
2601 {
2602   ip6_main_t * im = &ip6_main;
2603   ip_lookup_main_t * lm = &im->lookup_main;
2604   int i;
2605   
2606   vlib_cli_output (vm, "Protocols handled by ip6_local");
2607   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2608     {
2609       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2610         vlib_cli_output (vm, "%d", i);
2611     }
2612   return 0;
2613 }
2614
2615
2616
2617 VLIB_CLI_COMMAND (show_ip_local, static) = {
2618   .path = "show ip6 local",
2619   .function = show_ip6_local_command_fn,
2620   .short_help = "Show ip6 local protocol table",
2621 };
2622
2623 int vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
2624                                  u32 table_index)
2625 {
2626   vnet_main_t * vnm = vnet_get_main();
2627   vnet_interface_main_t * im = &vnm->interface_main;
2628   ip6_main_t * ipm = &ip6_main;
2629   ip_lookup_main_t * lm = &ipm->lookup_main;
2630   vnet_classify_main_t * cm = &vnet_classify_main;
2631
2632   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2633     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2634
2635   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2636     return VNET_API_ERROR_NO_SUCH_ENTRY;
2637
2638   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2639   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
2640
2641   return 0;
2642 }
2643
2644 static clib_error_t *
2645 set_ip6_classify_command_fn (vlib_main_t * vm,
2646                              unformat_input_t * input,
2647                              vlib_cli_command_t * cmd)
2648 {
2649   u32 table_index = ~0;
2650   int table_index_set = 0;
2651   u32 sw_if_index = ~0;
2652   int rv;
2653   
2654   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2655     if (unformat (input, "table-index %d", &table_index))
2656       table_index_set = 1;
2657     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
2658                        vnet_get_main(), &sw_if_index))
2659         ;
2660     else
2661         break;
2662   }
2663   
2664   if (table_index_set == 0)
2665       return clib_error_return (0, "classify table-index must be specified");
2666   
2667   if (sw_if_index == ~0)
2668     return clib_error_return (0, "interface / subif must be specified");
2669
2670   rv = vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index);
2671
2672   switch (rv)
2673     {
2674     case 0:
2675       break;
2676
2677     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2678       return clib_error_return (0, "No such interface");
2679
2680     case VNET_API_ERROR_NO_SUCH_ENTRY:
2681       return clib_error_return (0, "No such classifier table");
2682     }
2683   return 0;
2684 }
2685
2686 VLIB_CLI_COMMAND (set_ip6_classify_command, static) = {
2687     .path = "set ip6 classify",
2688     .short_help = 
2689     "set ip6 classify intfc <int> table-index <index>",
2690     .function = set_ip6_classify_command_fn,
2691 };
2692
2693 static clib_error_t *
2694 ip6_config (vlib_main_t * vm, unformat_input_t * input)
2695 {
2696   ip6_main_t * im = &ip6_main;
2697   uword heapsize = 0;
2698   u32 tmp;
2699   u32 nbuckets = 0;
2700
2701   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2702     if (unformat (input, "hash-buckets %d", &tmp))
2703       nbuckets = tmp;
2704     else if (unformat (input, "heap-size %dm", &tmp))
2705       heapsize = ((u64)tmp) << 20;
2706     else if (unformat (input, "heap-size %dM", &tmp))
2707       heapsize = ((u64)tmp) << 20;
2708     else if (unformat (input, "heap-size %dg", &tmp))
2709       heapsize = ((u64)tmp) << 30;
2710     else if (unformat (input, "heap-size %dG", &tmp))
2711       heapsize = ((u64)tmp) << 30;
2712     else
2713       return clib_error_return (0, "unknown input '%U'",
2714                                 format_unformat_error, input);
2715   }
2716
2717   im->lookup_table_nbuckets = nbuckets;
2718   im->lookup_table_size = heapsize;
2719
2720   return 0;
2721 }
2722
2723 VLIB_EARLY_CONFIG_FUNCTION (ip6_config, "ip6");
2724