Add indirect next hop support
[vpp.git] / vnet / vnet / ip / ip6_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip6_forward.c: IP v6 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
44 #include <vppinfra/cache.h>
45
46 #include <vppinfra/bihash_template.c>
47
48 static void compute_prefix_lengths_in_search_order (ip6_main_t * im)
49 {
50   int i;
51   vec_reset_length (im->prefix_lengths_in_search_order);
52   /* Note: bitmap reversed so this is in fact a longest prefix match */
53   clib_bitmap_foreach (i, im->non_empty_dst_address_length_bitmap,
54   ({
55     int dst_address_length = 128 - i;
56     vec_add1 (im->prefix_lengths_in_search_order, dst_address_length);
57   }));
58 }
59
60 u32 
61 ip6_fib_lookup_with_table (ip6_main_t * im, u32 fib_index, ip6_address_t * dst)
62 {
63   ip_lookup_main_t * lm = &im->lookup_main;
64   int i, len;
65   int rv;
66   BVT(clib_bihash_kv) kv, value;
67   u64 fib;
68
69   len = vec_len (im->prefix_lengths_in_search_order);
70
71   kv.key[0] = dst->as_u64[0];
72   kv.key[1] = dst->as_u64[1];
73   fib = ((u64)((fib_index))<<32);
74
75   for (i = 0; i < len; i++)
76     {
77       int dst_address_length = im->prefix_lengths_in_search_order[i];
78       ip6_address_t * mask = &im->fib_masks[dst_address_length];
79       
80       ASSERT(dst_address_length >= 0 && dst_address_length <= 128);
81       //As lengths are decreasing, masks are increasingly specific.
82       kv.key[0] &= mask->as_u64[0];
83       kv.key[1] &= mask->as_u64[1];
84       kv.key[2] = fib | dst_address_length;
85       
86       rv = BV(clib_bihash_search_inline_2)(&im->ip6_lookup_table, &kv, &value);
87       if (rv == 0)
88         return value.value;
89     }
90
91   return lm->miss_adj_index;
92 }
93
94 u32 ip6_fib_lookup (ip6_main_t * im, u32 sw_if_index, ip6_address_t * dst)
95 {
96     u32 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
97     return ip6_fib_lookup_with_table (im, fib_index, dst);
98 }
99
100 void
101 vnet_ip6_fib_init (ip6_main_t * im, u32 fib_index)
102 {
103   ip_lookup_main_t * lm = &im->lookup_main;
104   ip6_add_del_route_args_t a;
105   ip_adjacency_t * adj;
106
107   memset(&a, 0x0, sizeof(ip6_add_del_route_args_t));
108
109   a.table_index_or_table_id = fib_index;
110   a.flags = (IP6_ROUTE_FLAG_ADD
111              | IP6_ROUTE_FLAG_FIB_INDEX
112              | IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY
113              | IP6_ROUTE_FLAG_NO_REDISTRIBUTE);
114
115   /* Add ff02::1:ff00:0/104 via local route for all tables.
116      This is required for neighbor discovery to work. */
117   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
118                           &a.adj_index);
119   adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
120   adj->if_address_index = ~0;
121   adj->rewrite_header.data_bytes = 0;
122
123   ip6_set_solicited_node_multicast_address (&a.dst_address, 0);
124
125   a.dst_address_length = 104;
126   ip6_add_del_route (im, &a);
127
128   /* Add all-routers multicast address via local route for all tables */
129   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
130                           &a.adj_index);
131   adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
132   adj->if_address_index = ~0;
133   adj->rewrite_header.data_bytes = 0;
134
135   ip6_set_reserved_multicast_address (&a.dst_address,
136                                       IP6_MULTICAST_SCOPE_link_local,
137                                       IP6_MULTICAST_GROUP_ID_all_routers);
138   
139   a.dst_address_length = 128;  
140   ip6_add_del_route (im, &a);
141
142   /* Add all-nodes multicast address via local route for all tables */
143   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
144                           &a.adj_index);
145   adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
146   adj->if_address_index = ~0;
147   adj->rewrite_header.data_bytes = 0;
148
149   ip6_set_reserved_multicast_address (&a.dst_address,
150                                       IP6_MULTICAST_SCOPE_link_local,
151                                       IP6_MULTICAST_GROUP_ID_all_hosts);
152
153   a.dst_address_length = 128;
154   ip6_add_del_route (im, &a);
155
156   /* Add all-mldv2  multicast address via local route for all tables */
157   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
158                           &a.adj_index);
159   adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
160   adj->if_address_index = ~0;
161   adj->rewrite_header.data_bytes = 0;
162   
163   ip6_set_reserved_multicast_address (&a.dst_address,
164                                       IP6_MULTICAST_SCOPE_link_local,
165                                       IP6_MULTICAST_GROUP_ID_mldv2_routers);
166
167   a.dst_address_length = 128;
168   ip6_add_del_route (im, &a);
169 }
170
171 static ip6_fib_t *
172 create_fib_with_table_id (ip6_main_t * im, u32 table_id)
173 {
174   ip6_fib_t * fib;
175   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
176   vec_add2 (im->fibs, fib, 1);
177   fib->table_id = table_id;
178   fib->index = fib - im->fibs;
179   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
180   vnet_ip6_fib_init (im, fib->index);
181   return fib;
182 }
183
184 ip6_fib_t *
185 find_ip6_fib_by_table_index_or_id (ip6_main_t * im, u32 table_index_or_id, u32 flags)
186 {
187   uword * p, fib_index;
188
189   fib_index = table_index_or_id;
190   if (! (flags & IP6_ROUTE_FLAG_FIB_INDEX))
191     {
192       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
193       if (! p)
194         return create_fib_with_table_id (im, table_index_or_id);
195       fib_index = p[0];
196     }
197   return vec_elt_at_index (im->fibs, fib_index);
198 }
199
200 void ip6_add_del_route (ip6_main_t * im, ip6_add_del_route_args_t * a)
201 {
202   ip_lookup_main_t * lm = &im->lookup_main;
203   ip6_fib_t * fib;
204   ip6_address_t dst_address;
205   u32 dst_address_length, adj_index;
206   uword is_del;
207   u32 old_adj_index = ~0;
208   BVT(clib_bihash_kv) kv, value;
209
210   vlib_smp_unsafe_warning();
211
212   is_del = (a->flags & IP6_ROUTE_FLAG_DEL) != 0;
213
214   /* Either create new adjacency or use given one depending on arguments. */
215   if (a->n_add_adj > 0)
216     {
217       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
218       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
219     }
220   else
221     adj_index = a->adj_index;
222
223   dst_address = a->dst_address;
224   dst_address_length = a->dst_address_length;
225   fib = find_ip6_fib_by_table_index_or_id (im, a->table_index_or_table_id, 
226                                            a->flags);
227
228   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
229   ip6_address_mask (&dst_address, &im->fib_masks[dst_address_length]);
230
231   /* refcount accounting */
232   if (is_del)
233     {
234       ASSERT (im->dst_address_length_refcounts[dst_address_length] > 0);
235       if (--im->dst_address_length_refcounts[dst_address_length] == 0)
236         {
237           im->non_empty_dst_address_length_bitmap =
238             clib_bitmap_set (im->non_empty_dst_address_length_bitmap, 
239                              128 - dst_address_length, 0);
240           compute_prefix_lengths_in_search_order (im);
241         }
242     }
243   else
244     {
245       im->dst_address_length_refcounts[dst_address_length]++;
246
247       im->non_empty_dst_address_length_bitmap =
248         clib_bitmap_set (im->non_empty_dst_address_length_bitmap, 
249                              128 - dst_address_length, 1);
250       compute_prefix_lengths_in_search_order (im);
251     }
252     
253   kv.key[0] = dst_address.as_u64[0];
254   kv.key[1] = dst_address.as_u64[1];
255   kv.key[2] = ((u64)((fib - im->fibs))<<32) | dst_address_length;
256
257   if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) == 0)
258     old_adj_index = value.value;
259
260   if (is_del)
261     BV(clib_bihash_add_del) (&im->ip6_lookup_table, &kv, 0 /* is_add */);
262   else
263     {
264       /* Make sure adj index is valid. */
265       if (CLIB_DEBUG > 0)
266         (void) ip_get_adjacency (lm, adj_index);
267
268       kv.value = adj_index;
269
270       BV(clib_bihash_add_del) (&im->ip6_lookup_table, &kv, 1 /* is_add */);
271     }
272
273   /* Avoid spurious reference count increments */
274   if (old_adj_index == adj_index && !(a->flags & IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
275     {
276       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
277       if (adj->share_count > 0)
278         adj->share_count --;
279     }
280
281   /* Delete old adjacency index if present and changed. */
282   {
283     if (! (a->flags & IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
284         && old_adj_index != ~0
285         && old_adj_index != adj_index)
286       ip_del_adjacency (lm, old_adj_index);
287   }
288 }
289
290 void
291 ip6_add_del_route_next_hop (ip6_main_t * im,
292                             u32 flags,
293                             ip6_address_t * dst_address,
294                             u32 dst_address_length,
295                             ip6_address_t * next_hop,
296                             u32 next_hop_sw_if_index,
297                             u32 next_hop_weight, u32 adj_index,
298                             u32 explicit_fib_index)
299 {
300   vnet_main_t * vnm = vnet_get_main();
301   ip_lookup_main_t * lm = &im->lookup_main;
302   u32 fib_index;
303   ip6_fib_t * fib;
304   ip6_address_t masked_dst_address;
305   u32 old_mp_adj_index, new_mp_adj_index;
306   u32 dst_adj_index, nh_adj_index;
307   int rv;
308   ip_adjacency_t * dst_adj;
309   ip_multipath_adjacency_t * old_mp, * new_mp;
310   int is_del = (flags & IP6_ROUTE_FLAG_DEL) != 0;
311   int is_interface_next_hop;
312   clib_error_t * error = 0;
313   uword * nh_result;
314   BVT(clib_bihash_kv) kv, value;
315
316   vlib_smp_unsafe_warning();
317
318   if (explicit_fib_index == (u32)~0)
319     fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
320   else
321     fib_index = explicit_fib_index;
322
323   fib = vec_elt_at_index (im->fibs, fib_index);
324
325   /* Lookup next hop to be added or deleted. */
326   is_interface_next_hop = ip6_address_is_zero (next_hop);
327   if (adj_index == (u32)~0)
328     {
329       if (is_interface_next_hop)
330         {
331           nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, 
332                                 next_hop_sw_if_index);
333           if (nh_result)
334             nh_adj_index = *nh_result;
335           else
336             {
337               ip_adjacency_t * adj;
338               adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
339                                       &nh_adj_index);
340               ip6_adjacency_set_interface_route (vnm, adj, 
341                                                  next_hop_sw_if_index, ~0);
342               ip_call_add_del_adjacency_callbacks 
343                 (lm, next_hop_sw_if_index, /* is_del */ 0);
344               hash_set (im->interface_route_adj_index_by_sw_if_index, 
345                         next_hop_sw_if_index, nh_adj_index);
346             }
347         }
348       else
349         {
350           /* Look for the interface /128 route */
351           kv.key[0] = next_hop->as_u64[0];
352           kv.key[1] = next_hop->as_u64[1];
353           kv.key[2] = ((u64)((fib - im->fibs))<<32) | 128;
354
355           if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) < 0)
356           {
357             ip_adjacency_t * adj;
358             nh_adj_index = ip6_fib_lookup_with_table (im, fib_index, next_hop);
359             adj = ip_get_adjacency (lm, nh_adj_index);
360             /* if ND interface adjacencty is present, we need to
361                              install ND adjaceny for specific next hop */
362             if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
363                 adj->arp.next_hop.ip6.as_u64[0] == 0 &&
364                 adj->arp.next_hop.ip6.as_u64[1] == 0)
365             {
366               nh_adj_index = vnet_ip6_neighbor_glean_add(fib_index, next_hop);
367             }
368             else
369             {
370               ip_adjacency_t add_adj;
371               add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
372               add_adj.indirect.next_hop.ip6.as_u64[0] = next_hop->as_u64[0];
373               add_adj.indirect.next_hop.ip6.as_u64[1] = next_hop->as_u64[1];
374               add_adj.explicit_fib_index = explicit_fib_index;
375               ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
376             }
377           }
378           else
379             nh_adj_index = value.value;
380
381         }
382     }
383   else
384     {
385       /* Look for the interface /128 route */
386       kv.key[0] = next_hop->as_u64[0];
387       kv.key[1] = next_hop->as_u64[1];
388       kv.key[2] = ((u64)((fib - im->fibs))<<32) | 128;
389       
390       if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) < 0)
391         {
392           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
393           error = clib_error_return (0, "next-hop %U/128 not in FIB",
394                                      format_ip6_address, next_hop);
395           goto done;
396         }
397       
398       nh_adj_index = value.value;
399     }
400
401   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
402   masked_dst_address = dst_address[0];
403   ip6_address_mask (&masked_dst_address, &im->fib_masks[dst_address_length]);
404
405   kv.key[0] = masked_dst_address.as_u64[0];
406   kv.key[1] = masked_dst_address.as_u64[1];
407   kv.key[2] = ((u64)((fib - im->fibs))<<32) | dst_address_length;
408
409   rv = BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value);
410
411   if (rv == 0)
412     {
413       dst_adj_index = value.value;
414       dst_adj = ip_get_adjacency (lm, dst_adj_index);
415     }
416   else
417     {
418       /* For deletes destination must be known. */
419       if (is_del)
420         {
421           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
422           error = clib_error_return (0, "unknown destination %U/%d",
423                                      format_ip6_address, dst_address,
424                                      dst_address_length);
425           goto done;
426         }
427
428       dst_adj_index = ~0;
429       dst_adj = 0;
430     }
431
432   /* Ignore adds of X/128 with next hop of X. */
433   if (! is_del
434       && dst_address_length == 128
435       && ip6_address_is_equal (dst_address, next_hop))
436     {
437       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
438       error = clib_error_return (0, "prefix matches next hop %U/%d",
439                                  format_ip6_address, dst_address,
440                                  dst_address_length);
441       goto done;
442     }
443
444   /* Destination is not known and default weight is set so add route
445      to existing non-multipath adjacency */
446   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
447   {
448     /* create new adjacency */
449     ip6_add_del_route_args_t a;
450     a.table_index_or_table_id = fib_index;
451     a.flags = ((is_del ? IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD)
452         | IP6_ROUTE_FLAG_FIB_INDEX
453         | IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY
454         | (flags & (IP6_ROUTE_FLAG_NO_REDISTRIBUTE
455             | IP6_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
456     a.dst_address = dst_address[0];
457     a.dst_address_length = dst_address_length;
458     a.adj_index = nh_adj_index;
459     a.add_adj = 0;
460     a.n_add_adj = 0;
461
462     ip6_add_del_route (im, &a);
463     goto done;
464   }
465
466   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
467
468   if (! ip_multipath_adjacency_add_del_next_hop
469       (lm, is_del,
470        dst_adj ? dst_adj->heap_handle : ~0,
471        nh_adj_index,
472        next_hop_weight,
473        &new_mp_adj_index))
474     {
475       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
476       error = clib_error_return 
477         (0, "requested deleting next-hop %U not found in multi-path",
478          format_ip6_address, next_hop);
479       goto done;
480     }
481   
482   old_mp = new_mp = 0;
483   if (old_mp_adj_index != ~0)
484     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
485   if (new_mp_adj_index != ~0)
486     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
487
488   if (old_mp != new_mp)
489     {
490       ip6_add_del_route_args_t a;
491       a.table_index_or_table_id = fib_index;
492       a.flags = ((is_del ? IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD)
493                  | IP6_ROUTE_FLAG_FIB_INDEX
494                  | IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY
495                  | (flags & IP6_ROUTE_FLAG_NO_REDISTRIBUTE));
496       a.dst_address = dst_address[0];
497       a.dst_address_length = dst_address_length;
498       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
499       a.add_adj = 0;
500       a.n_add_adj = 0;
501
502       ip6_add_del_route (im, &a);
503     }
504
505  done:
506   if (error)
507     clib_error_report (error);
508 }
509
510 u32
511 ip6_get_route (ip6_main_t * im,
512                u32 table_index_or_table_id,
513                u32 flags,
514                ip6_address_t * address,
515                u32 address_length)
516 {
517   ip6_fib_t * fib = find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
518   ip6_address_t masked_address;
519   BVT(clib_bihash_kv) kv, value;
520
521   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
522   clib_memcpy (&masked_address, address, sizeof (masked_address));
523   ip6_address_mask (&masked_address, &im->fib_masks[address_length]);
524
525   kv.key[0] = masked_address.as_u64[0];
526   kv.key[1] = masked_address.as_u64[1];
527   kv.key[2] = ((u64)((fib - im->fibs))<<32) | address_length;
528
529   if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) == 0)
530     return (value.value);
531   return 0;
532 }
533
534 void
535 ip6_foreach_matching_route (ip6_main_t * im,
536                             u32 table_index_or_table_id,
537                             u32 flags,
538                             ip6_address_t * dst_address,
539                             u32 address_length,
540                             ip6_address_t ** results,
541                             u8 ** result_lengths)
542 {
543   ip6_fib_t * fib = 
544     find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
545   BVT(clib_bihash) * h = &im->ip6_lookup_table;
546   BVT(clib_bihash_value) * v;
547   clib_bihash_bucket_t * b;
548   int i, j, k;
549   
550   if (*results)
551     _vec_len (*results) = 0;
552   if (*result_lengths)
553     _vec_len (*result_lengths) = 0;
554
555   /* Walk the table looking for routes which match the supplied address */
556   for (i = 0; i < h->nbuckets; i++)
557     {
558       b = &h->buckets [i];
559       if (b->offset == 0)
560           continue;
561
562       v = BV(clib_bihash_get_value) (h, b->offset);
563       for (j = 0; j < (1<<b->log2_pages); j++)
564         {
565           for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
566             {
567               if (BV(clib_bihash_is_free)(&v->kvp[k]))
568                 continue;
569               
570               if ((v->kvp[k].key[2] 
571                    == (((u64)((fib - im->fibs))<<32) | address_length))
572                   && ip6_destination_matches_route 
573                   (im, dst_address, (ip6_address_t *) &v->kvp[k], 
574                    address_length))
575                 {
576                   ip6_address_t * a;
577
578                   a = (ip6_address_t *)(&v->kvp[k]);
579
580                   vec_add1 (*results, a[0]);
581                   vec_add1 (*result_lengths, address_length);
582                 }
583             }
584           v++;
585         }
586     }
587 }
588
589 void ip6_maybe_remap_adjacencies (ip6_main_t * im,
590                                   u32 table_index_or_table_id,
591                                   u32 flags)
592 {
593 #if SOONE
594   ip6_fib_t * fib 
595     = find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
596 #endif
597   ip_lookup_main_t * lm = &im->lookup_main;
598
599   if (lm->n_adjacency_remaps == 0)
600     return;
601
602   clib_warning ("unimplemented, please report to vpp-dev@cisco.com");
603
604   /* All remaps have been performed. */
605   lm->n_adjacency_remaps = 0;
606 }
607
608 void ip6_delete_matching_routes (ip6_main_t * im,
609                                  u32 table_index_or_table_id,
610                                  u32 flags,
611                                  ip6_address_t * address,
612                                  u32 address_length)
613 {
614   /* $$$$ static may be OK - this should happen only on thread 0 */
615   static ip6_address_t * matching_addresses;
616   static u8 * matching_address_lengths;
617   u32 l, i;
618   ip6_add_del_route_args_t a;
619
620   vlib_smp_unsafe_warning();
621
622   a.flags = IP6_ROUTE_FLAG_DEL | IP6_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
623   a.table_index_or_table_id = table_index_or_table_id;
624   a.adj_index = ~0;
625   a.add_adj = 0;
626   a.n_add_adj = 0;
627
628   for (l = address_length + 1; l <= 128; l++)
629     {
630       ip6_foreach_matching_route (im, table_index_or_table_id, flags,
631                                   address,
632                                   l,
633                                   &matching_addresses,
634                                   &matching_address_lengths);
635       for (i = 0; i < vec_len (matching_addresses); i++)
636         {
637           a.dst_address = matching_addresses[i];
638           a.dst_address_length = matching_address_lengths[i];
639           ip6_add_del_route (im, &a);
640         }
641     }
642
643   ip6_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
644 }
645
646 always_inline uword
647 ip6_lookup_inline (vlib_main_t * vm,
648                    vlib_node_runtime_t * node,
649                    vlib_frame_t * frame,
650                    int is_indirect)
651 {
652   ip6_main_t * im = &ip6_main;
653   ip_lookup_main_t * lm = &im->lookup_main;
654   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
655   u32 n_left_from, n_left_to_next, * from, * to_next;
656   ip_lookup_next_t next;
657   u32 cpu_index = os_get_cpu_number();
658
659   from = vlib_frame_vector_args (frame);
660   n_left_from = frame->n_vectors;
661   next = node->cached_next_index;
662
663   while (n_left_from > 0)
664     {
665       vlib_get_next_frame (vm, node, next,
666                            to_next, n_left_to_next);
667
668       while (n_left_from >= 4 && n_left_to_next >= 2)
669         {
670           vlib_buffer_t * p0, * p1;
671           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
672           ip_lookup_next_t next0, next1;
673           ip6_header_t * ip0, * ip1;
674           ip_adjacency_t * adj0, * adj1;
675           ip6_address_t * dst_addr0, * dst_addr1;
676           u32 fib_index0, fib_index1;
677           u32 flow_hash_config0, flow_hash_config1;
678
679           /* Prefetch next iteration. */
680           {
681             vlib_buffer_t * p2, * p3;
682
683             p2 = vlib_get_buffer (vm, from[2]);
684             p3 = vlib_get_buffer (vm, from[3]);
685
686             vlib_prefetch_buffer_header (p2, LOAD);
687             vlib_prefetch_buffer_header (p3, LOAD);
688             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
689             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
690           }
691
692           pi0 = to_next[0] = from[0];
693           pi1 = to_next[1] = from[1];
694
695           p0 = vlib_get_buffer (vm, pi0);
696           p1 = vlib_get_buffer (vm, pi1);
697
698           ip0 = vlib_buffer_get_current (p0);
699           ip1 = vlib_buffer_get_current (p1);
700
701           if (is_indirect)
702             {
703               ip_adjacency_t * iadj0, * iadj1;
704               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
705               iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
706               dst_addr0 = &iadj0->indirect.next_hop.ip6;
707               dst_addr1 = &iadj1->indirect.next_hop.ip6;
708             }
709           else
710             {
711               dst_addr0 = &ip0->dst_address;
712               dst_addr1 = &ip1->dst_address;
713             }
714
715           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
716           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
717
718           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
719             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
720           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
721             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
722
723           adj_index0 = ip6_fib_lookup_with_table (im, fib_index0, dst_addr0);
724           adj_index1 = ip6_fib_lookup_with_table (im, fib_index1, dst_addr1);
725
726           adj0 = ip_get_adjacency (lm, adj_index0);
727           adj1 = ip_get_adjacency (lm, adj_index1);
728
729           if (PREDICT_FALSE (adj0->explicit_fib_index != ~0))
730             {
731               adj_index0 = ip6_fib_lookup_with_table 
732                 (im, adj0->explicit_fib_index, dst_addr0);
733               adj0 = ip_get_adjacency (lm, adj_index0);
734             }
735           if (PREDICT_FALSE (adj1->explicit_fib_index != ~0))
736             {
737               adj_index1 = ip6_fib_lookup_with_table 
738                 (im, adj1->explicit_fib_index, dst_addr1);
739               adj1 = ip_get_adjacency (lm, adj_index1);
740             }
741
742           next0 = adj0->lookup_next_index;
743           next1 = adj1->lookup_next_index;
744
745           /* Process hop-by-hop options if present */
746           next0 = (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) ?
747               IP_LOOKUP_NEXT_HOP_BY_HOP : next0;
748           next1 = (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) ?
749               IP_LOOKUP_NEXT_HOP_BY_HOP : next1;
750
751           vnet_buffer (p0)->ip.flow_hash = 
752             vnet_buffer(p1)->ip.flow_hash = 0;
753
754           if (PREDICT_FALSE(adj0->n_adj > 1))
755             {
756               flow_hash_config0 = 
757                 vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config;
758               vnet_buffer (p0)->ip.flow_hash = 
759                 ip6_compute_flow_hash (ip0, flow_hash_config0);
760             }
761
762           if (PREDICT_FALSE(adj1->n_adj > 1))
763             {
764               flow_hash_config1 = 
765                 vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config;
766
767               vnet_buffer (p1)->ip.flow_hash = 
768                 ip6_compute_flow_hash (ip1, flow_hash_config1);
769             }
770
771           ASSERT (adj0->n_adj > 0);
772           ASSERT (adj1->n_adj > 0);
773           ASSERT (is_pow2 (adj0->n_adj));
774           ASSERT (is_pow2 (adj1->n_adj));
775           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
776           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
777
778           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
779           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
780
781           vlib_increment_combined_counter 
782               (cm, cpu_index, adj_index0, 1,
783                vlib_buffer_length_in_chain (vm, p0));
784           vlib_increment_combined_counter 
785               (cm, cpu_index, adj_index1, 1,
786                vlib_buffer_length_in_chain (vm, p1));
787
788           from += 2;
789           to_next += 2;
790           n_left_to_next -= 2;
791           n_left_from -= 2;
792
793           wrong_next = (next0 != next) + 2*(next1 != next);
794           if (PREDICT_FALSE (wrong_next != 0))
795             {
796               switch (wrong_next)
797                 {
798                 case 1:
799                   /* A B A */
800                   to_next[-2] = pi1;
801                   to_next -= 1;
802                   n_left_to_next += 1;
803                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
804                   break;
805
806                 case 2:
807                   /* A A B */
808                   to_next -= 1;
809                   n_left_to_next += 1;
810                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
811                   break;
812
813                 case 3:
814                   /* A B C */
815                   to_next -= 2;
816                   n_left_to_next += 2;
817                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
818                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
819                   if (next0 == next1)
820                     {
821                       /* A B B */
822                       vlib_put_next_frame (vm, node, next, n_left_to_next);
823                       next = next1;
824                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
825                     }
826                 }
827             }
828         }
829     
830       while (n_left_from > 0 && n_left_to_next > 0)
831         {
832           vlib_buffer_t * p0;
833           ip6_header_t * ip0;
834           u32 pi0, adj_index0;
835           ip_lookup_next_t next0;
836           ip_adjacency_t * adj0;
837           ip6_address_t * dst_addr0;
838           u32 fib_index0, flow_hash_config0;
839
840           pi0 = from[0];
841           to_next[0] = pi0;
842
843           p0 = vlib_get_buffer (vm, pi0);
844
845           ip0 = vlib_buffer_get_current (p0);
846
847           if (is_indirect)
848             {
849               ip_adjacency_t * iadj0;
850               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
851               dst_addr0 = &iadj0->indirect.next_hop.ip6;
852             }
853           else
854             {
855               dst_addr0 = &ip0->dst_address;
856             }
857
858           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
859           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
860             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
861
862           flow_hash_config0 = 
863               vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config;
864
865           adj_index0 = ip6_fib_lookup_with_table (im, fib_index0, dst_addr0);
866
867           adj0 = ip_get_adjacency (lm, adj_index0);
868
869           if (PREDICT_FALSE (adj0->explicit_fib_index != ~0))
870             {
871               adj_index0 = ip6_fib_lookup_with_table
872                 (im, adj0->explicit_fib_index, dst_addr0);
873               adj0 = ip_get_adjacency (lm, adj_index0);
874             }
875
876           next0 = adj0->lookup_next_index;
877           next0 = (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) ?
878               IP_LOOKUP_NEXT_HOP_BY_HOP : next0;
879
880           vnet_buffer (p0)->ip.flow_hash = 0;
881
882           if (PREDICT_FALSE(adj0->n_adj > 1))
883             {
884               flow_hash_config0 = 
885                 vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config;
886               vnet_buffer (p0)->ip.flow_hash = 
887                 ip6_compute_flow_hash (ip0, flow_hash_config0);
888             }
889
890           ASSERT (adj0->n_adj > 0);
891           ASSERT (is_pow2 (adj0->n_adj));
892           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
893
894           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
895
896           vlib_increment_combined_counter 
897               (cm, cpu_index, adj_index0, 1,
898                vlib_buffer_length_in_chain (vm, p0));
899
900           from += 1;
901           to_next += 1;
902           n_left_to_next -= 1;
903           n_left_from -= 1;
904
905           if (PREDICT_FALSE (next0 != next))
906             {
907               n_left_to_next += 1;
908               vlib_put_next_frame (vm, node, next, n_left_to_next);
909               next = next0;
910               vlib_get_next_frame (vm, node, next,
911                                    to_next, n_left_to_next);
912               to_next[0] = pi0;
913               to_next += 1;
914               n_left_to_next -= 1;
915             }
916         }
917
918       vlib_put_next_frame (vm, node, next, n_left_to_next);
919     }
920
921   return frame->n_vectors;
922 }
923
924 void ip6_adjacency_set_interface_route (vnet_main_t * vnm,
925                                         ip_adjacency_t * adj,
926                                         u32 sw_if_index,
927                                         u32 if_address_index)
928 {
929   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
930   ip_lookup_next_t n;
931   u32 node_index;
932
933   if (hw->hw_class_index == ethernet_hw_interface_class.index
934       || hw->hw_class_index == srp_hw_interface_class.index)
935     {
936       n = IP_LOOKUP_NEXT_ARP;
937       node_index = ip6_discover_neighbor_node.index;
938       adj->if_address_index = if_address_index;
939       adj->arp.next_hop.ip6.as_u64[0] = 0;
940       adj->arp.next_hop.ip6.as_u64[1] = 0;
941   }
942   else
943     {
944       n = IP_LOOKUP_NEXT_REWRITE;
945       node_index = ip6_rewrite_node.index;
946     }
947
948  adj->lookup_next_index = n;
949  adj->explicit_fib_index = ~0;
950
951  vnet_rewrite_for_sw_interface
952    (vnm,
953     VNET_L3_PACKET_TYPE_IP6,
954     sw_if_index,
955     node_index,
956     VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
957     &adj->rewrite_header,
958     sizeof (adj->rewrite_data));
959 }
960
961 static void
962 ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
963                           ip6_main_t * im, u32 fib_index,
964                           ip_interface_address_t * a)
965 {
966   ip_lookup_main_t * lm = &im->lookup_main;
967   ip_adjacency_t * adj;
968   ip6_address_t * address = ip_interface_address_get_address (lm, a);
969   ip6_add_del_route_args_t x;
970   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
971   u32 classify_table_index;
972
973   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
974   x.table_index_or_table_id = fib_index;
975   x.flags = (IP6_ROUTE_FLAG_ADD
976              | IP6_ROUTE_FLAG_FIB_INDEX
977              | IP6_ROUTE_FLAG_NO_REDISTRIBUTE);
978   x.dst_address = address[0];
979   x.dst_address_length = a->address_length;
980   x.n_add_adj = 0;
981   x.add_adj = 0;
982
983   a->neighbor_probe_adj_index = ~0;
984   if (a->address_length < 128)
985     {
986       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
987                               &x.adj_index);
988       ip6_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
989       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
990       ip6_add_del_route (im, &x);
991       a->neighbor_probe_adj_index = x.adj_index;
992     }
993
994   /* Add e.g. ::1/128 as local to this host. */
995   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
996                           &x.adj_index);
997
998   classify_table_index = ~0;
999   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1000     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1001   if (classify_table_index != (u32) ~0)
1002     {
1003       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1004       adj->classify.table_index = classify_table_index;
1005     }
1006   else
1007     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1008   
1009   adj->if_address_index = a - lm->if_address_pool;
1010   adj->rewrite_header.sw_if_index = sw_if_index;
1011   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1012   adj->rewrite_header.data_bytes = 0;
1013   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1014   x.dst_address_length = 128;
1015   ip6_add_del_route (im, &x);
1016 }
1017
1018 static void
1019 ip6_del_interface_routes (ip6_main_t * im, u32 fib_index,
1020                           ip6_address_t * address, u32 address_length)
1021 {
1022   ip6_add_del_route_args_t x;
1023
1024   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1025   x.table_index_or_table_id = fib_index;
1026   x.flags = (IP6_ROUTE_FLAG_DEL
1027              | IP6_ROUTE_FLAG_FIB_INDEX
1028              | IP6_ROUTE_FLAG_NO_REDISTRIBUTE);
1029   x.dst_address = address[0];
1030   x.dst_address_length = address_length;
1031   x.adj_index = ~0;
1032   x.n_add_adj = 0;
1033   x.add_adj = 0;
1034
1035   if (address_length < 128)
1036     {
1037       /* Don't wipe out fe80::0/64 */
1038       if (address_length != 64 || 
1039           address[0].as_u64[0] != clib_net_to_host_u64(0xfe80000000000000ULL))
1040         ip6_add_del_route (im, &x);
1041     }
1042
1043   x.dst_address_length = 128;
1044   ip6_add_del_route (im, &x);
1045
1046   ip6_delete_matching_routes (im,
1047                               fib_index,
1048                               IP6_ROUTE_FLAG_FIB_INDEX,
1049                               address,
1050                               address_length);
1051 }
1052
1053 typedef struct {
1054     u32 sw_if_index;
1055     ip6_address_t address;
1056     u32 length;
1057 } ip6_interface_address_t;
1058
1059 static clib_error_t *
1060 ip6_add_del_interface_address_internal (vlib_main_t * vm,
1061                                         u32 sw_if_index,
1062                                         ip6_address_t * new_address,
1063                                         u32 new_length,
1064                                         u32 redistribute,
1065                                         u32 insert_routes,
1066                                         u32 is_del);
1067
1068 static clib_error_t *
1069 ip6_add_del_interface_address_internal (vlib_main_t * vm,
1070                                         u32 sw_if_index,
1071                                         ip6_address_t * address,
1072                                         u32 address_length,
1073                                         u32 redistribute,
1074                                         u32 insert_routes,
1075                                         u32 is_del)
1076 {
1077   vnet_main_t * vnm = vnet_get_main();
1078   ip6_main_t * im = &ip6_main;
1079   ip_lookup_main_t * lm = &im->lookup_main;
1080   clib_error_t * error;
1081   u32 if_address_index;
1082   ip6_address_fib_t ip6_af, * addr_fib = 0;
1083
1084   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1085   ip6_addr_fib_init (&ip6_af, address,
1086                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1087   vec_add1 (addr_fib, ip6_af);
1088
1089   {
1090     uword elts_before = pool_elts (lm->if_address_pool);
1091
1092     error = ip_interface_address_add_del
1093       (lm,
1094        sw_if_index,
1095        addr_fib,
1096        address_length,
1097        is_del,
1098        &if_address_index);
1099     if (error)
1100       goto done;
1101
1102     /* Pool did not grow: add duplicate address. */
1103     if (elts_before == pool_elts (lm->if_address_pool))
1104       goto done;
1105   }
1106
1107   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1108     {
1109       if (is_del)
1110         ip6_del_interface_routes (im, ip6_af.fib_index, address,
1111                                   address_length);
1112
1113       else
1114         ip6_add_interface_routes (vnm, sw_if_index,
1115                                   im, ip6_af.fib_index,
1116                                   pool_elt_at_index (lm->if_address_pool, if_address_index));
1117     }
1118
1119   {
1120     ip6_add_del_interface_address_callback_t * cb;
1121     vec_foreach (cb, im->add_del_interface_address_callbacks)
1122       cb->function (im, cb->function_opaque, sw_if_index,
1123                     address, address_length,
1124                     if_address_index,
1125                     is_del);
1126   }
1127
1128  done:
1129   vec_free (addr_fib);
1130   return error;
1131 }
1132
1133 clib_error_t *
1134 ip6_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1135                                ip6_address_t * address, u32 address_length,
1136                                u32 is_del)
1137 {
1138   return ip6_add_del_interface_address_internal
1139     (vm, sw_if_index, address, address_length,
1140      /* redistribute */ 1,
1141      /* insert_routes */ 1,
1142      is_del);
1143 }
1144
1145 clib_error_t *
1146 ip6_sw_interface_admin_up_down (vnet_main_t * vnm,
1147                                 u32 sw_if_index,
1148                                 u32 flags)
1149 {
1150   ip6_main_t * im = &ip6_main;
1151   ip_interface_address_t * ia;
1152   ip6_address_t * a;
1153   u32 is_admin_up, fib_index;
1154
1155   /* Fill in lookup tables with default table (0). */
1156   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1157
1158   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1159
1160   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1161
1162   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1163
1164   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1165                                 0 /* honor unnumbered */,
1166   ({
1167     a = ip_interface_address_get_address (&im->lookup_main, ia);
1168     if (is_admin_up)
1169       ip6_add_interface_routes (vnm, sw_if_index,
1170                                 im, fib_index,
1171                                 ia);
1172     else
1173       ip6_del_interface_routes (im, fib_index,
1174                                 a, ia->address_length);
1175   }));
1176
1177   return 0;
1178 }
1179
1180 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
1181
1182 clib_error_t *
1183 ip6_sw_interface_add_del (vnet_main_t * vnm,
1184                           u32 sw_if_index,
1185                           u32 is_add)
1186 {
1187   vlib_main_t * vm = vnm->vlib_main;
1188   ip6_main_t * im = &ip6_main;
1189   ip_lookup_main_t * lm = &im->lookup_main;
1190   u32 ci, cast;
1191
1192   for (cast = 0; cast < VNET_N_CAST; cast++)
1193     {
1194       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1195       vnet_config_main_t * vcm = &cm->config_main;
1196
1197       /* FIXME multicast. */
1198       if (! vcm->node_index_by_feature_index)
1199         {
1200           char * start_nodes[] = { "ip6-input", };
1201           char * feature_nodes[] = {
1202             [IP6_RX_FEATURE_CHECK_ACCESS] = "ip6-inacl",
1203             [IP6_RX_FEATURE_IPSEC] = "ipsec-input-ip6",
1204             [IP6_RX_FEATURE_L2TPV3] = "l2tp-decap",
1205             [IP6_RX_FEATURE_VPATH]  = "vpath-input-ip6",
1206             [IP6_RX_FEATURE_LOOKUP] = "ip6-lookup",
1207           };
1208           vnet_config_init (vm, vcm,
1209                             start_nodes, ARRAY_LEN (start_nodes),
1210                             feature_nodes, ARRAY_LEN (feature_nodes));
1211         }
1212
1213       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1214       ci = cm->config_index_by_sw_if_index[sw_if_index];
1215
1216       if (is_add)
1217         ci = vnet_config_add_feature (vm, vcm,
1218                                       ci,
1219                                       IP6_RX_FEATURE_LOOKUP,
1220                                       /* config data */ 0,
1221                                       /* # bytes of config data */ 0);
1222       else
1223         ci = vnet_config_del_feature (vm, vcm,
1224                                       ci,
1225                                       IP6_RX_FEATURE_LOOKUP,
1226                                       /* config data */ 0,
1227                                       /* # bytes of config data */ 0);
1228
1229       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1230     }
1231   return /* no error */ 0;
1232 }
1233
1234 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del);
1235
1236 static uword
1237 ip6_lookup (vlib_main_t * vm,
1238             vlib_node_runtime_t * node,
1239             vlib_frame_t * frame)
1240 {
1241   return ip6_lookup_inline (vm, node, frame, /* is_indirect */ 0);
1242 }
1243
1244 VLIB_REGISTER_NODE (ip6_lookup_node) = {
1245   .function = ip6_lookup,
1246   .name = "ip6-lookup",
1247   .vector_size = sizeof (u32),
1248
1249   .n_next_nodes = IP_LOOKUP_N_NEXT,
1250   .next_nodes = IP6_LOOKUP_NEXT_NODES,
1251 };
1252
1253 static uword
1254 ip6_indirect (vlib_main_t * vm,
1255               vlib_node_runtime_t * node,
1256               vlib_frame_t * frame)
1257 {
1258   return ip6_lookup_inline (vm, node, frame, /* is_indirect */ 1);
1259 }
1260
1261
1262 VLIB_REGISTER_NODE (ip6_indirect_node) = {
1263   .function = ip6_indirect,
1264   .name = "ip6-indirect",
1265   .vector_size = sizeof (u32),
1266
1267   .n_next_nodes = IP_LOOKUP_N_NEXT,
1268   .next_nodes = IP6_LOOKUP_NEXT_NODES,
1269 };
1270
1271 typedef struct {
1272   /* Adjacency taken. */
1273   u32 adj_index;
1274   u32 flow_hash;
1275   u32 fib_index;
1276
1277   /* Packet data, possibly *after* rewrite. */
1278   u8 packet_data[128 - 1*sizeof(u32)];
1279 } ip6_forward_next_trace_t;
1280
1281 static u8 * format_ip6_forward_next_trace (u8 * s, va_list * args)
1282 {
1283   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1284   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1285   ip6_forward_next_trace_t * t = va_arg (*args, ip6_forward_next_trace_t *);
1286   vnet_main_t * vnm = vnet_get_main();
1287   ip6_main_t * im = &ip6_main;
1288   ip_adjacency_t * adj;
1289   uword indent = format_get_indent (s);
1290
1291   adj = ip_get_adjacency (&im->lookup_main, t->adj_index);
1292   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1293               t->fib_index, t->adj_index, format_ip_adjacency,
1294               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1295   switch (adj->lookup_next_index)
1296     {
1297     case IP_LOOKUP_NEXT_REWRITE:
1298       s = format (s, "\n%U%U",
1299                   format_white_space, indent,
1300                   format_ip_adjacency_packet_data,
1301                   vnm, &im->lookup_main, t->adj_index,
1302                   t->packet_data, sizeof (t->packet_data));
1303       break;
1304
1305     default:
1306       break;
1307     }
1308
1309   return s;
1310 }
1311
1312 /* Common trace function for all ip6-forward next nodes. */
1313 void
1314 ip6_forward_next_trace (vlib_main_t * vm,
1315                         vlib_node_runtime_t * node,
1316                         vlib_frame_t * frame,
1317                         vlib_rx_or_tx_t which_adj_index)
1318 {
1319   u32 * from, n_left;
1320   ip6_main_t * im = &ip6_main;
1321
1322   n_left = frame->n_vectors;
1323   from = vlib_frame_vector_args (frame);
1324   
1325   while (n_left >= 4)
1326     {
1327       u32 bi0, bi1;
1328       vlib_buffer_t * b0, * b1;
1329       ip6_forward_next_trace_t * t0, * t1;
1330
1331       /* Prefetch next iteration. */
1332       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1333       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1334
1335       bi0 = from[0];
1336       bi1 = from[1];
1337
1338       b0 = vlib_get_buffer (vm, bi0);
1339       b1 = vlib_get_buffer (vm, bi1);
1340
1341       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1342         {
1343           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1344           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1345           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1346           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1347                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1348           clib_memcpy (t0->packet_data,
1349                   vlib_buffer_get_current (b0),
1350                   sizeof (t0->packet_data));
1351         }
1352       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1353         {
1354           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1355           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1356           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1357           t1->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1358                              vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1359           clib_memcpy (t1->packet_data,
1360                   vlib_buffer_get_current (b1),
1361                   sizeof (t1->packet_data));
1362         }
1363       from += 2;
1364       n_left -= 2;
1365     }
1366
1367   while (n_left >= 1)
1368     {
1369       u32 bi0;
1370       vlib_buffer_t * b0;
1371       ip6_forward_next_trace_t * t0;
1372
1373       bi0 = from[0];
1374
1375       b0 = vlib_get_buffer (vm, bi0);
1376
1377       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1378         {
1379           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1380           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1381           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1382           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1383                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1384           clib_memcpy (t0->packet_data,
1385                   vlib_buffer_get_current (b0),
1386                   sizeof (t0->packet_data));
1387         }
1388       from += 1;
1389       n_left -= 1;
1390     }
1391 }
1392
1393 static uword
1394 ip6_drop_or_punt (vlib_main_t * vm,
1395                   vlib_node_runtime_t * node,
1396                   vlib_frame_t * frame,
1397                   ip6_error_t error_code)
1398 {
1399   u32 * buffers = vlib_frame_vector_args (frame);
1400   uword n_packets = frame->n_vectors;
1401
1402   vlib_error_drop_buffers (vm, node,
1403                            buffers,
1404                            /* stride */ 1,
1405                            n_packets,
1406                            /* next */ 0,
1407                            ip6_input_node.index,
1408                            error_code);
1409
1410   if (node->flags & VLIB_NODE_FLAG_TRACE)
1411     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1412
1413   return n_packets;
1414 }
1415
1416 static uword
1417 ip6_drop (vlib_main_t * vm,
1418           vlib_node_runtime_t * node,
1419           vlib_frame_t * frame)
1420 { return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_DROP); }
1421
1422 static uword
1423 ip6_punt (vlib_main_t * vm,
1424           vlib_node_runtime_t * node,
1425           vlib_frame_t * frame)
1426 { return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_PUNT); }
1427
1428 static uword
1429 ip6_miss (vlib_main_t * vm,
1430           vlib_node_runtime_t * node,
1431           vlib_frame_t * frame)
1432 { return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_DST_LOOKUP_MISS); }
1433
1434 VLIB_REGISTER_NODE (ip6_drop_node,static) = {
1435   .function = ip6_drop,
1436   .name = "ip6-drop",
1437   .vector_size = sizeof (u32),
1438
1439   .format_trace = format_ip6_forward_next_trace,
1440
1441   .n_next_nodes = 1,
1442   .next_nodes = {
1443     [0] = "error-drop",
1444   },
1445 };
1446
1447 VLIB_REGISTER_NODE (ip6_punt_node,static) = {
1448   .function = ip6_punt,
1449   .name = "ip6-punt",
1450   .vector_size = sizeof (u32),
1451
1452   .format_trace = format_ip6_forward_next_trace,
1453
1454   .n_next_nodes = 1,
1455   .next_nodes = {
1456     [0] = "error-punt",
1457   },
1458 };
1459
1460 VLIB_REGISTER_NODE (ip6_miss_node,static) = {
1461   .function = ip6_miss,
1462   .name = "ip6-miss",
1463   .vector_size = sizeof (u32),
1464
1465   .format_trace = format_ip6_forward_next_trace,
1466
1467   .n_next_nodes = 1,
1468   .next_nodes = {
1469     [0] = "error-drop",
1470   },
1471 };
1472
1473 VLIB_REGISTER_NODE (ip6_multicast_node,static) = {
1474   .function = ip6_drop,
1475   .name = "ip6-multicast",
1476   .vector_size = sizeof (u32),
1477
1478   .format_trace = format_ip6_forward_next_trace,
1479
1480   .n_next_nodes = 1,
1481   .next_nodes = {
1482     [0] = "error-drop",
1483   },
1484 };
1485
1486 /* Compute TCP/UDP/ICMP6 checksum in software. */
1487 u16 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, ip6_header_t * ip0, int *bogus_lengthp)
1488 {
1489   ip_csum_t sum0;
1490   u16 sum16, payload_length_host_byte_order;
1491   u32 i, n_this_buffer, n_bytes_left;
1492   u32 headers_size = sizeof(ip0[0]);
1493   void * data_this_buffer;
1494
1495   ASSERT(bogus_lengthp);
1496   *bogus_lengthp = 0;
1497
1498   /* Initialize checksum with ip header. */
1499   sum0 = ip0->payload_length + clib_host_to_net_u16 (ip0->protocol);
1500   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
1501   data_this_buffer = (void *) (ip0 + 1);
1502  
1503   for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++)
1504     {
1505       sum0 = ip_csum_with_carry (sum0,
1506                                  clib_mem_unaligned (&ip0->src_address.as_uword[i], uword));
1507       sum0 = ip_csum_with_carry (sum0,
1508                                  clib_mem_unaligned (&ip0->dst_address.as_uword[i], uword));
1509     }
1510
1511   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
1512   if (PREDICT_FALSE (ip0->protocol ==  IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
1513     {
1514       u32  skip_bytes;
1515       ip6_hop_by_hop_ext_t *ext_hdr = (ip6_hop_by_hop_ext_t  *)data_this_buffer;
1516
1517       /* validate really icmp6 next */
1518       ASSERT(ext_hdr->next_hdr == IP_PROTOCOL_ICMP6);
1519
1520       skip_bytes = 8* (1 + ext_hdr->n_data_u64s);
1521       data_this_buffer  = (void *)((u8 *)data_this_buffer + skip_bytes);
1522  
1523       payload_length_host_byte_order  -= skip_bytes;
1524       headers_size += skip_bytes;
1525    }
1526
1527   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1528 #if DPDK > 0
1529   if (p0 && n_this_buffer + headers_size  > p0->current_length)
1530   {
1531     struct rte_mbuf *mb = rte_mbuf_from_vlib_buffer(p0);
1532     u8 nb_segs = mb->nb_segs;
1533
1534     n_this_buffer = (p0->current_length > headers_size ?
1535                      p0->current_length - headers_size : 0);
1536     while (n_bytes_left)
1537       {
1538         sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1539         n_bytes_left -= n_this_buffer;
1540
1541         mb = mb->next;
1542         nb_segs--;
1543         if ((nb_segs == 0) || (mb == 0))
1544           break;
1545
1546         data_this_buffer = rte_ctrlmbuf_data(mb);
1547         n_this_buffer = mb->data_len;
1548       }
1549     if (n_bytes_left || nb_segs)
1550       {
1551         *bogus_lengthp = 1;
1552         return 0xfefe;
1553       }
1554   } 
1555   else sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1556 #else
1557   if (p0 && n_this_buffer + headers_size  > p0->current_length)
1558     n_this_buffer = p0->current_length > headers_size  ? p0->current_length - headers_size  : 0;
1559   while (1)
1560     {
1561       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1562       n_bytes_left -= n_this_buffer;
1563       if (n_bytes_left == 0)
1564         break;
1565
1566       if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
1567         {
1568           *bogus_lengthp = 1;
1569           return 0xfefe;
1570         }
1571       p0 = vlib_get_buffer (vm, p0->next_buffer);
1572       data_this_buffer = vlib_buffer_get_current (p0);
1573       n_this_buffer = p0->current_length;
1574     }
1575 #endif /* DPDK */
1576
1577   sum16 = ~ ip_csum_fold (sum0);
1578
1579   return sum16;
1580 }
1581
1582 u32 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1583 {
1584   ip6_header_t * ip0 = vlib_buffer_get_current (p0);
1585   udp_header_t * udp0;
1586   u16 sum16;
1587   int bogus_length;
1588
1589   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
1590   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1591           || ip0->protocol == IP_PROTOCOL_ICMP6
1592           || ip0->protocol == IP_PROTOCOL_UDP
1593           || ip0->protocol ==  IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS);
1594
1595   udp0 = (void *) (ip0 + 1);
1596   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1597     {
1598       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1599                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1600       return p0->flags;
1601     }
1602
1603   sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, &bogus_length);
1604
1605   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1606                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1607
1608   return p0->flags;
1609 }
1610
1611 static uword
1612 ip6_local (vlib_main_t * vm,
1613            vlib_node_runtime_t * node,
1614            vlib_frame_t * frame)
1615 {
1616   ip6_main_t * im = &ip6_main;
1617   ip_lookup_main_t * lm = &im->lookup_main;
1618   ip_local_next_t next_index;
1619   u32 * from, * to_next, n_left_from, n_left_to_next;
1620   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_input_node.index);
1621
1622   from = vlib_frame_vector_args (frame);
1623   n_left_from = frame->n_vectors;
1624   next_index = node->cached_next_index;
1625   
1626   if (node->flags & VLIB_NODE_FLAG_TRACE)
1627     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1628
1629   while (n_left_from > 0)
1630     {
1631       vlib_get_next_frame (vm, node, next_index,
1632                            to_next, n_left_to_next);
1633
1634       while (n_left_from >= 4 && n_left_to_next >= 2)
1635         {
1636           vlib_buffer_t * p0, * p1;
1637           ip6_header_t * ip0, * ip1;
1638           udp_header_t * udp0, * udp1;
1639           u32 pi0, ip_len0, udp_len0, flags0, next0;
1640           u32 pi1, ip_len1, udp_len1, flags1, next1;
1641           i32 len_diff0, len_diff1;
1642           u8 error0, type0, good_l4_checksum0;
1643           u8 error1, type1, good_l4_checksum1;
1644       
1645           pi0 = to_next[0] = from[0];
1646           pi1 = to_next[1] = from[1];
1647           from += 2;
1648           n_left_from -= 2;
1649           to_next += 2;
1650           n_left_to_next -= 2;
1651       
1652           p0 = vlib_get_buffer (vm, pi0);
1653           p1 = vlib_get_buffer (vm, pi1);
1654
1655           ip0 = vlib_buffer_get_current (p0);
1656           ip1 = vlib_buffer_get_current (p1);
1657
1658           type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
1659           type1 = lm->builtin_protocol_by_ip_protocol[ip1->protocol];
1660
1661           next0 = lm->local_next_by_ip_protocol[ip0->protocol];
1662           next1 = lm->local_next_by_ip_protocol[ip1->protocol];
1663
1664           flags0 = p0->flags;
1665           flags1 = p1->flags;
1666
1667           good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1668           good_l4_checksum1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1669
1670           udp0 = ip6_next_header (ip0);
1671           udp1 = ip6_next_header (ip1);
1672
1673           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1674           good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0;
1675           good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UDP && udp1->checksum == 0;
1676
1677           good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
1678           good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UNKNOWN;
1679
1680           /* Verify UDP length. */
1681           ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
1682           ip_len1 = clib_net_to_host_u16 (ip1->payload_length);
1683           udp_len0 = clib_net_to_host_u16 (udp0->length);
1684           udp_len1 = clib_net_to_host_u16 (udp1->length);
1685
1686           len_diff0 = ip_len0 - udp_len0;
1687           len_diff1 = ip_len1 - udp_len1;
1688
1689           len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
1690           len_diff1 = type1 == IP_BUILTIN_PROTOCOL_UDP ? len_diff1 : 0;
1691
1692           if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
1693                              && ! good_l4_checksum0
1694                              && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED)))
1695             {
1696               flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0);
1697               good_l4_checksum0 =
1698                 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1699             }
1700           if (PREDICT_FALSE (type1 != IP_BUILTIN_PROTOCOL_UNKNOWN
1701                              && ! good_l4_checksum1
1702                              && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED)))
1703             {
1704               flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, p1);
1705               good_l4_checksum1 =
1706                 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1707             }
1708
1709           error0 = error1 = IP6_ERROR_UNKNOWN_PROTOCOL;
1710
1711           error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0;
1712           error1 = len_diff1 < 0 ? IP6_ERROR_UDP_LENGTH : error1;
1713
1714           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP == IP6_ERROR_UDP_CHECKSUM);
1715           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP == IP6_ERROR_ICMP_CHECKSUM);
1716           error0 = (! good_l4_checksum0
1717                     ? IP6_ERROR_UDP_CHECKSUM + type0
1718                     : error0);
1719           error1 = (! good_l4_checksum1
1720                     ? IP6_ERROR_UDP_CHECKSUM + type1
1721                     : error1);
1722
1723           /* Drop packets from unroutable hosts. */
1724           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1725           if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && type0 != IP_BUILTIN_PROTOCOL_ICMP)
1726             {
1727               u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
1728               error0 = (lm->miss_adj_index == src_adj_index0
1729                         ? IP6_ERROR_SRC_LOOKUP_MISS
1730                         : error0);
1731             }
1732           if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL && type1 != IP_BUILTIN_PROTOCOL_ICMP)
1733             {
1734               u32 src_adj_index1 = ip6_src_lookup_for_packet (im, p1, ip1);
1735               error1 = (lm->miss_adj_index == src_adj_index1
1736                         ? IP6_ERROR_SRC_LOOKUP_MISS
1737                         : error1);
1738             }
1739
1740           next0 = error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1741           next1 = error1 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1742
1743           p0->error = error_node->errors[error0];
1744           p1->error = error_node->errors[error1];
1745
1746           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1747                                            to_next, n_left_to_next,
1748                                            pi0, pi1, next0, next1);
1749         }
1750
1751       while (n_left_from > 0 && n_left_to_next > 0)
1752         {
1753           vlib_buffer_t * p0;
1754           ip6_header_t * ip0;
1755           udp_header_t * udp0;
1756           u32 pi0, ip_len0, udp_len0, flags0, next0;
1757           i32 len_diff0;
1758           u8 error0, type0, good_l4_checksum0;
1759       
1760           pi0 = to_next[0] = from[0];
1761           from += 1;
1762           n_left_from -= 1;
1763           to_next += 1;
1764           n_left_to_next -= 1;
1765       
1766           p0 = vlib_get_buffer (vm, pi0);
1767
1768           ip0 = vlib_buffer_get_current (p0);
1769
1770           type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
1771           next0 = lm->local_next_by_ip_protocol[ip0->protocol];
1772
1773           flags0 = p0->flags;
1774
1775           good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1776
1777           udp0 = ip6_next_header (ip0);
1778
1779           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1780           good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0;
1781
1782           good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
1783
1784           /* Verify UDP length. */
1785           ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
1786           udp_len0 = clib_net_to_host_u16 (udp0->length);
1787
1788           len_diff0 = ip_len0 - udp_len0;
1789
1790           len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
1791
1792           if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
1793                              && ! good_l4_checksum0
1794                              && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED)))
1795             {
1796               flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0);
1797               good_l4_checksum0 =
1798                 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1799             }
1800
1801           error0 = IP6_ERROR_UNKNOWN_PROTOCOL;
1802
1803           error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0;
1804
1805           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP == IP6_ERROR_UDP_CHECKSUM);
1806           ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP == IP6_ERROR_ICMP_CHECKSUM);
1807           error0 = (! good_l4_checksum0
1808                     ? IP6_ERROR_UDP_CHECKSUM + type0
1809                     : error0);
1810
1811           /* If this is a neighbor solicitation (ICMP), skip source RPF check */
1812           if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && type0 != IP_BUILTIN_PROTOCOL_ICMP)
1813             {
1814               u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
1815               error0 = (lm->miss_adj_index == src_adj_index0
1816                         ? IP6_ERROR_SRC_LOOKUP_MISS
1817                         : error0);
1818             }
1819
1820           next0 = error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1821
1822           p0->error = error_node->errors[error0];
1823
1824           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1825                                            to_next, n_left_to_next,
1826                                            pi0, next0);
1827         }
1828   
1829       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1830     }
1831
1832   return frame->n_vectors;
1833 }
1834
1835 VLIB_REGISTER_NODE (ip6_local_node,static) = {
1836   .function = ip6_local,
1837   .name = "ip6-local",
1838   .vector_size = sizeof (u32),
1839
1840   .format_trace = format_ip6_forward_next_trace,
1841
1842   .n_next_nodes = IP_LOCAL_N_NEXT,
1843   .next_nodes = {
1844     [IP_LOCAL_NEXT_DROP] = "error-drop",
1845     [IP_LOCAL_NEXT_PUNT] = "error-punt",
1846     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
1847     [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input",
1848   },
1849 };
1850
1851 void ip6_register_protocol (u32 protocol, u32 node_index)
1852 {
1853   vlib_main_t * vm = vlib_get_main();
1854   ip6_main_t * im = &ip6_main;
1855   ip_lookup_main_t * lm = &im->lookup_main;
1856
1857   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1858   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip6_local_node.index, node_index);
1859 }
1860
/* Next-node arcs of the ip6-discover-neighbor graph node. */
typedef enum {
  /* The packet that triggered discovery always ends up here. */
  IP6_DISCOVER_NEIGHBOR_NEXT_DROP = 0,
  /* Arc used for the generated neighbor solicitation. */
  IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX,
  /* Number of arcs; must stay last. */
  IP6_DISCOVER_NEIGHBOR_N_NEXT,
} ip6_discover_neighbor_next_t;
1866
/*
 * Error/counter indices of the ip6-discover-neighbor node; they index
 * ip6_discover_neighbor_error_strings[].
 */
typedef enum {
  /* Packet dropped without sending a solicitation. */
  IP6_DISCOVER_NEIGHBOR_ERROR_DROP = 0,
  /* Packet dropped after a solicitation was sent on its behalf. */
  IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT,
} ip6_discover_neighbor_error_t;
1871
1872 static uword
1873 ip6_discover_neighbor (vlib_main_t * vm,
1874                        vlib_node_runtime_t * node,
1875                        vlib_frame_t * frame)
1876 {
1877   vnet_main_t * vnm = vnet_get_main();
1878   ip6_main_t * im = &ip6_main;
1879   ip_lookup_main_t * lm = &im->lookup_main;
1880   u32 * from, * to_next_drop;
1881   uword n_left_from, n_left_to_next_drop;
1882   static f64 time_last_seed_change = -1e100;
1883   static u32 hash_seeds[3];
1884   static uword hash_bitmap[256 / BITS (uword)]; 
1885   f64 time_now;
1886   int bogus_length;
1887
1888   if (node->flags & VLIB_NODE_FLAG_TRACE)
1889     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
1890
1891   time_now = vlib_time_now (vm);
1892   if (time_now - time_last_seed_change > 1e-3)
1893     {
1894       uword i;
1895       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1896                                              sizeof (hash_seeds));
1897       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1898         hash_seeds[i] = r[i];
1899
1900       /* Mark all hash keys as been not-seen before. */
1901       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1902         hash_bitmap[i] = 0;
1903
1904       time_last_seed_change = time_now;
1905     }
1906
1907   from = vlib_frame_vector_args (frame);
1908   n_left_from = frame->n_vectors;
1909
1910   while (n_left_from > 0)
1911     {
1912       vlib_get_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP,
1913                            to_next_drop, n_left_to_next_drop);
1914
1915       while (n_left_from > 0 && n_left_to_next_drop > 0)
1916         {
1917           vlib_buffer_t * p0;
1918           ip6_header_t * ip0;
1919           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1920           uword bm0;
1921           ip_adjacency_t * adj0;
1922           vnet_hw_interface_t * hw_if0;
1923           u32 next0;
1924
1925           pi0 = from[0];
1926
1927           p0 = vlib_get_buffer (vm, pi0);
1928
1929           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1930
1931           ip0 = vlib_buffer_get_current (p0);
1932
1933           adj0 = ip_get_adjacency (lm, adj_index0);
1934
1935           if (adj0->arp.next_hop.ip6.as_u64[0] ||
1936               adj0->arp.next_hop.ip6.as_u64[1]) {
1937             ip0->dst_address.as_u64[0] = adj0->arp.next_hop.ip6.as_u64[0];
1938             ip0->dst_address.as_u64[1] = adj0->arp.next_hop.ip6.as_u64[1];
1939           }
1940
1941           a0 = hash_seeds[0];
1942           b0 = hash_seeds[1];
1943           c0 = hash_seeds[2];
1944
1945           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1946           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1947
1948           a0 ^= sw_if_index0;
1949           b0 ^= ip0->dst_address.as_u32[0];
1950           c0 ^= ip0->dst_address.as_u32[1];
1951
1952           hash_v3_mix32 (a0, b0, c0);
1953
1954           b0 ^= ip0->dst_address.as_u32[2];
1955           c0 ^= ip0->dst_address.as_u32[3];
1956
1957           hash_v3_finalize32 (a0, b0, c0);
1958
1959           c0 &= BITS (hash_bitmap) - 1;
1960           c0 = c0 / BITS (uword);
1961           m0 = (uword) 1 << (c0 % BITS (uword));
1962
1963           bm0 = hash_bitmap[c0];
1964           drop0 = (bm0 & m0) != 0;
1965
1966           /* Mark it as seen. */
1967           hash_bitmap[c0] = bm0 | m0;
1968
1969           from += 1;
1970           n_left_from -= 1;
1971           to_next_drop[0] = pi0;
1972           to_next_drop += 1;
1973           n_left_to_next_drop -= 1;
1974
1975           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1976
1977           /* If the interface is link-down, drop the pkt */
1978           if (!(hw_if0->flags & VNET_HW_INTERFACE_FLAG_LINK_UP))
1979             drop0 = 1;
1980
1981           p0->error = 
1982             node->errors[drop0 ? IP6_DISCOVER_NEIGHBOR_ERROR_DROP 
1983                          : IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT];
1984           if (drop0)
1985             continue;
1986
1987           {
1988             u32 bi0 = 0;
1989             icmp6_neighbor_solicitation_header_t * h0;
1990             vlib_buffer_t * b0;
1991
1992             h0 = vlib_packet_template_get_packet 
1993               (vm, &im->discover_neighbor_packet_template, &bi0);
1994
1995             /* 
1996              * Build ethernet header.
1997              * Choose source address based on destination lookup 
1998              * adjacency. 
1999              */
2000             ip6_src_address_for_packet (im, p0, &h0->ip.src_address, 
2001                                         sw_if_index0);
2002
2003             /* 
2004              * Destination address is a solicited node multicast address.  
2005              * We need to fill in
2006              * the low 24 bits with low 24 bits of target's address. 
2007              */
2008             h0->ip.dst_address.as_u8[13] = ip0->dst_address.as_u8[13];
2009             h0->ip.dst_address.as_u8[14] = ip0->dst_address.as_u8[14];
2010             h0->ip.dst_address.as_u8[15] = ip0->dst_address.as_u8[15];
2011
2012             h0->neighbor.target_address = ip0->dst_address;
2013
2014             clib_memcpy (h0->link_layer_option.ethernet_address, 
2015                     hw_if0->hw_address, vec_len (hw_if0->hw_address));
2016
2017             /* $$$$ appears we need this; why is the checksum non-zero? */
2018             h0->neighbor.icmp.checksum = 0;
2019             h0->neighbor.icmp.checksum = 
2020               ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h0->ip, 
2021                                                  &bogus_length);
2022
2023             ASSERT (bogus_length == 0);
2024
2025             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2026             b0 = vlib_get_buffer (vm, bi0);
2027             vnet_buffer (b0)->sw_if_index[VLIB_TX] 
2028               = vnet_buffer (p0)->sw_if_index[VLIB_TX];
2029
2030             /* Add rewrite/encap string. */
2031             vnet_rewrite_one_header (adj0[0], h0, 
2032                                      sizeof (ethernet_header_t));
2033             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2034
2035             next0 = IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX;
2036
2037             vlib_set_next_frame_buffer (vm, node, next0, bi0);
2038           }
2039         }
2040
2041       vlib_put_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP, 
2042                            n_left_to_next_drop);
2043     }
2044
2045   return frame->n_vectors;
2046 }
2047
2048 static char * ip6_discover_neighbor_error_strings[] = {
2049   [IP6_DISCOVER_NEIGHBOR_ERROR_DROP] = "address overflow drops",
2050   [IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT] 
2051   = "neighbor solicitations sent",
2052 };
2053
2054 VLIB_REGISTER_NODE (ip6_discover_neighbor_node) = {
2055   .function = ip6_discover_neighbor,
2056   .name = "ip6-discover-neighbor",
2057   .vector_size = sizeof (u32),
2058
2059   .format_trace = format_ip6_forward_next_trace,
2060
2061   .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings),
2062   .error_strings = ip6_discover_neighbor_error_strings,
2063
2064   .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT,
2065   .next_nodes = {
2066     [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop",
2067     [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output",
2068   },
2069 };
2070
2071 clib_error_t *
2072 ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index)
2073 {
2074   vnet_main_t * vnm = vnet_get_main();
2075   ip6_main_t * im = &ip6_main;
2076   icmp6_neighbor_solicitation_header_t * h;
2077   ip6_address_t * src;
2078   ip_interface_address_t * ia;
2079   ip_adjacency_t * adj;
2080   vnet_hw_interface_t * hi;
2081   vnet_sw_interface_t * si;
2082   vlib_buffer_t * b;
2083   u32 bi = 0;
2084   int bogus_length;
2085
2086   si = vnet_get_sw_interface (vnm, sw_if_index);
2087
2088   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2089     {
2090       return clib_error_return (0, "%U: interface %U down",
2091                                 format_ip6_address, dst, 
2092                                 format_vnet_sw_if_index_name, vnm, 
2093                                 sw_if_index);
2094     }
2095
2096   src = ip6_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2097   if (! src)
2098     {
2099       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2100       return clib_error_return 
2101         (0, "no matching interface address for destination %U (interface %U)",
2102          format_ip6_address, dst,
2103          format_vnet_sw_if_index_name, vnm, sw_if_index);
2104     }
2105
2106   h = vlib_packet_template_get_packet (vm, &im->discover_neighbor_packet_template, &bi);
2107
2108   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2109
2110   /* Destination address is a solicited node multicast address.  We need to fill in
2111      the low 24 bits with low 24 bits of target's address. */
2112   h->ip.dst_address.as_u8[13] = dst->as_u8[13];
2113   h->ip.dst_address.as_u8[14] = dst->as_u8[14];
2114   h->ip.dst_address.as_u8[15] = dst->as_u8[15];
2115
2116   h->ip.src_address = src[0];
2117   h->neighbor.target_address = dst[0];
2118
2119   clib_memcpy (h->link_layer_option.ethernet_address, hi->hw_address, vec_len (hi->hw_address));
2120
2121   h->neighbor.icmp.checksum = 
2122     ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length);
2123   ASSERT(bogus_length == 0);
2124
2125   b = vlib_get_buffer (vm, bi);
2126   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2127
2128   /* Add encapsulation string for software interface (e.g. ethernet header). */
2129   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2130   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2131   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2132
2133   {
2134     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2135     u32 * to_next = vlib_frame_vector_args (f);
2136     to_next[0] = bi;
2137     f->n_vectors = 1;
2138     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2139   }
2140
2141   return /* no error */ 0;
2142 }
2143
/* Statically registered next-node arcs of the ip6 rewrite nodes. */
typedef enum {
  /* Taken whenever ip6_rewrite_inline flags an error on a packet. */
  IP6_REWRITE_NEXT_DROP = 0,
} ip6_rewrite_next_t;
2147
2148 always_inline uword
2149 ip6_rewrite_inline (vlib_main_t * vm,
2150                     vlib_node_runtime_t * node,
2151                     vlib_frame_t * frame,
2152                     int rewrite_for_locally_received_packets)
2153 {
2154   ip_lookup_main_t * lm = &ip6_main.lookup_main;
2155   u32 * from = vlib_frame_vector_args (frame);
2156   u32 n_left_from, n_left_to_next, * to_next, next_index;
2157   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_input_node.index);
2158   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2159
2160   n_left_from = frame->n_vectors;
2161   next_index = node->cached_next_index;
2162   u32 cpu_index = os_get_cpu_number();
2163   
2164   while (n_left_from > 0)
2165     {
2166       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2167
2168       while (n_left_from >= 4 && n_left_to_next >= 2)
2169         {
2170           ip_adjacency_t * adj0, * adj1;
2171           vlib_buffer_t * p0, * p1;
2172           ip6_header_t * ip0, * ip1;
2173           u32 pi0, rw_len0, next0, error0, adj_index0;
2174           u32 pi1, rw_len1, next1, error1, adj_index1;
2175       
2176           /* Prefetch next iteration. */
2177           {
2178             vlib_buffer_t * p2, * p3;
2179
2180             p2 = vlib_get_buffer (vm, from[2]);
2181             p3 = vlib_get_buffer (vm, from[3]);
2182
2183             vlib_prefetch_buffer_header (p2, LOAD);
2184             vlib_prefetch_buffer_header (p3, LOAD);
2185
2186             CLIB_PREFETCH (p2->pre_data, 32, STORE);
2187             CLIB_PREFETCH (p3->pre_data, 32, STORE);
2188
2189             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2190             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2191           }
2192
2193           pi0 = to_next[0] = from[0];
2194           pi1 = to_next[1] = from[1];
2195
2196           from += 2;
2197           n_left_from -= 2;
2198           to_next += 2;
2199           n_left_to_next -= 2;
2200       
2201           p0 = vlib_get_buffer (vm, pi0);
2202           p1 = vlib_get_buffer (vm, pi1);
2203
2204           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2205           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2206
2207           /* We should never rewrite a pkt using the MISS adjacency */
2208           ASSERT(adj_index0 && adj_index1);
2209
2210           ip0 = vlib_buffer_get_current (p0);
2211           ip1 = vlib_buffer_get_current (p1);
2212
2213           error0 = error1 = IP6_ERROR_NONE;
2214
2215           if (! rewrite_for_locally_received_packets)
2216             {
2217               i32 hop_limit0 = ip0->hop_limit, hop_limit1 = ip1->hop_limit;
2218
2219               /* Input node should have reject packets with hop limit 0. */
2220               ASSERT (ip0->hop_limit > 0);
2221               ASSERT (ip1->hop_limit > 0);
2222
2223               hop_limit0 -= 1;
2224               hop_limit1 -= 1;
2225
2226               ip0->hop_limit = hop_limit0;
2227               ip1->hop_limit = hop_limit1;
2228
2229               error0 = hop_limit0 <= 0 ? IP6_ERROR_TIME_EXPIRED : error0;
2230               error1 = hop_limit1 <= 0 ? IP6_ERROR_TIME_EXPIRED : error1;
2231             }
2232
2233           adj0 = ip_get_adjacency (lm, adj_index0);
2234           adj1 = ip_get_adjacency (lm, adj_index1);
2235
2236           if (rewrite_for_locally_received_packets)
2237             {
2238               /*
2239                * If someone sends e.g. an icmp6 w/ src = dst = interface addr,
2240                * we end up here with a local adjacency in hand
2241                */
2242               if (PREDICT_FALSE(adj0->lookup_next_index 
2243                                 == IP_LOOKUP_NEXT_LOCAL))
2244                 error0 = IP6_ERROR_SPOOFED_LOCAL_PACKETS;
2245               if (PREDICT_FALSE(adj1->lookup_next_index 
2246                                 == IP_LOOKUP_NEXT_LOCAL))
2247                 error1 = IP6_ERROR_SPOOFED_LOCAL_PACKETS;
2248             }
2249
2250           rw_len0 = adj0[0].rewrite_header.data_bytes;
2251           rw_len1 = adj1[0].rewrite_header.data_bytes;
2252
2253           vlib_increment_combined_counter (&lm->adjacency_counters,
2254                                            cpu_index, 
2255                                            adj_index0,
2256                                            /* packet increment */ 0,
2257                                            /* byte increment */ rw_len0);
2258           vlib_increment_combined_counter (&lm->adjacency_counters,
2259                                            cpu_index, 
2260                                            adj_index1,
2261                                            /* packet increment */ 0,
2262                                            /* byte increment */ rw_len1);
2263
2264           /* Check MTU of outgoing interface. */
2265           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2266                     ? IP6_ERROR_MTU_EXCEEDED
2267                     : error0);
2268           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2269                     ? IP6_ERROR_MTU_EXCEEDED
2270                     : error1);
2271
2272           p0->current_data -= rw_len0;
2273           p1->current_data -= rw_len1;
2274
2275           p0->current_length += rw_len0;
2276           p1->current_length += rw_len1;
2277
2278           vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
2279           vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index;
2280       
2281           next0 = (error0 == IP6_ERROR_NONE) ? 
2282             adj0[0].rewrite_header.next_index : IP6_REWRITE_NEXT_DROP;
2283           next1 = (error1 == IP6_ERROR_NONE) ? 
2284             adj1[0].rewrite_header.next_index : IP6_REWRITE_NEXT_DROP;
2285
2286           /* Guess we are only writing on simple Ethernet header. */
2287           vnet_rewrite_two_headers (adj0[0], adj1[0],
2288                                     ip0, ip1,
2289                                     sizeof (ethernet_header_t));
2290       
2291           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2292                                            to_next, n_left_to_next,
2293                                            pi0, pi1, next0, next1);
2294         }
2295
2296       while (n_left_from > 0 && n_left_to_next > 0)
2297         {
2298           ip_adjacency_t * adj0;
2299           vlib_buffer_t * p0;
2300           ip6_header_t * ip0;
2301           u32 pi0, rw_len0;
2302           u32 adj_index0, next0, error0;
2303       
2304           pi0 = to_next[0] = from[0];
2305
2306           p0 = vlib_get_buffer (vm, pi0);
2307
2308           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2309
2310           /* We should never rewrite a pkt using the MISS adjacency */
2311           ASSERT(adj_index0);
2312
2313           adj0 = ip_get_adjacency (lm, adj_index0);
2314       
2315           ip0 = vlib_buffer_get_current (p0);
2316
2317           error0 = IP6_ERROR_NONE;
2318
2319           /* Check hop limit */
2320           if (! rewrite_for_locally_received_packets)
2321             {
2322               i32 hop_limit0 = ip0->hop_limit;
2323
2324               ASSERT (ip0->hop_limit > 0);
2325
2326               hop_limit0 -= 1;
2327
2328               ip0->hop_limit = hop_limit0;
2329
2330               error0 = hop_limit0 <= 0 ? IP6_ERROR_TIME_EXPIRED : error0;
2331             }
2332
2333           if (rewrite_for_locally_received_packets)
2334             {
2335               if (PREDICT_FALSE(adj0->lookup_next_index 
2336                                 == IP_LOOKUP_NEXT_LOCAL))
2337                 error0 = IP6_ERROR_SPOOFED_LOCAL_PACKETS;
2338             }
2339
2340           /* Guess we are only writing on simple Ethernet header. */
2341           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2342       
2343           /* Update packet buffer attributes/set output interface. */
2344           rw_len0 = adj0[0].rewrite_header.data_bytes;
2345
2346           vlib_increment_combined_counter (&lm->adjacency_counters,
2347                                            cpu_index, 
2348                                            adj_index0,
2349                                            /* packet increment */ 0,
2350                                            /* byte increment */ rw_len0);
2351
2352           /* Check MTU of outgoing interface. */
2353           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2354                     ? IP6_ERROR_MTU_EXCEEDED
2355                     : error0);
2356
2357           p0->current_data -= rw_len0;
2358           p0->current_length += rw_len0;
2359           vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
2360       
2361           next0 = (error0 == IP6_ERROR_NONE) ?
2362             adj0[0].rewrite_header.next_index : IP6_REWRITE_NEXT_DROP;
2363
2364           p0->error = error_node->errors[error0];
2365
2366           from += 1;
2367           n_left_from -= 1;
2368           to_next += 1;
2369           n_left_to_next -= 1;
2370       
2371           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2372                                            to_next, n_left_to_next,
2373                                            pi0, next0);
2374         }
2375
2376       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2377     }
2378
2379   /* Need to do trace after rewrites to pick up new packet data. */
2380   if (node->flags & VLIB_NODE_FLAG_TRACE)
2381     ip6_forward_next_trace (vm, node, frame, adj_rx_tx);
2382
2383   return frame->n_vectors;
2384 }
2385
2386 static uword
2387 ip6_rewrite_transit (vlib_main_t * vm,
2388                      vlib_node_runtime_t * node,
2389                      vlib_frame_t * frame)
2390 {
2391   return ip6_rewrite_inline (vm, node, frame,
2392                              /* rewrite_for_locally_received_packets */ 0);
2393 }
2394
2395 static uword
2396 ip6_rewrite_local (vlib_main_t * vm,
2397                    vlib_node_runtime_t * node,
2398                    vlib_frame_t * frame)
2399 {
2400   return ip6_rewrite_inline (vm, node, frame,
2401                              /* rewrite_for_locally_received_packets */ 1);
2402 }
2403
2404 VLIB_REGISTER_NODE (ip6_rewrite_node) = {
2405   .function = ip6_rewrite_transit,
2406   .name = "ip6-rewrite",
2407   .vector_size = sizeof (u32),
2408
2409   .format_trace = format_ip6_forward_next_trace,
2410
2411   .n_next_nodes = 1,
2412   .next_nodes = {
2413     [IP6_REWRITE_NEXT_DROP] = "error-drop",
2414   },
2415 };
2416
2417 VLIB_REGISTER_NODE (ip6_rewrite_local_node,static) = {
2418   .function = ip6_rewrite_local,
2419   .name = "ip6-rewrite-local",
2420   .vector_size = sizeof (u32),
2421
2422   .sibling_of = "ip6-rewrite",
2423
2424   .format_trace = format_ip6_forward_next_trace,
2425
2426   .n_next_nodes = 1,
2427   .next_nodes = {
2428     [IP6_REWRITE_NEXT_DROP] = "error-drop",
2429   },
2430 };
2431
/* Global IP6 main: the single ip6_main_t instance that the functions in
   this file reach through &ip6_main (lookup state, FIB masks, lookup
   hash table and the neighbor-discovery packet template). */
ip6_main_t ip6_main;
2434
2435 static clib_error_t *
2436 ip6_lookup_init (vlib_main_t * vm)
2437 {
2438   ip6_main_t * im = &ip6_main;
2439   uword i;
2440
2441   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
2442     {
2443       u32 j, i0, i1;
2444
2445       i0 = i / 32;
2446       i1 = i % 32;
2447
2448       for (j = 0; j < i0; j++)
2449         im->fib_masks[i].as_u32[j] = ~0;
2450
2451       if (i1)
2452         im->fib_masks[i].as_u32[i0] = clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
2453     }
2454
2455   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 1);
2456
2457   if (im->lookup_table_nbuckets == 0)
2458     im->lookup_table_nbuckets = IP6_FIB_DEFAULT_HASH_NUM_BUCKETS;
2459
2460   im->lookup_table_nbuckets = 1<< max_log2 (im->lookup_table_nbuckets);
2461
2462   if (im->lookup_table_size == 0)
2463     im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE;
2464   
2465   BV(clib_bihash_init) (&im->ip6_lookup_table, "ip6 lookup table",
2466                         im->lookup_table_nbuckets,
2467                         im->lookup_table_size);
2468   
2469   /* Create FIB with index 0 and table id of 0. */
2470   find_ip6_fib_by_table_index_or_id (im, /* table id */ 0, IP6_ROUTE_FLAG_TABLE_ID);
2471
2472   {
2473     pg_node_t * pn;
2474     pn = pg_get_node (ip6_lookup_node.index);
2475     pn->unformat_edit = unformat_pg_ip6_header;
2476   }
2477
2478   {
2479     icmp6_neighbor_solicitation_header_t p;
2480
2481     memset (&p, 0, sizeof (p));
2482
2483     p.ip.ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6 << 28);
2484     p.ip.payload_length = clib_host_to_net_u16 (sizeof (p)
2485                                                 - STRUCT_OFFSET_OF (icmp6_neighbor_solicitation_header_t, neighbor));
2486     p.ip.protocol = IP_PROTOCOL_ICMP6;
2487     p.ip.hop_limit = 255;
2488     ip6_set_solicited_node_multicast_address (&p.ip.dst_address, 0);
2489
2490     p.neighbor.icmp.type = ICMP6_neighbor_solicitation;
2491
2492     p.link_layer_option.header.type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
2493     p.link_layer_option.header.n_data_u64s = sizeof (p.link_layer_option) / sizeof (u64);
2494
2495     vlib_packet_template_init (vm,
2496                                &im->discover_neighbor_packet_template,
2497                                &p, sizeof (p),
2498                                /* alloc chunk size */ 8,
2499                                "ip6 neighbor discovery");
2500   }
2501
2502   return 0;
2503 }
2504
2505 VLIB_INIT_FUNCTION (ip6_lookup_init);
2506
2507 static clib_error_t *
2508 add_del_ip6_interface_table (vlib_main_t * vm,
2509                              unformat_input_t * input,
2510                              vlib_cli_command_t * cmd)
2511 {
2512   vnet_main_t * vnm = vnet_get_main();
2513   clib_error_t * error = 0;
2514   u32 sw_if_index, table_id;
2515
2516   sw_if_index = ~0;
2517
2518   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2519     {
2520       error = clib_error_return (0, "unknown interface `%U'",
2521                                  format_unformat_error, input);
2522       goto done;
2523     }
2524
2525   if (unformat (input, "%d", &table_id))
2526     ;
2527   else
2528     {
2529       error = clib_error_return (0, "expected table id `%U'",
2530                                  format_unformat_error, input);
2531       goto done;
2532     }
2533
2534   {
2535     ip6_main_t * im = &ip6_main;
2536     ip6_fib_t * fib = 
2537       find_ip6_fib_by_table_index_or_id (im, table_id, IP6_ROUTE_FLAG_TABLE_ID);
2538
2539     if (fib) 
2540       {
2541         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2542         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
2543     }
2544   }
2545
2546  done:
2547   return error;
2548 }
2549
2550 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2551   .path = "set interface ip6 table",
2552   .function = add_del_ip6_interface_table,
2553   .short_help = "set interface ip6 table <intfc> <table-id>"
2554 };
2555
2556 void 
2557 ip6_link_local_address_from_ethernet_mac_address (ip6_address_t *ip,
2558                                                   u8 *mac)
2559 {
2560   ip->as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL);
2561   /* Invert the "u" bit */
2562   ip->as_u8 [8] = mac[0] ^ (1<<1);
2563   ip->as_u8 [9] = mac[1];
2564   ip->as_u8 [10] = mac[2];
2565   ip->as_u8 [11] = 0xFF;
2566   ip->as_u8 [12] = 0xFE;
2567   ip->as_u8 [13] = mac[3];
2568   ip->as_u8 [14] = mac[4];
2569   ip->as_u8 [15] = mac[5];
2570 }
2571
2572 void 
2573 ip6_ethernet_mac_address_from_link_local_address (u8 *mac, 
2574                                                   ip6_address_t *ip)
2575 {
2576   /* Invert the previously inverted "u" bit */
2577   mac[0] = ip->as_u8 [8] ^ (1<<1);
2578   mac[1] = ip->as_u8 [9];
2579   mac[2] = ip->as_u8 [10];
2580   mac[3] = ip->as_u8 [13];
2581   mac[4] = ip->as_u8 [14];
2582   mac[5] = ip->as_u8 [15];
2583 }
2584
2585 static clib_error_t * 
2586 test_ip6_link_command_fn (vlib_main_t * vm,
2587                           unformat_input_t * input,
2588                           vlib_cli_command_t * cmd)
2589 {
2590   u8 mac[6];
2591   ip6_address_t _a, *a = &_a;
2592
2593   if (unformat (input, "%U", unformat_ethernet_address, mac))
2594     {
2595       ip6_link_local_address_from_ethernet_mac_address (a, mac);
2596       vlib_cli_output (vm, "Link local address: %U",
2597                        format_ip6_address, a);
2598       ip6_ethernet_mac_address_from_link_local_address (mac, a);
2599       vlib_cli_output (vm, "Original MAC address: %U",
2600                        format_ethernet_address, mac);
2601     }
2602                 
2603   return 0;
2604 }
2605
2606 VLIB_CLI_COMMAND (test_link_command, static) = {
2607   .path = "test ip6 link",
2608   .function = test_ip6_link_command_fn, 
2609   .short_help = "test ip6 link <mac-address>",
2610 };
2611
2612 int vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config)
2613 {
2614   ip6_main_t * im6 = &ip6_main;
2615   ip6_fib_t * fib;
2616   uword * p = hash_get (im6->fib_index_by_table_id, table_id);
2617
2618   if (p == 0)
2619     return -1;
2620
2621   fib = vec_elt_at_index (im6->fibs, p[0]);
2622
2623   fib->flow_hash_config = flow_hash_config;
2624   return 1;
2625 }
2626
2627 static clib_error_t *
2628 set_ip6_flow_hash_command_fn (vlib_main_t * vm,
2629                               unformat_input_t * input,
2630                               vlib_cli_command_t * cmd)
2631 {
2632   int matched = 0;
2633   u32 table_id = 0;
2634   u32 flow_hash_config = 0;
2635   int rv;
2636
2637   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2638     if (unformat (input, "table %d", &table_id))
2639       matched = 1;
2640 #define _(a,v) \
2641     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2642     foreach_flow_hash_bit
2643 #undef _
2644     else break;
2645   }
2646
2647   if (matched == 0)
2648     return clib_error_return (0, "unknown input `%U'",
2649                               format_unformat_error, input);
2650   
2651   rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config);
2652   switch (rv)
2653     {
2654     case 1:
2655       break;
2656
2657     case -1:
2658       return clib_error_return (0, "no such FIB table %d", table_id);
2659       
2660     default:
2661       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2662       break;
2663     }
2664   
2665   return 0;
2666 }
2667
2668 VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) = {
2669     .path = "set ip6 flow-hash",
2670     .short_help = 
2671     "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
2672     .function = set_ip6_flow_hash_command_fn,
2673 };
2674
2675 static clib_error_t *
2676 show_ip6_local_command_fn (vlib_main_t * vm,
2677                            unformat_input_t * input,
2678                            vlib_cli_command_t * cmd)
2679 {
2680   ip6_main_t * im = &ip6_main;
2681   ip_lookup_main_t * lm = &im->lookup_main;
2682   int i;
2683   
2684   vlib_cli_output (vm, "Protocols handled by ip6_local");
2685   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2686     {
2687       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2688         vlib_cli_output (vm, "%d", i);
2689     }
2690   return 0;
2691 }
2692
2693
2694
2695 VLIB_CLI_COMMAND (show_ip_local, static) = {
2696   .path = "show ip6 local",
2697   .function = show_ip6_local_command_fn,
2698   .short_help = "Show ip6 local protocol table",
2699 };
2700
2701 int vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
2702                                  u32 table_index)
2703 {
2704   vnet_main_t * vnm = vnet_get_main();
2705   vnet_interface_main_t * im = &vnm->interface_main;
2706   ip6_main_t * ipm = &ip6_main;
2707   ip_lookup_main_t * lm = &ipm->lookup_main;
2708   vnet_classify_main_t * cm = &vnet_classify_main;
2709
2710   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2711     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2712
2713   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2714     return VNET_API_ERROR_NO_SUCH_ENTRY;
2715
2716   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2717   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
2718
2719   return 0;
2720 }
2721
2722 static clib_error_t *
2723 set_ip6_classify_command_fn (vlib_main_t * vm,
2724                              unformat_input_t * input,
2725                              vlib_cli_command_t * cmd)
2726 {
2727   u32 table_index = ~0;
2728   int table_index_set = 0;
2729   u32 sw_if_index = ~0;
2730   int rv;
2731   
2732   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2733     if (unformat (input, "table-index %d", &table_index))
2734       table_index_set = 1;
2735     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
2736                        vnet_get_main(), &sw_if_index))
2737         ;
2738     else
2739         break;
2740   }
2741   
2742   if (table_index_set == 0)
2743       return clib_error_return (0, "classify table-index must be specified");
2744   
2745   if (sw_if_index == ~0)
2746     return clib_error_return (0, "interface / subif must be specified");
2747
2748   rv = vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index);
2749
2750   switch (rv)
2751     {
2752     case 0:
2753       break;
2754
2755     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2756       return clib_error_return (0, "No such interface");
2757
2758     case VNET_API_ERROR_NO_SUCH_ENTRY:
2759       return clib_error_return (0, "No such classifier table");
2760     }
2761   return 0;
2762 }
2763
2764 VLIB_CLI_COMMAND (set_ip6_classify_command, static) = {
2765     .path = "set ip6 classify",
2766     .short_help = 
2767     "set ip6 classify intfc <int> table-index <index>",
2768     .function = set_ip6_classify_command_fn,
2769 };
2770
2771 static clib_error_t *
2772 ip6_config (vlib_main_t * vm, unformat_input_t * input)
2773 {
2774   ip6_main_t * im = &ip6_main;
2775   uword heapsize = 0;
2776   u32 tmp;
2777   u32 nbuckets = 0;
2778
2779   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2780     if (unformat (input, "hash-buckets %d", &tmp))
2781       nbuckets = tmp;
2782     else if (unformat (input, "heap-size %dm", &tmp))
2783       heapsize = ((u64)tmp) << 20;
2784     else if (unformat (input, "heap-size %dM", &tmp))
2785       heapsize = ((u64)tmp) << 20;
2786     else if (unformat (input, "heap-size %dg", &tmp))
2787       heapsize = ((u64)tmp) << 30;
2788     else if (unformat (input, "heap-size %dG", &tmp))
2789       heapsize = ((u64)tmp) << 30;
2790     else
2791       return clib_error_return (0, "unknown input '%U'",
2792                                 format_unformat_error, input);
2793   }
2794
2795   im->lookup_table_nbuckets = nbuckets;
2796   im->lookup_table_size = heapsize;
2797
2798   return 0;
2799 }
2800
2801 VLIB_EARLY_CONFIG_FUNCTION (ip6_config, "ip6");
2802