VPP-19: Split the lookup.h IP_LOOKUP_NEXT enum.
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47
48 /* This is really, really simple but stupid fib. */
49 u32
50 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
51                            ip4_address_t * dst,
52                            u32 disable_default_route)
53 {
54   ip_lookup_main_t * lm = &im->lookup_main;
55   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
56   uword * p, * hash, key;
57   i32 i, i_min, dst_address, ai;
58
59   i_min = disable_default_route ? 1 : 0;
60   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
61   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
62     {
63       hash = fib->adj_index_by_dst_address[i];
64       if (! hash)
65         continue;
66
67       key = dst_address & im->fib_masks[i];
68       if ((p = hash_get (hash, key)) != 0)
69         {
70           ai = p[0];
71           goto done;
72         }
73     }
74     
75   /* Nothing matches in table. */
76   ai = lm->miss_adj_index;
77
78  done:
79   return ai;
80 }
81
82 static ip4_fib_t *
83 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
84 {
85   ip4_fib_t * fib;
86   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
87   vec_add2 (im->fibs, fib, 1);
88   fib->table_id = table_id;
89   fib->index = fib - im->fibs;
90   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
91   fib->fwd_classify_table_index = ~0;
92   fib->rev_classify_table_index = ~0;
93   ip4_mtrie_init (&fib->mtrie);
94   return fib;
95 }
96
97 ip4_fib_t *
98 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
99                                    u32 table_index_or_id, u32 flags)
100 {
101   uword * p, fib_index;
102
103   fib_index = table_index_or_id;
104   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
105     {
106       if (table_index_or_id == ~0) {
107         table_index_or_id = 0;
108         while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
109           table_index_or_id++;
110         }
111         return create_fib_with_table_id (im, table_index_or_id);
112       }
113
114       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
115       if (! p)
116         return create_fib_with_table_id (im, table_index_or_id);
117       fib_index = p[0];
118     }
119   return vec_elt_at_index (im->fibs, fib_index);
120 }
121
122 static void
123 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
124                                        ip4_fib_t * fib,
125                                        u32 address_length)
126 {
127   hash_t * h;
128   uword max_index;
129
130   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
131   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
132
133   fib->adj_index_by_dst_address[address_length] =
134     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
135
136   hash_set_flags (fib->adj_index_by_dst_address[address_length],
137                   HASH_FLAG_NO_AUTO_SHRINK);
138
139   h = hash_header (fib->adj_index_by_dst_address[address_length]);
140   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
141
142   /* Initialize new/old hash value vectors. */
143   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
144   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
145 }
146
147 static void
148 ip4_fib_set_adj_index (ip4_main_t * im,
149                        ip4_fib_t * fib,
150                        u32 flags,
151                        u32 dst_address_u32,
152                        u32 dst_address_length,
153                        u32 adj_index)
154 {
155   ip_lookup_main_t * lm = &im->lookup_main;
156   uword * hash;
157
158   if (vec_bytes(fib->old_hash_values))
159     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
160   if (vec_bytes(fib->new_hash_values))
161     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
162   fib->new_hash_values[0] = adj_index;
163
164   /* Make sure adj index is valid. */
165   if (CLIB_DEBUG > 0)
166     (void) ip_get_adjacency (lm, adj_index);
167
168   hash = fib->adj_index_by_dst_address[dst_address_length];
169
170   hash = _hash_set3 (hash, dst_address_u32,
171                      fib->new_hash_values,
172                      fib->old_hash_values);
173
174   fib->adj_index_by_dst_address[dst_address_length] = hash;
175
176   if (vec_len (im->add_del_route_callbacks) > 0)
177     {
178       ip4_add_del_route_callback_t * cb;
179       ip4_address_t d;
180       uword * p;
181
182       d.data_u32 = dst_address_u32;
183       vec_foreach (cb, im->add_del_route_callbacks)
184         if ((flags & cb->required_flags) == cb->required_flags)
185           cb->function (im, cb->function_opaque,
186                         fib, flags,
187                         &d, dst_address_length,
188                         fib->old_hash_values,
189                         fib->new_hash_values);
190
191       p = hash_get (hash, dst_address_u32);
192       clib_memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
193     }
194 }
195
196 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
197 {
198   ip_lookup_main_t * lm = &im->lookup_main;
199   ip4_fib_t * fib;
200   u32 dst_address, dst_address_length, adj_index, old_adj_index;
201   uword * hash, is_del;
202   ip4_add_del_route_callback_t * cb;
203
204   /* Either create new adjacency or use given one depending on arguments. */
205   if (a->n_add_adj > 0)
206     {
207       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
208       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
209     }
210   else
211     adj_index = a->adj_index;
212
213   dst_address = a->dst_address.data_u32;
214   dst_address_length = a->dst_address_length;
215   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
216
217   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
218   dst_address &= im->fib_masks[dst_address_length];
219
220   if (! fib->adj_index_by_dst_address[dst_address_length])
221     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
222
223   hash = fib->adj_index_by_dst_address[dst_address_length];
224
225   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
226
227   if (is_del)
228     {
229       fib->old_hash_values[0] = ~0;
230       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
231       fib->adj_index_by_dst_address[dst_address_length] = hash;
232
233       if (vec_len (im->add_del_route_callbacks) > 0
234           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
235         {
236           fib->new_hash_values[0] = ~0;
237           vec_foreach (cb, im->add_del_route_callbacks)
238             if ((a->flags & cb->required_flags) == cb->required_flags)
239               cb->function (im, cb->function_opaque,
240                             fib, a->flags,
241                             &a->dst_address, dst_address_length,
242                             fib->old_hash_values,
243                             fib->new_hash_values);
244         }
245     }
246   else
247     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
248                            adj_index);
249
250   old_adj_index = fib->old_hash_values[0];
251
252   /* Avoid spurious reference count increments */
253   if (old_adj_index == adj_index
254       && adj_index != ~0
255       && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
256     {
257       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
258       if (adj->share_count > 0)
259         adj->share_count --;
260     }
261
262   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
263                                is_del ? old_adj_index : adj_index,
264                                is_del);
265
266   /* Delete old adjacency index if present and changed. */
267   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
268       && old_adj_index != ~0
269       && old_adj_index != adj_index)
270     ip_del_adjacency (lm, old_adj_index);
271 }
272
273 void
274 ip4_add_del_route_next_hop (ip4_main_t * im,
275                             u32 flags,
276                             ip4_address_t * dst_address,
277                             u32 dst_address_length,
278                             ip4_address_t * next_hop,
279                             u32 next_hop_sw_if_index,
280                             u32 next_hop_weight, u32 adj_index, 
281                             u32 explicit_fib_index)
282 {
283   vnet_main_t * vnm = vnet_get_main();
284   ip_lookup_main_t * lm = &im->lookup_main;
285   u32 fib_index;
286   ip4_fib_t * fib;
287   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
288   u32 dst_adj_index, nh_adj_index;
289   uword * dst_hash, * dst_result;
290   uword * nh_hash, * nh_result;
291   ip_adjacency_t * dst_adj;
292   ip_multipath_adjacency_t * old_mp, * new_mp;
293   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
294   int is_interface_next_hop;
295   clib_error_t * error = 0;
296
297   if (explicit_fib_index == (u32)~0)
298       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
299   else
300       fib_index = explicit_fib_index;
301
302   fib = vec_elt_at_index (im->fibs, fib_index);
303   
304   /* Lookup next hop to be added or deleted. */
305   is_interface_next_hop = next_hop->data_u32 == 0;
306   if (adj_index == (u32)~0)
307     {
308       if (is_interface_next_hop)
309         {
310           nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
311           if (nh_result)
312             nh_adj_index = *nh_result;
313           else
314             {
315               ip_adjacency_t * adj;
316               adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
317                                       &nh_adj_index);
318               ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
319               ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
320               hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
321             }
322         }
323       else
324         {
325           nh_hash = fib->adj_index_by_dst_address[32];
326           nh_result = hash_get (nh_hash, next_hop->data_u32);
327           
328           /* Next hop must be known. */
329           if (! nh_result)
330             {
331               ip_adjacency_t * adj;
332
333               nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
334                                                         next_hop, 0);
335               adj = ip_get_adjacency (lm, nh_adj_index);
336               /* if ARP interface adjacencty is present, we need to
337                  install ARP adjaceny for specific next hop */
338               if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
339                   adj->arp.next_hop.ip4.as_u32 == 0)
340                 {
341                   nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
342                 }
343               else
344                 {
345                   /* Next hop is not known, so create indirect adj */
346                   ip_adjacency_t add_adj;
347                   memset (&add_adj, 0, sizeof(add_adj));
348                   add_adj.n_adj = 1;
349                   add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
350                   add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
351                   add_adj.explicit_fib_index = explicit_fib_index;
352                   ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
353                 }
354             }
355           else
356             nh_adj_index = *nh_result;
357         }
358     }
359   else
360     {
361       nh_adj_index = adj_index;
362     }
363   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
364   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
365
366   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
367   dst_result = hash_get (dst_hash, dst_address_u32);
368   if (dst_result)
369     {
370       dst_adj_index = dst_result[0];
371       dst_adj = ip_get_adjacency (lm, dst_adj_index);
372     }
373   else
374     {
375       /* For deletes destination must be known. */
376       if (is_del)
377         {
378           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
379           error = clib_error_return (0, "unknown destination %U/%d",
380                                      format_ip4_address, dst_address,
381                                      dst_address_length);
382           goto done;
383         }
384
385       dst_adj_index = ~0;
386       dst_adj = 0;
387     }
388
389   /* Ignore adds of X/32 with next hop of X. */
390   if (! is_del
391       && dst_address_length == 32
392       && dst_address->data_u32 == next_hop->data_u32 
393       && adj_index != (u32)~0)
394     {
395       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
396       error = clib_error_return (0, "prefix matches next hop %U/%d",
397                                  format_ip4_address, dst_address,
398                                  dst_address_length);
399       goto done;
400     }
401
402   /* Destination is not known and default weight is set so add route
403      to existing non-multipath adjacency */
404   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
405     {
406       /* create new adjacency */
407       ip4_add_del_route_args_t a;
408       a.table_index_or_table_id = fib_index;
409       a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
410                  | IP4_ROUTE_FLAG_FIB_INDEX
411                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
412                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
413                              | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
414       a.dst_address = dst_address[0];
415       a.dst_address_length = dst_address_length;
416       a.adj_index = nh_adj_index;
417       a.add_adj = 0;
418       a.n_add_adj = 0;
419
420       ip4_add_del_route (im, &a);
421
422       goto done;
423     }
424
425   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
426
427   if (! ip_multipath_adjacency_add_del_next_hop
428       (lm, is_del,
429        old_mp_adj_index,
430        nh_adj_index,
431        next_hop_weight,
432        &new_mp_adj_index))
433     {
434       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
435       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
436                                  format_ip4_address, next_hop);
437       goto done;
438     }
439   
440   old_mp = new_mp = 0;
441   if (old_mp_adj_index != ~0)
442     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
443   if (new_mp_adj_index != ~0)
444     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
445
446   if (old_mp != new_mp)
447     {
448       ip4_add_del_route_args_t a;
449       a.table_index_or_table_id = fib_index;
450       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
451                  | IP4_ROUTE_FLAG_FIB_INDEX
452                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
453                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
454       a.dst_address = dst_address[0];
455       a.dst_address_length = dst_address_length;
456       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
457       a.add_adj = 0;
458       a.n_add_adj = 0;
459
460       ip4_add_del_route (im, &a);
461     }
462
463  done:
464   if (error)
465     clib_error_report (error);
466 }
467
468 void *
469 ip4_get_route (ip4_main_t * im,
470                u32 table_index_or_table_id,
471                u32 flags,
472                u8 * address,
473                u32 address_length)
474 {
475   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
476   u32 dst_address = * (u32 *) address;
477   uword * hash, * p;
478
479   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
480   dst_address &= im->fib_masks[address_length];
481
482   hash = fib->adj_index_by_dst_address[address_length];
483   p = hash_get (hash, dst_address);
484   return (void *) p;
485 }
486
487 void
488 ip4_foreach_matching_route (ip4_main_t * im,
489                             u32 table_index_or_table_id,
490                             u32 flags,
491                             ip4_address_t * address,
492                             u32 address_length,
493                             ip4_address_t ** results,
494                             u8 ** result_lengths)
495 {
496   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
497   u32 dst_address = address->data_u32;
498   u32 this_length = address_length;
499   
500   if (*results)
501     _vec_len (*results) = 0;
502   if (*result_lengths)
503     _vec_len (*result_lengths) = 0;
504
505   while (this_length <= 32 && vec_len (results) == 0)
506     {
507       uword k, v;
508       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
509         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
510           {
511             ip4_address_t a;
512             a.data_u32 = k;
513             vec_add1 (*results, a);
514             vec_add1 (*result_lengths, this_length);
515           }
516       }));
517
518       this_length++;
519     }
520 }
521
522 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
523                                   u32 table_index_or_table_id,
524                                   u32 flags)
525 {
526   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
527   ip_lookup_main_t * lm = &im->lookup_main;
528   u32 i, l;
529   ip4_address_t a;
530   ip4_add_del_route_callback_t * cb;
531   static ip4_address_t * to_delete;
532
533   if (lm->n_adjacency_remaps == 0)
534     return;
535
536   for (l = 0; l <= 32; l++)
537     {
538       hash_pair_t * p;
539       uword * hash = fib->adj_index_by_dst_address[l];
540
541       if (hash_elts (hash) == 0)
542         continue;
543
544       if (to_delete)
545         _vec_len (to_delete) = 0;
546
547       hash_foreach_pair (p, hash, ({
548         u32 adj_index = p->value[0];
549         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
550
551         if (m)
552           {
553             /* Record destination address from hash key. */
554             a.data_u32 = p->key;
555
556             /* New adjacency points to nothing: so delete prefix. */
557             if (m == ~0)
558               vec_add1 (to_delete, a);
559             else
560               {
561                 /* Remap to new adjacency. */
562                 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
563
564                 /* Set new adjacency value. */
565                 fib->new_hash_values[0] = p->value[0] = m - 1;
566
567                 vec_foreach (cb, im->add_del_route_callbacks)
568                   if ((flags & cb->required_flags) == cb->required_flags)
569                     cb->function (im, cb->function_opaque,
570                                   fib, flags | IP4_ROUTE_FLAG_ADD,
571                                   &a, l,
572                                   fib->old_hash_values,
573                                   fib->new_hash_values);
574               }
575           }
576       }));
577
578       fib->new_hash_values[0] = ~0;
579       for (i = 0; i < vec_len (to_delete); i++)
580         {
581           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
582           vec_foreach (cb, im->add_del_route_callbacks)
583             if ((flags & cb->required_flags) == cb->required_flags)
584               cb->function (im, cb->function_opaque,
585                             fib, flags | IP4_ROUTE_FLAG_DEL,
586                             &a, l,
587                             fib->old_hash_values,
588                             fib->new_hash_values);
589         }
590     }
591
592   /* Also remap adjacencies in mtrie. */
593   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
594
595   /* Reset mapping table. */
596   vec_zero (lm->adjacency_remap_table);
597
598   /* All remaps have been performed. */
599   lm->n_adjacency_remaps = 0;
600 }
601
602 void ip4_delete_matching_routes (ip4_main_t * im,
603                                  u32 table_index_or_table_id,
604                                  u32 flags,
605                                  ip4_address_t * address,
606                                  u32 address_length)
607 {
608   static ip4_address_t * matching_addresses;
609   static u8 * matching_address_lengths;
610   u32 l, i;
611   ip4_add_del_route_args_t a;
612
613   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
614   a.table_index_or_table_id = table_index_or_table_id;
615   a.adj_index = ~0;
616   a.add_adj = 0;
617   a.n_add_adj = 0;
618
619   for (l = address_length + 1; l <= 32; l++)
620     {
621       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
622                                   address,
623                                   l,
624                                   &matching_addresses,
625                                   &matching_address_lengths);
626       for (i = 0; i < vec_len (matching_addresses); i++)
627         {
628           a.dst_address = matching_addresses[i];
629           a.dst_address_length = matching_address_lengths[i];
630           ip4_add_del_route (im, &a);
631         }
632     }
633
634   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
635 }
636
637 void
638 ip4_forward_next_trace (vlib_main_t * vm,
639                         vlib_node_runtime_t * node,
640                         vlib_frame_t * frame,
641                         vlib_rx_or_tx_t which_adj_index);
642
643 always_inline uword
644 ip4_lookup_inline (vlib_main_t * vm,
645                    vlib_node_runtime_t * node,
646                    vlib_frame_t * frame,
647                    int lookup_for_responses_to_locally_received_packets,
648                    int is_indirect)
649 {
650   ip4_main_t * im = &ip4_main;
651   ip_lookup_main_t * lm = &im->lookup_main;
652   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
653   u32 n_left_from, n_left_to_next, * from, * to_next;
654   ip_lookup_next_t next;
655   u32 cpu_index = os_get_cpu_number();
656
657   from = vlib_frame_vector_args (frame);
658   n_left_from = frame->n_vectors;
659   next = node->cached_next_index;
660
661   while (n_left_from > 0)
662     {
663       vlib_get_next_frame (vm, node, next,
664                            to_next, n_left_to_next);
665
666       while (n_left_from >= 4 && n_left_to_next >= 2)
667         {
668           vlib_buffer_t * p0, * p1;
669           ip4_header_t * ip0, * ip1;
670           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
671           ip_lookup_next_t next0, next1;
672           ip_adjacency_t * adj0, * adj1;
673           ip4_fib_mtrie_t * mtrie0, * mtrie1;
674           ip4_fib_mtrie_leaf_t leaf0, leaf1;
675           ip4_address_t * dst_addr0, *dst_addr1;
676           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
677           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
678           u32 flow_hash_config0, flow_hash_config1;
679           u32 hash_c0, hash_c1;
680           u32 wrong_next;
681
682           /* Prefetch next iteration. */
683           {
684             vlib_buffer_t * p2, * p3;
685
686             p2 = vlib_get_buffer (vm, from[2]);
687             p3 = vlib_get_buffer (vm, from[3]);
688
689             vlib_prefetch_buffer_header (p2, LOAD);
690             vlib_prefetch_buffer_header (p3, LOAD);
691
692             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
693             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
694           }
695
696           pi0 = to_next[0] = from[0];
697           pi1 = to_next[1] = from[1];
698
699           p0 = vlib_get_buffer (vm, pi0);
700           p1 = vlib_get_buffer (vm, pi1);
701
702           ip0 = vlib_buffer_get_current (p0);
703           ip1 = vlib_buffer_get_current (p1);
704
705           if (is_indirect)
706             {
707               ip_adjacency_t * iadj0, * iadj1;
708               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
709               iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
710               dst_addr0 = &iadj0->indirect.next_hop.ip4;
711               dst_addr1 = &iadj1->indirect.next_hop.ip4;
712             }
713           else
714             {
715               dst_addr0 = &ip0->dst_address;
716               dst_addr1 = &ip1->dst_address;
717             }
718
719           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
720           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
721           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
722             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
723           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
724             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
725
726
727           if (! lookup_for_responses_to_locally_received_packets)
728             {
729               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
730               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
731
732               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
733
734               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
735               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
736             }
737
738           tcp0 = (void *) (ip0 + 1);
739           tcp1 = (void *) (ip1 + 1);
740
741           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
742                          || ip0->protocol == IP_PROTOCOL_UDP);
743           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
744                          || ip1->protocol == IP_PROTOCOL_UDP);
745
746           if (! lookup_for_responses_to_locally_received_packets)
747             {
748               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
749               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
750             }
751
752           if (! lookup_for_responses_to_locally_received_packets)
753             {
754               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
755               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
756             }
757
758           if (! lookup_for_responses_to_locally_received_packets)
759             {
760               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
761               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
762             }
763
764           if (lookup_for_responses_to_locally_received_packets)
765             {
766               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
767               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
768             }
769           else
770             {
771               /* Handle default route. */
772               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
773               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
774
775               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
776               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
777             }
778
779           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
780                                                            dst_addr0,
781                                                            /* no_default_route */ 0));
782           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
783                                                            dst_addr1,
784                                                            /* no_default_route */ 0));
785           adj0 = ip_get_adjacency (lm, adj_index0);
786           adj1 = ip_get_adjacency (lm, adj_index1);
787
788           next0 = adj0->lookup_next_index;
789           next1 = adj1->lookup_next_index;
790
791           /* Use flow hash to compute multipath adjacency. */
792           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
793           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
794           if (PREDICT_FALSE (adj0->n_adj > 1))
795             {
796               flow_hash_config0 = 
797                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
798               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
799                 ip4_compute_flow_hash (ip0, flow_hash_config0);
800             }
801           if (PREDICT_FALSE(adj1->n_adj > 1))
802             {
803               flow_hash_config1 = 
804                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
805               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
806                 ip4_compute_flow_hash (ip1, flow_hash_config1);
807             }
808
809           ASSERT (adj0->n_adj > 0);
810           ASSERT (adj1->n_adj > 0);
811           ASSERT (is_pow2 (adj0->n_adj));
812           ASSERT (is_pow2 (adj1->n_adj));
813           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
814           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
815
816           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
817           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
818
819           vlib_increment_combined_counter 
820               (cm, cpu_index, adj_index0, 1,
821                vlib_buffer_length_in_chain (vm, p0) 
822                + sizeof(ethernet_header_t));
823           vlib_increment_combined_counter 
824               (cm, cpu_index, adj_index1, 1,
825                vlib_buffer_length_in_chain (vm, p1)
826                + sizeof(ethernet_header_t));
827
828           from += 2;
829           to_next += 2;
830           n_left_to_next -= 2;
831           n_left_from -= 2;
832
833           wrong_next = (next0 != next) + 2*(next1 != next);
834           if (PREDICT_FALSE (wrong_next != 0))
835             {
836               switch (wrong_next)
837                 {
838                 case 1:
839                   /* A B A */
840                   to_next[-2] = pi1;
841                   to_next -= 1;
842                   n_left_to_next += 1;
843                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
844                   break;
845
846                 case 2:
847                   /* A A B */
848                   to_next -= 1;
849                   n_left_to_next += 1;
850                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
851                   break;
852
853                 case 3:
854                   /* A B C */
855                   to_next -= 2;
856                   n_left_to_next += 2;
857                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
858                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
859                   if (next0 == next1)
860                     {
861                       /* A B B */
862                       vlib_put_next_frame (vm, node, next, n_left_to_next);
863                       next = next1;
864                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
865                     }
866                 }
867             }
868         }
869     
870       while (n_left_from > 0 && n_left_to_next > 0)
871         {
872           vlib_buffer_t * p0;
873           ip4_header_t * ip0;
874           __attribute__((unused)) tcp_header_t * tcp0;
875           ip_lookup_next_t next0;
876           ip_adjacency_t * adj0;
877           ip4_fib_mtrie_t * mtrie0;
878           ip4_fib_mtrie_leaf_t leaf0;
879           ip4_address_t * dst_addr0;
880           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
881           u32 flow_hash_config0, hash_c0;
882
883           pi0 = from[0];
884           to_next[0] = pi0;
885
886           p0 = vlib_get_buffer (vm, pi0);
887
888           ip0 = vlib_buffer_get_current (p0);
889
890           if (is_indirect)
891             {
892               ip_adjacency_t * iadj0;
893               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
894               dst_addr0 = &iadj0->indirect.next_hop.ip4;
895             }
896           else
897             {
898               dst_addr0 = &ip0->dst_address;
899             }
900
901           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
902           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
903             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
904
905           if (! lookup_for_responses_to_locally_received_packets)
906             {
907               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
908
909               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
910
911               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
912             }
913
914           tcp0 = (void *) (ip0 + 1);
915
916           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
917                          || ip0->protocol == IP_PROTOCOL_UDP);
918
919           if (! lookup_for_responses_to_locally_received_packets)
920             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
921
922           if (! lookup_for_responses_to_locally_received_packets)
923             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
924
925           if (! lookup_for_responses_to_locally_received_packets)
926             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
927
928           if (lookup_for_responses_to_locally_received_packets)
929             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
930           else
931             {
932               /* Handle default route. */
933               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
934               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
935             }
936
937           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
938                                                            dst_addr0,
939                                                            /* no_default_route */ 0));
940
941           adj0 = ip_get_adjacency (lm, adj_index0);
942
943           next0 = adj0->lookup_next_index;
944
945           /* Use flow hash to compute multipath adjacency. */
946           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
947           if (PREDICT_FALSE(adj0->n_adj > 1))
948             {
949               flow_hash_config0 = 
950                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
951
952               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
953                 ip4_compute_flow_hash (ip0, flow_hash_config0);
954             }
955
956           ASSERT (adj0->n_adj > 0);
957           ASSERT (is_pow2 (adj0->n_adj));
958           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
959
960           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
961
962           vlib_increment_combined_counter 
963               (cm, cpu_index, adj_index0, 1,
964                vlib_buffer_length_in_chain (vm, p0)
965                + sizeof(ethernet_header_t));
966
967           from += 1;
968           to_next += 1;
969           n_left_to_next -= 1;
970           n_left_from -= 1;
971
972           if (PREDICT_FALSE (next0 != next))
973             {
974               n_left_to_next += 1;
975               vlib_put_next_frame (vm, node, next, n_left_to_next);
976               next = next0;
977               vlib_get_next_frame (vm, node, next,
978                                    to_next, n_left_to_next);
979               to_next[0] = pi0;
980               to_next += 1;
981               n_left_to_next -= 1;
982             }
983         }
984
985       vlib_put_next_frame (vm, node, next, n_left_to_next);
986     }
987
988   if (node->flags & VLIB_NODE_FLAG_TRACE)
989     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
990
991   return frame->n_vectors;
992 }
993
994 static uword
995 ip4_lookup (vlib_main_t * vm,
996             vlib_node_runtime_t * node,
997             vlib_frame_t * frame)
998 {
999   return ip4_lookup_inline (vm, node, frame,
1000                             /* lookup_for_responses_to_locally_received_packets */ 0,
1001                             /* is_indirect */ 0);
1002
1003 }
1004
1005 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
1006                                         ip_adjacency_t * adj,
1007                                         u32 sw_if_index,
1008                                         u32 if_address_index)
1009 {
1010   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1011   ip_lookup_next_t n;
1012   vnet_l3_packet_type_t packet_type;
1013   u32 node_index;
1014
1015   if (hw->hw_class_index == ethernet_hw_interface_class.index
1016       || hw->hw_class_index == srp_hw_interface_class.index)
1017     {
1018       /* 
1019        * We have a bit of a problem in this case. ip4-arp uses
1020        * the rewrite_header.next_index to hand pkts to the
1021        * indicated inteface output node. We can end up in
1022        * ip4_rewrite_local, too, which also pays attention to 
1023        * rewrite_header.next index. Net result: a hack in
1024        * ip4_rewrite_local...
1025        */
1026       n = IP_LOOKUP_NEXT_ARP;
1027       node_index = ip4_arp_node.index;
1028       adj->if_address_index = if_address_index;
1029       adj->arp.next_hop.ip4.as_u32 = 0;
1030       ip46_address_reset(&adj->arp.next_hop);
1031       packet_type = VNET_L3_PACKET_TYPE_ARP;
1032     }
1033   else
1034     {
1035       n = IP_LOOKUP_NEXT_REWRITE;
1036       node_index = ip4_rewrite_node.index;
1037       packet_type = VNET_L3_PACKET_TYPE_IP4;
1038     }
1039
1040   adj->lookup_next_index = n;
1041   vnet_rewrite_for_sw_interface
1042     (vnm,
1043      packet_type,
1044      sw_if_index,
1045      node_index,
1046      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1047      &adj->rewrite_header,
1048      sizeof (adj->rewrite_data));
1049 }
1050
1051 static void
1052 ip4_add_interface_routes (u32 sw_if_index,
1053                           ip4_main_t * im, u32 fib_index,
1054                           ip_interface_address_t * a)
1055 {
1056   vnet_main_t * vnm = vnet_get_main();
1057   ip_lookup_main_t * lm = &im->lookup_main;
1058   ip_adjacency_t * adj;
1059   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1060   ip4_add_del_route_args_t x;
1061   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1062   u32 classify_table_index;
1063
1064   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1065   x.table_index_or_table_id = fib_index;
1066   x.flags = (IP4_ROUTE_FLAG_ADD
1067              | IP4_ROUTE_FLAG_FIB_INDEX
1068              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1069   x.dst_address = address[0];
1070   x.dst_address_length = a->address_length;
1071   x.n_add_adj = 0;
1072   x.add_adj = 0;
1073
1074   a->neighbor_probe_adj_index = ~0;
1075   if (a->address_length < 32)
1076     {
1077       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1078                               &x.adj_index);
1079       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1080       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1081       ip4_add_del_route (im, &x);
1082       a->neighbor_probe_adj_index = x.adj_index;
1083     }
1084   
1085   /* Add e.g. 1.1.1.1/32 as local to this host. */
1086   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1087                           &x.adj_index);
1088   
1089   classify_table_index = ~0;
1090   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1091     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1092   if (classify_table_index != (u32) ~0)
1093     {
1094       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1095       adj->classify.table_index = classify_table_index;
1096     }
1097   else
1098     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1099   
1100   adj->if_address_index = a - lm->if_address_pool;
1101   adj->rewrite_header.sw_if_index = sw_if_index;
1102   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1103   /* 
1104    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1105    * fail an RPF-ish check, but still go thru the rewrite code...
1106    */
1107   adj->rewrite_header.data_bytes = 0;
1108
1109   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1110   x.dst_address_length = 32;
1111   ip4_add_del_route (im, &x);
1112 }
1113
1114 static void
1115 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1116 {
1117   ip4_add_del_route_args_t x;
1118
1119   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1120   x.table_index_or_table_id = fib_index;
1121   x.flags = (IP4_ROUTE_FLAG_DEL
1122              | IP4_ROUTE_FLAG_FIB_INDEX
1123              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1124   x.dst_address = address[0];
1125   x.dst_address_length = address_length;
1126   x.adj_index = ~0;
1127   x.n_add_adj = 0;
1128   x.add_adj = 0;
1129
1130   if (address_length < 32)
1131     ip4_add_del_route (im, &x);
1132
1133   x.dst_address_length = 32;
1134   ip4_add_del_route (im, &x);
1135
1136   ip4_delete_matching_routes (im,
1137                               fib_index,
1138                               IP4_ROUTE_FLAG_FIB_INDEX,
1139                               address,
1140                               address_length);
1141 }
1142
1143 typedef struct {
1144     u32 sw_if_index;
1145     ip4_address_t address;
1146     u32 length;
1147 } ip4_interface_address_t;
1148
1149 static clib_error_t *
1150 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1151                                         u32 sw_if_index,
1152                                         ip4_address_t * new_address,
1153                                         u32 new_length,
1154                                         u32 redistribute,
1155                                         u32 insert_routes,
1156                                         u32 is_del);
1157
1158 static clib_error_t *
1159 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1160                                         u32 sw_if_index,
1161                                         ip4_address_t * address,
1162                                         u32 address_length,
1163                                         u32 redistribute,
1164                                         u32 insert_routes,
1165                                         u32 is_del)
1166 {
1167   vnet_main_t * vnm = vnet_get_main();
1168   ip4_main_t * im = &ip4_main;
1169   ip_lookup_main_t * lm = &im->lookup_main;
1170   clib_error_t * error = 0;
1171   u32 if_address_index, elts_before;
1172   ip4_address_fib_t ip4_af, * addr_fib = 0;
1173
1174   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1175   ip4_addr_fib_init (&ip4_af, address,
1176                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1177   vec_add1 (addr_fib, ip4_af);
1178
1179   /* When adding an address check that it does not conflict with an existing address. */
1180   if (! is_del)
1181     {
1182       ip_interface_address_t * ia;
1183       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1184                                     0 /* honor unnumbered */,
1185       ({
1186         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1187
1188         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1189             || ip4_destination_matches_route (im, x, address, address_length))
1190           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1191                                     format_ip4_address_and_length, address, address_length,
1192                                     format_ip4_address_and_length, x, ia->address_length,
1193                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1194       }));
1195     }
1196
1197   elts_before = pool_elts (lm->if_address_pool);
1198
1199   error = ip_interface_address_add_del
1200     (lm,
1201      sw_if_index,
1202      addr_fib,
1203      address_length,
1204      is_del,
1205      &if_address_index);
1206   if (error)
1207     goto done;
1208   
1209   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1210     {
1211       if (is_del)
1212         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1213                                   address_length);
1214       
1215       else
1216           ip4_add_interface_routes (sw_if_index,
1217                                     im, ip4_af.fib_index,
1218                                     pool_elt_at_index 
1219                                     (lm->if_address_pool, if_address_index));
1220     }
1221
1222   /* If pool did not grow/shrink: add duplicate address. */
1223   if (elts_before != pool_elts (lm->if_address_pool))
1224     {
1225       ip4_add_del_interface_address_callback_t * cb;
1226       vec_foreach (cb, im->add_del_interface_address_callbacks)
1227         cb->function (im, cb->function_opaque, sw_if_index,
1228                       address, address_length,
1229                       if_address_index,
1230                       is_del);
1231     }
1232
1233  done:
1234   vec_free (addr_fib);
1235   return error;
1236 }
1237
1238 clib_error_t *
1239 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1240                                ip4_address_t * address, u32 address_length,
1241                                u32 is_del)
1242 {
1243   return ip4_add_del_interface_address_internal
1244     (vm, sw_if_index, address, address_length,
1245      /* redistribute */ 1,
1246      /* insert_routes */ 1,
1247      is_del);
1248 }
1249
1250 static clib_error_t *
1251 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1252                                 u32 sw_if_index,
1253                                 u32 flags)
1254 {
1255   ip4_main_t * im = &ip4_main;
1256   ip_interface_address_t * ia;
1257   ip4_address_t * a;
1258   u32 is_admin_up, fib_index;
1259   
1260   /* Fill in lookup tables with default table (0). */
1261   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1262   
1263   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1264   
1265   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1266   
1267   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1268
1269   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1270                                 0 /* honor unnumbered */,
1271   ({
1272     a = ip_interface_address_get_address (&im->lookup_main, ia);
1273     if (is_admin_up)
1274       ip4_add_interface_routes (sw_if_index,
1275                                 im, fib_index,
1276                                 ia);
1277     else
1278       ip4_del_interface_routes (im, fib_index,
1279                                 a, ia->address_length);
1280   }));
1281
1282   return 0;
1283 }
1284  
1285 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1286
1287 static clib_error_t *
1288 ip4_sw_interface_add_del (vnet_main_t * vnm,
1289                           u32 sw_if_index,
1290                           u32 is_add)
1291 {
1292   vlib_main_t * vm = vnm->vlib_main;
1293   ip4_main_t * im = &ip4_main;
1294   ip_lookup_main_t * lm = &im->lookup_main;
1295   u32 ci, cast;
1296
1297   for (cast = 0; cast < VNET_N_CAST; cast++)
1298     {
1299       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1300       vnet_config_main_t * vcm = &cm->config_main;
1301
1302       if (! vcm->node_index_by_feature_index)
1303         {
1304           if (cast == VNET_UNICAST)
1305             {
1306               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1307               static char * feature_nodes[] = {
1308                 [IP4_RX_FEATURE_CHECK_ACCESS] = "ip4-inacl",
1309                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX] = "ip4-source-check-via-rx",
1310                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY] = "ip4-source-check-via-any",
1311                 [IP4_RX_FEATURE_IPSEC] = "ipsec-input-ip4",
1312                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1313                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup",
1314               };
1315
1316               vnet_config_init (vm, vcm,
1317                                 start_nodes, ARRAY_LEN (start_nodes),
1318                                 feature_nodes, ARRAY_LEN (feature_nodes));
1319             }
1320           else
1321             {
1322               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1323               static char * feature_nodes[] = {
1324                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1325                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup-multicast",
1326               };
1327
1328               vnet_config_init (vm, vcm,
1329                                 start_nodes, ARRAY_LEN (start_nodes),
1330                                 feature_nodes, ARRAY_LEN (feature_nodes));
1331             }
1332         }
1333
1334       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1335       ci = cm->config_index_by_sw_if_index[sw_if_index];
1336
1337       if (is_add)
1338         ci = vnet_config_add_feature (vm, vcm,
1339                                       ci,
1340                                       IP4_RX_FEATURE_LOOKUP,
1341                                       /* config data */ 0,
1342                                       /* # bytes of config data */ 0);
1343       else
1344         ci = vnet_config_del_feature (vm, vcm,
1345                                       ci,
1346                                       IP4_RX_FEATURE_LOOKUP,
1347                                       /* config data */ 0,
1348                                       /* # bytes of config data */ 0);
1349
1350       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1351     }
1352
1353   return /* no error */ 0;
1354 }
1355
1356 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1357
1358 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
1359
1360 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1361   .function = ip4_lookup,
1362   .name = "ip4-lookup",
1363   .vector_size = sizeof (u32),
1364
1365   .format_trace = format_ip4_lookup_trace,
1366
1367   .n_next_nodes = IP4_LOOKUP_N_NEXT,
1368   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1369 };
1370
1371 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
1372
1373 static uword
1374 ip4_indirect (vlib_main_t * vm,
1375                vlib_node_runtime_t * node,
1376                vlib_frame_t * frame)
1377 {
1378   return ip4_lookup_inline (vm, node, frame,
1379                             /* lookup_for_responses_to_locally_received_packets */ 0,
1380                             /* is_indirect */ 1);
1381 }
1382
1383 VLIB_REGISTER_NODE (ip4_indirect_node) = {
1384   .function = ip4_indirect,
1385   .name = "ip4-indirect",
1386   .vector_size = sizeof (u32),
1387   .sibling_of = "ip4-lookup",
1388   .format_trace = format_ip4_lookup_trace,
1389
1390   .n_next_nodes = 0,
1391 };
1392
1393 VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect)
1394
1395
1396 /* Global IP4 main. */
1397 ip4_main_t ip4_main;
1398
1399 clib_error_t *
1400 ip4_lookup_init (vlib_main_t * vm)
1401 {
1402   ip4_main_t * im = &ip4_main;
1403   uword i;
1404
1405   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1406     {
1407       u32 m;
1408
1409       if (i < 32)
1410         m = pow2_mask (i) << (32 - i);
1411       else 
1412         m = ~0;
1413       im->fib_masks[i] = clib_host_to_net_u32 (m);
1414     }
1415
1416   /* Create FIB with index 0 and table id of 0. */
1417   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1418
1419   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1420
1421   {
1422     pg_node_t * pn;
1423     pn = pg_get_node (ip4_lookup_node.index);
1424     pn->unformat_edit = unformat_pg_ip4_header;
1425   }
1426
1427   {
1428     ethernet_arp_header_t h;
1429
1430     memset (&h, 0, sizeof (h));
1431
1432     /* Set target ethernet address to all zeros. */
1433     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1434
1435 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1436 #define _8(f,v) h.f = v;
1437     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1438     _16 (l3_type, ETHERNET_TYPE_IP4);
1439     _8 (n_l2_address_bytes, 6);
1440     _8 (n_l3_address_bytes, 4);
1441     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1442 #undef _16
1443 #undef _8
1444
1445     vlib_packet_template_init (vm,
1446                                &im->ip4_arp_request_packet_template,
1447                                /* data */ &h,
1448                                sizeof (h),
1449                                /* alloc chunk size */ 8,
1450                                "ip4 arp");
1451   }
1452
1453   return 0;
1454 }
1455
1456 VLIB_INIT_FUNCTION (ip4_lookup_init);
1457
1458 typedef struct {
1459   /* Adjacency taken. */
1460   u32 adj_index;
1461   u32 flow_hash;
1462   u32 fib_index;
1463
1464   /* Packet data, possibly *after* rewrite. */
1465   u8 packet_data[64 - 1*sizeof(u32)];
1466 } ip4_forward_next_trace_t;
1467
1468 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1469 {
1470   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1471   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1472   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1473   uword indent = format_get_indent (s);
1474   s = format (s, "%U%U",
1475                 format_white_space, indent,
1476                 format_ip4_header, t->packet_data);
1477   return s;
1478 }
1479
1480 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1481 {
1482   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1483   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1484   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1485   vnet_main_t * vnm = vnet_get_main();
1486   ip4_main_t * im = &ip4_main;
1487   uword indent = format_get_indent (s);
1488
1489   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1490               t->fib_index, t->adj_index, format_ip_adjacency,
1491               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1492   s = format (s, "\n%U%U",
1493               format_white_space, indent,
1494               format_ip4_header, t->packet_data);
1495   return s;
1496 }
1497
1498 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1499 {
1500   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1501   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1502   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1503   vnet_main_t * vnm = vnet_get_main();
1504   ip4_main_t * im = &ip4_main;
1505   uword indent = format_get_indent (s);
1506
1507   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
1508               t->fib_index, t->adj_index, format_ip_adjacency,
1509               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1510   s = format (s, "\n%U%U",
1511               format_white_space, indent,
1512               format_ip_adjacency_packet_data,
1513               vnm, &im->lookup_main, t->adj_index,
1514               t->packet_data, sizeof (t->packet_data));
1515   return s;
1516 }
1517
1518 /* Common trace function for all ip4-forward next nodes. */
1519 void
1520 ip4_forward_next_trace (vlib_main_t * vm,
1521                         vlib_node_runtime_t * node,
1522                         vlib_frame_t * frame,
1523                         vlib_rx_or_tx_t which_adj_index)
1524 {
1525   u32 * from, n_left;
1526   ip4_main_t * im = &ip4_main;
1527
1528   n_left = frame->n_vectors;
1529   from = vlib_frame_vector_args (frame);
1530   
1531   while (n_left >= 4)
1532     {
1533       u32 bi0, bi1;
1534       vlib_buffer_t * b0, * b1;
1535       ip4_forward_next_trace_t * t0, * t1;
1536
1537       /* Prefetch next iteration. */
1538       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1539       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1540
1541       bi0 = from[0];
1542       bi1 = from[1];
1543
1544       b0 = vlib_get_buffer (vm, bi0);
1545       b1 = vlib_get_buffer (vm, bi1);
1546
1547       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1548         {
1549           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1550           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1551           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1552           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1553               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1554               vec_elt (im->fib_index_by_sw_if_index,
1555                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1556
1557           clib_memcpy (t0->packet_data,
1558                   vlib_buffer_get_current (b0),
1559                   sizeof (t0->packet_data));
1560         }
1561       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1562         {
1563           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1564           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1565           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1566           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1567               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1568               vec_elt (im->fib_index_by_sw_if_index,
1569                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1570           clib_memcpy (t1->packet_data,
1571                   vlib_buffer_get_current (b1),
1572                   sizeof (t1->packet_data));
1573         }
1574       from += 2;
1575       n_left -= 2;
1576     }
1577
1578   while (n_left >= 1)
1579     {
1580       u32 bi0;
1581       vlib_buffer_t * b0;
1582       ip4_forward_next_trace_t * t0;
1583
1584       bi0 = from[0];
1585
1586       b0 = vlib_get_buffer (vm, bi0);
1587
1588       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1589         {
1590           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1591           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1592           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1593           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1594               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1595               vec_elt (im->fib_index_by_sw_if_index,
1596                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1597           clib_memcpy (t0->packet_data,
1598                   vlib_buffer_get_current (b0),
1599                   sizeof (t0->packet_data));
1600         }
1601       from += 1;
1602       n_left -= 1;
1603     }
1604 }
1605
1606 static uword
1607 ip4_drop_or_punt (vlib_main_t * vm,
1608                   vlib_node_runtime_t * node,
1609                   vlib_frame_t * frame,
1610                   ip4_error_t error_code)
1611 {
1612   u32 * buffers = vlib_frame_vector_args (frame);
1613   uword n_packets = frame->n_vectors;
1614
1615   vlib_error_drop_buffers (vm, node,
1616                            buffers,
1617                            /* stride */ 1,
1618                            n_packets,
1619                            /* next */ 0,
1620                            ip4_input_node.index,
1621                            error_code);
1622
1623   if (node->flags & VLIB_NODE_FLAG_TRACE)
1624     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1625
1626   return n_packets;
1627 }
1628
1629 static uword
1630 ip4_drop (vlib_main_t * vm,
1631           vlib_node_runtime_t * node,
1632           vlib_frame_t * frame)
1633 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1634
1635 static uword
1636 ip4_punt (vlib_main_t * vm,
1637           vlib_node_runtime_t * node,
1638           vlib_frame_t * frame)
1639 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1640
1641 static uword
1642 ip4_miss (vlib_main_t * vm,
1643           vlib_node_runtime_t * node,
1644           vlib_frame_t * frame)
1645 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1646
1647 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1648   .function = ip4_drop,
1649   .name = "ip4-drop",
1650   .vector_size = sizeof (u32),
1651
1652   .format_trace = format_ip4_forward_next_trace,
1653
1654   .n_next_nodes = 1,
1655   .next_nodes = {
1656     [0] = "error-drop",
1657   },
1658 };
1659
1660 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1661
1662 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1663   .function = ip4_punt,
1664   .name = "ip4-punt",
1665   .vector_size = sizeof (u32),
1666
1667   .format_trace = format_ip4_forward_next_trace,
1668
1669   .n_next_nodes = 1,
1670   .next_nodes = {
1671     [0] = "error-punt",
1672   },
1673 };
1674
1675 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1676
1677 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1678   .function = ip4_miss,
1679   .name = "ip4-miss",
1680   .vector_size = sizeof (u32),
1681
1682   .format_trace = format_ip4_forward_next_trace,
1683
1684   .n_next_nodes = 1,
1685   .next_nodes = {
1686     [0] = "error-drop",
1687   },
1688 };
1689
1690 VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss)
1691
1692 /* Compute TCP/UDP/ICMP4 checksum in software. */
1693 u16
1694 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1695                               ip4_header_t * ip0)
1696 {
1697   ip_csum_t sum0;
1698   u32 ip_header_length, payload_length_host_byte_order;
1699   u32 n_this_buffer, n_bytes_left;
1700   u16 sum16;
1701   void * data_this_buffer;
1702   
1703   /* Initialize checksum with ip header. */
1704   ip_header_length = ip4_header_bytes (ip0);
1705   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1706   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1707
1708   if (BITS (uword) == 32)
1709     {
1710       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1711       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1712     }
1713   else
1714     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1715
1716   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1717   data_this_buffer = (void *) ip0 + ip_header_length;
1718   if (n_this_buffer + ip_header_length > p0->current_length)
1719     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1720   while (1)
1721     {
1722       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1723       n_bytes_left -= n_this_buffer;
1724       if (n_bytes_left == 0)
1725         break;
1726
1727       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1728       p0 = vlib_get_buffer (vm, p0->next_buffer);
1729       data_this_buffer = vlib_buffer_get_current (p0);
1730       n_this_buffer = p0->current_length;
1731     }
1732
1733   sum16 = ~ ip_csum_fold (sum0);
1734
1735   return sum16;
1736 }
1737
1738 static u32
1739 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1740 {
1741   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1742   udp_header_t * udp0;
1743   u16 sum16;
1744
1745   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1746           || ip0->protocol == IP_PROTOCOL_UDP);
1747
1748   udp0 = (void *) (ip0 + 1);
1749   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1750     {
1751       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1752                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1753       return p0->flags;
1754     }
1755
1756   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1757
1758   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1759                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1760
1761   return p0->flags;
1762 }
1763
1764 static uword
1765 ip4_local (vlib_main_t * vm,
1766            vlib_node_runtime_t * node,
1767            vlib_frame_t * frame)
1768 {
1769   ip4_main_t * im = &ip4_main;
1770   ip_lookup_main_t * lm = &im->lookup_main;
1771   ip_local_next_t next_index;
1772   u32 * from, * to_next, n_left_from, n_left_to_next;
1773   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1774
1775   from = vlib_frame_vector_args (frame);
1776   n_left_from = frame->n_vectors;
1777   next_index = node->cached_next_index;
1778   
1779   if (node->flags & VLIB_NODE_FLAG_TRACE)
1780     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1781
1782   while (n_left_from > 0)
1783     {
1784       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1785
1786       while (n_left_from >= 4 && n_left_to_next >= 2)
1787         {
1788           vlib_buffer_t * p0, * p1;
1789           ip4_header_t * ip0, * ip1;
1790           udp_header_t * udp0, * udp1;
1791           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1792           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1793           ip_adjacency_t * adj0, * adj1;
1794           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
1795           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
1796           i32 len_diff0, len_diff1;
1797           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1798           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1799           u8 enqueue_code;
1800       
1801           pi0 = to_next[0] = from[0];
1802           pi1 = to_next[1] = from[1];
1803           from += 2;
1804           n_left_from -= 2;
1805           to_next += 2;
1806           n_left_to_next -= 2;
1807       
1808           p0 = vlib_get_buffer (vm, pi0);
1809           p1 = vlib_get_buffer (vm, pi1);
1810
1811           ip0 = vlib_buffer_get_current (p0);
1812           ip1 = vlib_buffer_get_current (p1);
1813
1814           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1815                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1816           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1817                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1818
1819           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1820           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
1821
1822           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1823
1824           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1825           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1826
1827           /* Treat IP frag packets as "experimental" protocol for now
1828              until support of IP frag reassembly is implemented */
1829           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1830           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1831           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1832           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1833           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1834           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1835
1836           flags0 = p0->flags;
1837           flags1 = p1->flags;
1838
1839           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1840           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1841
1842           udp0 = ip4_next_header (ip0);
1843           udp1 = ip4_next_header (ip1);
1844
1845           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1846           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1847           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1848
1849           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1850           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1851
1852           /* Verify UDP length. */
1853           ip_len0 = clib_net_to_host_u16 (ip0->length);
1854           ip_len1 = clib_net_to_host_u16 (ip1->length);
1855           udp_len0 = clib_net_to_host_u16 (udp0->length);
1856           udp_len1 = clib_net_to_host_u16 (udp1->length);
1857
1858           len_diff0 = ip_len0 - udp_len0;
1859           len_diff1 = ip_len1 - udp_len1;
1860
1861           len_diff0 = is_udp0 ? len_diff0 : 0;
1862           len_diff1 = is_udp1 ? len_diff1 : 0;
1863
1864           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1865                                 & good_tcp_udp0 & good_tcp_udp1)))
1866             {
1867               if (is_tcp_udp0)
1868                 {
1869                   if (is_tcp_udp0
1870                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1871                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1872                   good_tcp_udp0 =
1873                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1874                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1875                 }
1876               if (is_tcp_udp1)
1877                 {
1878                   if (is_tcp_udp1
1879                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1880                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1881                   good_tcp_udp1 =
1882                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1883                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1884                 }
1885             }
1886
1887           good_tcp_udp0 &= len_diff0 >= 0;
1888           good_tcp_udp1 &= len_diff1 >= 0;
1889
1890           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1891           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1892
1893           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1894
1895           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1896           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1897
1898           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1899           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1900                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1901                     : error0);
1902           error1 = (is_tcp_udp1 && ! good_tcp_udp1
1903                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1904                     : error1);
1905
1906           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1907           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1908
1909           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1910           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1911
1912           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1913           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
1914
1915           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1916                                                            &ip0->src_address,
1917                                                            /* no_default_route */ 1));
1918           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
1919                                                            &ip1->src_address,
1920                                                            /* no_default_route */ 1));
1921
1922           adj0 = ip_get_adjacency (lm, adj_index0);
1923           adj1 = ip_get_adjacency (lm, adj_index1);
1924
1925           /* 
1926            * Must have a route to source otherwise we drop the packet.
1927            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1928            */
1929           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1930                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1931                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
1932                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1933                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1934                     ? IP4_ERROR_SRC_LOOKUP_MISS
1935                     : error0);
1936           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
1937                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1938                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
1939                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1940                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1941                     ? IP4_ERROR_SRC_LOOKUP_MISS
1942                     : error1);
1943
1944           next0 = lm->local_next_by_ip_protocol[proto0];
1945           next1 = lm->local_next_by_ip_protocol[proto1];
1946
1947           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1948           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1949
1950           p0->error = error0 ? error_node->errors[error0] : 0;
1951           p1->error = error1 ? error_node->errors[error1] : 0;
1952
1953           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1954
1955           if (PREDICT_FALSE (enqueue_code != 0))
1956             {
1957               switch (enqueue_code)
1958                 {
1959                 case 1:
1960                   /* A B A */
1961                   to_next[-2] = pi1;
1962                   to_next -= 1;
1963                   n_left_to_next += 1;
1964                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1965                   break;
1966
1967                 case 2:
1968                   /* A A B */
1969                   to_next -= 1;
1970                   n_left_to_next += 1;
1971                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1972                   break;
1973
1974                 case 3:
1975                   /* A B B or A B C */
1976                   to_next -= 2;
1977                   n_left_to_next += 2;
1978                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1979                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1980                   if (next0 == next1)
1981                     {
1982                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1983                       next_index = next1;
1984                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1985                     }
1986                   break;
1987                 }
1988             }
1989         }
1990
1991       while (n_left_from > 0 && n_left_to_next > 0)
1992         {
1993           vlib_buffer_t * p0;
1994           ip4_header_t * ip0;
1995           udp_header_t * udp0;
1996           ip4_fib_mtrie_t * mtrie0;
1997           ip4_fib_mtrie_leaf_t leaf0;
1998           ip_adjacency_t * adj0;
1999           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
2000           i32 len_diff0;
2001           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2002       
2003           pi0 = to_next[0] = from[0];
2004           from += 1;
2005           n_left_from -= 1;
2006           to_next += 1;
2007           n_left_to_next -= 1;
2008       
2009           p0 = vlib_get_buffer (vm, pi0);
2010
2011           ip0 = vlib_buffer_get_current (p0);
2012
2013           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2014                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2015
2016           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2017
2018           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2019
2020           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2021
2022           /* Treat IP frag packets as "experimental" protocol for now
2023              until support of IP frag reassembly is implemented */
2024           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
2025           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2026           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2027
2028           flags0 = p0->flags;
2029
2030           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2031
2032           udp0 = ip4_next_header (ip0);
2033
2034           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2035           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2036
2037           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2038
2039           /* Verify UDP length. */
2040           ip_len0 = clib_net_to_host_u16 (ip0->length);
2041           udp_len0 = clib_net_to_host_u16 (udp0->length);
2042
2043           len_diff0 = ip_len0 - udp_len0;
2044
2045           len_diff0 = is_udp0 ? len_diff0 : 0;
2046
2047           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
2048             {
2049               if (is_tcp_udp0)
2050                 {
2051                   if (is_tcp_udp0
2052                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2053                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2054                   good_tcp_udp0 =
2055                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2056                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2057                 }
2058             }
2059
2060           good_tcp_udp0 &= len_diff0 >= 0;
2061
2062           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2063
2064           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2065
2066           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2067
2068           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2069           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2070                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2071                     : error0);
2072
2073           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2074
2075           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2076           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2077
2078           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2079                                                            &ip0->src_address,
2080                                                            /* no_default_route */ 1));
2081
2082           adj0 = ip_get_adjacency (lm, adj_index0);
2083
2084           /* Must have a route to source otherwise we drop the packet. */
2085           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2086                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2087                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2088                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2089                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2090                     ? IP4_ERROR_SRC_LOOKUP_MISS
2091                     : error0);
2092
2093           next0 = lm->local_next_by_ip_protocol[proto0];
2094
2095           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2096
2097           p0->error = error0? error_node->errors[error0] : 0;
2098
2099           if (PREDICT_FALSE (next0 != next_index))
2100             {
2101               n_left_to_next += 1;
2102               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2103
2104               next_index = next0;
2105               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2106               to_next[0] = pi0;
2107               to_next += 1;
2108               n_left_to_next -= 1;
2109             }
2110         }
2111   
2112       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2113     }
2114
2115   return frame->n_vectors;
2116 }
2117
2118 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2119   .function = ip4_local,
2120   .name = "ip4-local",
2121   .vector_size = sizeof (u32),
2122
2123   .format_trace = format_ip4_forward_next_trace,
2124
2125   .n_next_nodes = IP_LOCAL_N_NEXT,
2126   .next_nodes = {
2127     [IP_LOCAL_NEXT_DROP] = "error-drop",
2128     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2129     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2130     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2131   },
2132 };
2133
2134 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
2135
2136 void ip4_register_protocol (u32 protocol, u32 node_index)
2137 {
2138   vlib_main_t * vm = vlib_get_main();
2139   ip4_main_t * im = &ip4_main;
2140   ip_lookup_main_t * lm = &im->lookup_main;
2141
2142   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2143   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2144 }
2145
2146 static clib_error_t *
2147 show_ip_local_command_fn (vlib_main_t * vm,
2148                           unformat_input_t * input,
2149                          vlib_cli_command_t * cmd)
2150 {
2151   ip4_main_t * im = &ip4_main;
2152   ip_lookup_main_t * lm = &im->lookup_main;
2153   int i;
2154
2155   vlib_cli_output (vm, "Protocols handled by ip4_local");
2156   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2157     {
2158       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2159         vlib_cli_output (vm, "%d", i);
2160     }
2161   return 0;
2162 }
2163
2164
2165
2166 VLIB_CLI_COMMAND (show_ip_local, static) = {
2167   .path = "show ip local",
2168   .function = show_ip_local_command_fn,
2169   .short_help = "Show ip local protocol table",
2170 };
2171
2172 static uword
2173 ip4_arp (vlib_main_t * vm,
2174          vlib_node_runtime_t * node,
2175          vlib_frame_t * frame)
2176 {
2177   vnet_main_t * vnm = vnet_get_main();
2178   ip4_main_t * im = &ip4_main;
2179   ip_lookup_main_t * lm = &im->lookup_main;
2180   u32 * from, * to_next_drop;
2181   uword n_left_from, n_left_to_next_drop, next_index;
2182   static f64 time_last_seed_change = -1e100;
2183   static u32 hash_seeds[3];
2184   static uword hash_bitmap[256 / BITS (uword)]; 
2185   f64 time_now;
2186
2187   if (node->flags & VLIB_NODE_FLAG_TRACE)
2188     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2189
2190   time_now = vlib_time_now (vm);
2191   if (time_now - time_last_seed_change > 1e-3)
2192     {
2193       uword i;
2194       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2195                                              sizeof (hash_seeds));
2196       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2197         hash_seeds[i] = r[i];
2198
2199       /* Mark all hash keys as been no-seen before. */
2200       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2201         hash_bitmap[i] = 0;
2202
2203       time_last_seed_change = time_now;
2204     }
2205
2206   from = vlib_frame_vector_args (frame);
2207   n_left_from = frame->n_vectors;
2208   next_index = node->cached_next_index;
2209   if (next_index == IP4_ARP_NEXT_DROP)
2210     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2211
2212   while (n_left_from > 0)
2213     {
2214       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2215                            to_next_drop, n_left_to_next_drop);
2216
2217       while (n_left_from > 0 && n_left_to_next_drop > 0)
2218         {
2219           vlib_buffer_t * p0;
2220           ip4_header_t * ip0;
2221           ethernet_header_t * eh0;
2222           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2223           uword bm0;
2224           ip_adjacency_t * adj0;
2225
2226           pi0 = from[0];
2227
2228           p0 = vlib_get_buffer (vm, pi0);
2229
2230           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2231           adj0 = ip_get_adjacency (lm, adj_index0);
2232           ip0 = vlib_buffer_get_current (p0);
2233
2234           /* If packet destination is not local, send ARP to next hop */
2235           if (adj0->arp.next_hop.ip4.as_u32)
2236             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2237
2238           /* 
2239            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2240            * rewrite to this packet, we need to skip it here.
2241            * Note, to distinguish from src IP addr *.8.6.*, we
2242            * check for a bcast eth dest instead of IPv4 version.
2243            */
2244           eh0 = (ethernet_header_t*)ip0;
2245           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2246             {
2247               u32 vlan_num = 0;
2248               u16 * etype = &eh0->type;
2249               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2250                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2251                 {
2252                   vlan_num += 1;
2253                   etype += 2; //vlan tag also 16 bits, same as etype
2254                 }
2255               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2256                 {
2257                   vlib_buffer_advance (
2258                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2259                   ip0 = vlib_buffer_get_current (p0);
2260                 }
2261             }
2262
2263           a0 = hash_seeds[0];
2264           b0 = hash_seeds[1];
2265           c0 = hash_seeds[2];
2266
2267           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2268           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2269
2270           a0 ^= ip0->dst_address.data_u32;
2271           b0 ^= sw_if_index0;
2272
2273           hash_v3_finalize32 (a0, b0, c0);
2274
2275           c0 &= BITS (hash_bitmap) - 1;
2276           c0 = c0 / BITS (uword);
2277           m0 = (uword) 1 << (c0 % BITS (uword));
2278
2279           bm0 = hash_bitmap[c0];
2280           drop0 = (bm0 & m0) != 0;
2281
2282           /* Mark it as seen. */
2283           hash_bitmap[c0] = bm0 | m0;
2284
2285           from += 1;
2286           n_left_from -= 1;
2287           to_next_drop[0] = pi0;
2288           to_next_drop += 1;
2289           n_left_to_next_drop -= 1;
2290
2291           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2292
2293           if (drop0)
2294             continue;
2295
2296           /* 
2297            * Can happen if the control-plane is programming tables
2298            * with traffic flowing; at least that's today's lame excuse.
2299            */
2300           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2301             {
2302               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2303             }
2304           else
2305           /* Send ARP request. */
2306           {
2307             u32 bi0 = 0;
2308             vlib_buffer_t * b0;
2309             ethernet_arp_header_t * h0;
2310             vnet_hw_interface_t * hw_if0;
2311
2312             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2313
2314             /* Add rewrite/encap string for ARP packet. */
2315             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2316
2317             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2318
2319             /* Src ethernet address in ARP header. */
2320             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2321                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2322
2323             ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0);
2324
2325             /* Copy in destination address we are requesting. */
2326             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2327
2328             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2329             b0 = vlib_get_buffer (vm, bi0);
2330             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2331
2332             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2333
2334             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2335           }
2336         }
2337
2338       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2339     }
2340
2341   return frame->n_vectors;
2342 }
2343
2344 static char * ip4_arp_error_strings[] = {
2345   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2346   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2347   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2348   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2349   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2350 };
2351
2352 VLIB_REGISTER_NODE (ip4_arp_node) = {
2353   .function = ip4_arp,
2354   .name = "ip4-arp",
2355   .vector_size = sizeof (u32),
2356
2357   .format_trace = format_ip4_forward_next_trace,
2358
2359   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2360   .error_strings = ip4_arp_error_strings,
2361
2362   .n_next_nodes = IP4_ARP_N_NEXT,
2363   .next_nodes = {
2364     [IP4_ARP_NEXT_DROP] = "error-drop",
2365   },
2366 };
2367
2368 #define foreach_notrace_ip4_arp_error           \
2369 _(DROP)                                         \
2370 _(REQUEST_SENT)                                 \
2371 _(REPLICATE_DROP)                               \
2372 _(REPLICATE_FAIL)
2373
2374 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2375 {
2376   vlib_node_runtime_t *rt = 
2377     vlib_node_get_runtime (vm, ip4_arp_node.index);
2378
2379   /* don't trace ARP request packets */
2380 #define _(a)                                    \
2381     vnet_pcap_drop_trace_filter_add_del         \
2382         (rt->errors[IP4_ARP_ERROR_##a],         \
2383          1 /* is_add */);
2384     foreach_notrace_ip4_arp_error;
2385 #undef _
2386   return 0;
2387 }
2388
2389 VLIB_INIT_FUNCTION(arp_notrace_init);
2390
2391
2392 /* Send an ARP request to see if given destination is reachable on given interface. */
2393 clib_error_t *
2394 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2395 {
2396   vnet_main_t * vnm = vnet_get_main();
2397   ip4_main_t * im = &ip4_main;
2398   ethernet_arp_header_t * h;
2399   ip4_address_t * src;
2400   ip_interface_address_t * ia;
2401   ip_adjacency_t * adj;
2402   vnet_hw_interface_t * hi;
2403   vnet_sw_interface_t * si;
2404   vlib_buffer_t * b;
2405   u32 bi = 0;
2406
2407   si = vnet_get_sw_interface (vnm, sw_if_index);
2408
2409   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2410     {
2411       return clib_error_return (0, "%U: interface %U down",
2412                                 format_ip4_address, dst, 
2413                                 format_vnet_sw_if_index_name, vnm, 
2414                                 sw_if_index);
2415     }
2416
2417   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2418   if (! src)
2419     {
2420       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2421       return clib_error_return 
2422         (0, "no matching interface address for destination %U (interface %U)",
2423          format_ip4_address, dst,
2424          format_vnet_sw_if_index_name, vnm, sw_if_index);
2425     }
2426
2427   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2428
2429   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2430
2431   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2432
2433   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2434
2435   h->ip4_over_ethernet[0].ip4 = src[0];
2436   h->ip4_over_ethernet[1].ip4 = dst[0];
2437
2438   b = vlib_get_buffer (vm, bi);
2439   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2440
2441   /* Add encapsulation string for software interface (e.g. ethernet header). */
2442   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2443   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2444
2445   {
2446     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2447     u32 * to_next = vlib_frame_vector_args (f);
2448     to_next[0] = bi;
2449     f->n_vectors = 1;
2450     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2451   }
2452
2453   return /* no error */ 0;
2454 }
2455
/*
 * Next-node slots shared by the ip4 rewrite nodes.  The values index the
 * .next_nodes table of the ip4-rewrite-transit registration.
 */
typedef enum {
  /* Slot wired to "error-drop". */
  IP4_REWRITE_NEXT_DROP = 0,
  /* Slot wired to "ip4-arp". */
  IP4_REWRITE_NEXT_ARP = 1,
} ip4_rewrite_next_t;
2460
2461 always_inline uword
2462 ip4_rewrite_inline (vlib_main_t * vm,
2463                     vlib_node_runtime_t * node,
2464                     vlib_frame_t * frame,
2465                     int rewrite_for_locally_received_packets)
2466 {
2467   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2468   u32 * from = vlib_frame_vector_args (frame);
2469   u32 n_left_from, n_left_to_next, * to_next, next_index;
2470   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2471   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2472
2473   n_left_from = frame->n_vectors;
2474   next_index = node->cached_next_index;
2475   u32 cpu_index = os_get_cpu_number();
2476   
2477   while (n_left_from > 0)
2478     {
2479       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2480
2481       while (n_left_from >= 4 && n_left_to_next >= 2)
2482         {
2483           ip_adjacency_t * adj0, * adj1;
2484           vlib_buffer_t * p0, * p1;
2485           ip4_header_t * ip0, * ip1;
2486           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2487           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2488           u32 next0_override, next1_override;
2489       
2490           if (rewrite_for_locally_received_packets)
2491               next0_override = next1_override = 0;
2492
2493           /* Prefetch next iteration. */
2494           {
2495             vlib_buffer_t * p2, * p3;
2496
2497             p2 = vlib_get_buffer (vm, from[2]);
2498             p3 = vlib_get_buffer (vm, from[3]);
2499
2500             vlib_prefetch_buffer_header (p2, STORE);
2501             vlib_prefetch_buffer_header (p3, STORE);
2502
2503             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2504             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2505           }
2506
2507           pi0 = to_next[0] = from[0];
2508           pi1 = to_next[1] = from[1];
2509
2510           from += 2;
2511           n_left_from -= 2;
2512           to_next += 2;
2513           n_left_to_next -= 2;
2514       
2515           p0 = vlib_get_buffer (vm, pi0);
2516           p1 = vlib_get_buffer (vm, pi1);
2517
2518           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2519           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2520
2521           /* We should never rewrite a pkt using the MISS adjacency */
2522           ASSERT(adj_index0 && adj_index1);
2523
2524           ip0 = vlib_buffer_get_current (p0);
2525           ip1 = vlib_buffer_get_current (p1);
2526
2527           error0 = error1 = IP4_ERROR_NONE;
2528
2529           /* Decrement TTL & update checksum.
2530              Works either endian, so no need for byte swap. */
2531           if (! rewrite_for_locally_received_packets)
2532             {
2533               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2534
2535               /* Input node should have reject packets with ttl 0. */
2536               ASSERT (ip0->ttl > 0);
2537               ASSERT (ip1->ttl > 0);
2538
2539               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2540               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2541
2542               checksum0 += checksum0 >= 0xffff;
2543               checksum1 += checksum1 >= 0xffff;
2544
2545               ip0->checksum = checksum0;
2546               ip1->checksum = checksum1;
2547
2548               ttl0 -= 1;
2549               ttl1 -= 1;
2550
2551               ip0->ttl = ttl0;
2552               ip1->ttl = ttl1;
2553
2554               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2555               error1 = ttl1 <= 0 ? IP4_ERROR_TIME_EXPIRED : error1;
2556
2557               /* Verify checksum. */
2558               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2559               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2560             }
2561
2562           /* Rewrite packet header and updates lengths. */
2563           adj0 = ip_get_adjacency (lm, adj_index0);
2564           adj1 = ip_get_adjacency (lm, adj_index1);
2565       
2566           if (rewrite_for_locally_received_packets)
2567             {
2568               /*
2569                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2570                * we end up here with a local adjacency in hand
2571                * The local adj rewrite data is 0xfefe on purpose.
2572                * Bad engineer, no donut for you.
2573                */
2574               if (PREDICT_FALSE(adj0->lookup_next_index 
2575                                 == IP_LOOKUP_NEXT_LOCAL))
2576                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2577               if (PREDICT_FALSE(adj0->lookup_next_index
2578                                 == IP_LOOKUP_NEXT_ARP))
2579                 next0_override = IP4_REWRITE_NEXT_ARP;
2580               if (PREDICT_FALSE(adj1->lookup_next_index 
2581                                 == IP_LOOKUP_NEXT_LOCAL))
2582                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2583               if (PREDICT_FALSE(adj1->lookup_next_index
2584                                 == IP_LOOKUP_NEXT_ARP))
2585                 next1_override = IP4_REWRITE_NEXT_ARP;
2586             }
2587
2588           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2589           rw_len0 = adj0[0].rewrite_header.data_bytes;
2590           rw_len1 = adj1[0].rewrite_header.data_bytes;
2591           next0 = (error0 == IP4_ERROR_NONE) 
2592             ? adj0[0].rewrite_header.next_index : 0;
2593
2594           if (rewrite_for_locally_received_packets)
2595               next0 = next0 && next0_override ? next0_override : next0;
2596
2597           next1 = (error1 == IP4_ERROR_NONE)
2598             ? adj1[0].rewrite_header.next_index : 0;
2599
2600           if (rewrite_for_locally_received_packets)
2601               next1 = next1 && next1_override ? next1_override : next1;
2602
2603           /* 
2604            * We've already accounted for an ethernet_header_t elsewhere
2605            */
2606           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2607               vlib_increment_combined_counter 
2608                   (&lm->adjacency_counters,
2609                    cpu_index, adj_index0, 
2610                    /* packet increment */ 0,
2611                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2612
2613           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2614               vlib_increment_combined_counter 
2615                   (&lm->adjacency_counters,
2616                    cpu_index, adj_index1, 
2617                    /* packet increment */ 0,
2618                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2619
2620           /* Check MTU of outgoing interface. */
2621           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2622                     ? IP4_ERROR_MTU_EXCEEDED
2623                     : error0);
2624           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2625                     ? IP4_ERROR_MTU_EXCEEDED
2626                     : error1);
2627
2628           p0->current_data -= rw_len0;
2629           p1->current_data -= rw_len1;
2630
2631           p0->current_length += rw_len0;
2632           p1->current_length += rw_len1;
2633
2634           vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
2635           vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index;
2636       
2637           p0->error = error_node->errors[error0];
2638           p1->error = error_node->errors[error1];
2639
2640           /* Guess we are only writing on simple Ethernet header. */
2641           vnet_rewrite_two_headers (adj0[0], adj1[0],
2642                                     ip0, ip1,
2643                                     sizeof (ethernet_header_t));
2644       
2645           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2646                                            to_next, n_left_to_next,
2647                                            pi0, pi1, next0, next1);
2648         }
2649
2650       while (n_left_from > 0 && n_left_to_next > 0)
2651         {
2652           ip_adjacency_t * adj0;
2653           vlib_buffer_t * p0;
2654           ip4_header_t * ip0;
2655           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2656           u32 next0_override;
2657       
2658           if (rewrite_for_locally_received_packets)
2659               next0_override = 0;
2660
2661           pi0 = to_next[0] = from[0];
2662
2663           p0 = vlib_get_buffer (vm, pi0);
2664
2665           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2666
2667           /* We should never rewrite a pkt using the MISS adjacency */
2668           ASSERT(adj_index0);
2669
2670           adj0 = ip_get_adjacency (lm, adj_index0);
2671       
2672           ip0 = vlib_buffer_get_current (p0);
2673
2674           error0 = IP4_ERROR_NONE;
2675           next0 = 0;            /* drop on error */
2676
2677           /* Decrement TTL & update checksum. */
2678           if (! rewrite_for_locally_received_packets)
2679             {
2680               i32 ttl0 = ip0->ttl;
2681
2682               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2683
2684               checksum0 += checksum0 >= 0xffff;
2685
2686               ip0->checksum = checksum0;
2687
2688               ASSERT (ip0->ttl > 0);
2689
2690               ttl0 -= 1;
2691
2692               ip0->ttl = ttl0;
2693
2694               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2695
2696               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2697             }
2698
2699           if (rewrite_for_locally_received_packets)
2700             {
2701               /*
2702                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2703                * we end up here with a local adjacency in hand
2704                * The local adj rewrite data is 0xfefe on purpose.
2705                * Bad engineer, no donut for you.
2706                */
2707               if (PREDICT_FALSE(adj0->lookup_next_index 
2708                                 == IP_LOOKUP_NEXT_LOCAL))
2709                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2710               /* 
2711                * We have to override the next_index in ARP adjacencies,
2712                * because they're set up for ip4-arp, not this node...
2713                */
2714               if (PREDICT_FALSE(adj0->lookup_next_index
2715                                 == IP_LOOKUP_NEXT_ARP))
2716                 next0_override = IP4_REWRITE_NEXT_ARP;
2717             }
2718
2719           /* Guess we are only writing on simple Ethernet header. */
2720           vnet_rewrite_one_header (adj0[0], ip0, 
2721                                    sizeof (ethernet_header_t));
2722           
2723           /* Update packet buffer attributes/set output interface. */
2724           rw_len0 = adj0[0].rewrite_header.data_bytes;
2725           
2726           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2727               vlib_increment_combined_counter 
2728                   (&lm->adjacency_counters,
2729                    cpu_index, adj_index0, 
2730                    /* packet increment */ 0,
2731                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2732           
2733           /* Check MTU of outgoing interface. */
2734           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2735                     > adj0[0].rewrite_header.max_l3_packet_bytes
2736                     ? IP4_ERROR_MTU_EXCEEDED
2737                     : error0);
2738           
2739           p0->error = error_node->errors[error0];
2740           p0->current_data -= rw_len0;
2741           p0->current_length += rw_len0;
2742           vnet_buffer (p0)->sw_if_index[VLIB_TX] = 
2743             adj0[0].rewrite_header.sw_if_index;
2744           
2745           next0 = (error0 == IP4_ERROR_NONE)
2746             ? adj0[0].rewrite_header.next_index : 0;
2747
2748           if (rewrite_for_locally_received_packets)
2749               next0 = next0 && next0_override ? next0_override : next0;
2750
2751           from += 1;
2752           n_left_from -= 1;
2753           to_next += 1;
2754           n_left_to_next -= 1;
2755       
2756           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2757                                            to_next, n_left_to_next,
2758                                            pi0, next0);
2759         }
2760   
2761       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2762     }
2763
2764   /* Need to do trace after rewrites to pick up new packet data. */
2765   if (node->flags & VLIB_NODE_FLAG_TRACE)
2766     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2767
2768   return frame->n_vectors;
2769 }
2770
2771 static uword
2772 ip4_rewrite_transit (vlib_main_t * vm,
2773                      vlib_node_runtime_t * node,
2774                      vlib_frame_t * frame)
2775 {
2776   return ip4_rewrite_inline (vm, node, frame,
2777                              /* rewrite_for_locally_received_packets */ 0);
2778 }
2779
2780 static uword
2781 ip4_rewrite_local (vlib_main_t * vm,
2782                    vlib_node_runtime_t * node,
2783                    vlib_frame_t * frame)
2784 {
2785   return ip4_rewrite_inline (vm, node, frame,
2786                              /* rewrite_for_locally_received_packets */ 1);
2787 }
2788
2789 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2790   .function = ip4_rewrite_transit,
2791   .name = "ip4-rewrite-transit",
2792   .vector_size = sizeof (u32),
2793
2794   .format_trace = format_ip4_rewrite_trace,
2795
2796   .n_next_nodes = 2,
2797   .next_nodes = {
2798     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2799     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2800   },
2801 };
2802
2803 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
2804
2805 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
2806   .function = ip4_rewrite_local,
2807   .name = "ip4-rewrite-local",
2808   .vector_size = sizeof (u32),
2809
2810   .sibling_of = "ip4-rewrite-transit",
2811
2812   .format_trace = format_ip4_rewrite_trace,
2813
2814   .n_next_nodes = 0,
2815 };
2816
2817 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
2818
2819 static clib_error_t *
2820 add_del_interface_table (vlib_main_t * vm,
2821                          unformat_input_t * input,
2822                          vlib_cli_command_t * cmd)
2823 {
2824   vnet_main_t * vnm = vnet_get_main();
2825   clib_error_t * error = 0;
2826   u32 sw_if_index, table_id;
2827
2828   sw_if_index = ~0;
2829
2830   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2831     {
2832       error = clib_error_return (0, "unknown interface `%U'",
2833                                  format_unformat_error, input);
2834       goto done;
2835     }
2836
2837   if (unformat (input, "%d", &table_id))
2838     ;
2839   else
2840     {
2841       error = clib_error_return (0, "expected table id `%U'",
2842                                  format_unformat_error, input);
2843       goto done;
2844     }
2845
2846   {
2847     ip4_main_t * im = &ip4_main;
2848     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
2849
2850     if (fib) 
2851       {
2852         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2853         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
2854     }
2855   }
2856
2857  done:
2858   return error;
2859 }
2860
2861 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2862   .path = "set interface ip table",
2863   .function = add_del_interface_table,
2864   .short_help = "Add/delete FIB table id for interface",
2865 };
2866
2867
2868 static uword
2869 ip4_lookup_multicast (vlib_main_t * vm,
2870                       vlib_node_runtime_t * node,
2871                       vlib_frame_t * frame)
2872 {
2873   ip4_main_t * im = &ip4_main;
2874   ip_lookup_main_t * lm = &im->lookup_main;
2875   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
2876   u32 n_left_from, n_left_to_next, * from, * to_next;
2877   ip_lookup_next_t next;
2878   u32 cpu_index = os_get_cpu_number();
2879
2880   from = vlib_frame_vector_args (frame);
2881   n_left_from = frame->n_vectors;
2882   next = node->cached_next_index;
2883
2884   while (n_left_from > 0)
2885     {
2886       vlib_get_next_frame (vm, node, next,
2887                            to_next, n_left_to_next);
2888
2889       while (n_left_from >= 4 && n_left_to_next >= 2)
2890         {
2891           vlib_buffer_t * p0, * p1;
2892           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
2893           ip_lookup_next_t next0, next1;
2894           ip4_header_t * ip0, * ip1;
2895           ip_adjacency_t * adj0, * adj1;
2896           u32 fib_index0, fib_index1;
2897           u32 flow_hash_config0, flow_hash_config1;
2898
2899           /* Prefetch next iteration. */
2900           {
2901             vlib_buffer_t * p2, * p3;
2902
2903             p2 = vlib_get_buffer (vm, from[2]);
2904             p3 = vlib_get_buffer (vm, from[3]);
2905
2906             vlib_prefetch_buffer_header (p2, LOAD);
2907             vlib_prefetch_buffer_header (p3, LOAD);
2908
2909             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2910             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2911           }
2912
2913           pi0 = to_next[0] = from[0];
2914           pi1 = to_next[1] = from[1];
2915
2916           p0 = vlib_get_buffer (vm, pi0);
2917           p1 = vlib_get_buffer (vm, pi1);
2918
2919           ip0 = vlib_buffer_get_current (p0);
2920           ip1 = vlib_buffer_get_current (p1);
2921
2922           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2923           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2924           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2925             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2926           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2927             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2928
2929           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
2930                                               &ip0->dst_address, p0);
2931           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
2932                                               &ip1->dst_address, p1);
2933
2934           adj0 = ip_get_adjacency (lm, adj_index0);
2935           adj1 = ip_get_adjacency (lm, adj_index1);
2936
2937           next0 = adj0->lookup_next_index;
2938           next1 = adj1->lookup_next_index;
2939
2940           flow_hash_config0 = 
2941               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
2942
2943           flow_hash_config1 = 
2944               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
2945
2946           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
2947               (ip0, flow_hash_config0);
2948                                                                   
2949           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
2950               (ip1, flow_hash_config1);
2951
2952           ASSERT (adj0->n_adj > 0);
2953           ASSERT (adj1->n_adj > 0);
2954           ASSERT (is_pow2 (adj0->n_adj));
2955           ASSERT (is_pow2 (adj1->n_adj));
2956           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
2957           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
2958
2959           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2960           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2961
2962           if (1) /* $$$$$$ HACK FIXME */
2963           vlib_increment_combined_counter 
2964               (cm, cpu_index, adj_index0, 1,
2965                vlib_buffer_length_in_chain (vm, p0));
2966           if (1) /* $$$$$$ HACK FIXME */
2967           vlib_increment_combined_counter 
2968               (cm, cpu_index, adj_index1, 1,
2969                vlib_buffer_length_in_chain (vm, p1));
2970
2971           from += 2;
2972           to_next += 2;
2973           n_left_to_next -= 2;
2974           n_left_from -= 2;
2975
2976           wrong_next = (next0 != next) + 2*(next1 != next);
2977           if (PREDICT_FALSE (wrong_next != 0))
2978             {
2979               switch (wrong_next)
2980                 {
2981                 case 1:
2982                   /* A B A */
2983                   to_next[-2] = pi1;
2984                   to_next -= 1;
2985                   n_left_to_next += 1;
2986                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2987                   break;
2988
2989                 case 2:
2990                   /* A A B */
2991                   to_next -= 1;
2992                   n_left_to_next += 1;
2993                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2994                   break;
2995
2996                 case 3:
2997                   /* A B C */
2998                   to_next -= 2;
2999                   n_left_to_next += 2;
3000                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3001                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3002                   if (next0 == next1)
3003                     {
3004                       /* A B B */
3005                       vlib_put_next_frame (vm, node, next, n_left_to_next);
3006                       next = next1;
3007                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
3008                     }
3009                 }
3010             }
3011         }
3012     
3013       while (n_left_from > 0 && n_left_to_next > 0)
3014         {
3015           vlib_buffer_t * p0;
3016           ip4_header_t * ip0;
3017           u32 pi0, adj_index0;
3018           ip_lookup_next_t next0;
3019           ip_adjacency_t * adj0;
3020           u32 fib_index0;
3021           u32 flow_hash_config0;
3022
3023           pi0 = from[0];
3024           to_next[0] = pi0;
3025
3026           p0 = vlib_get_buffer (vm, pi0);
3027
3028           ip0 = vlib_buffer_get_current (p0);
3029
3030           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
3031                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3032           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3033               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3034           
3035           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3036                                               &ip0->dst_address, p0);
3037
3038           adj0 = ip_get_adjacency (lm, adj_index0);
3039
3040           next0 = adj0->lookup_next_index;
3041
3042           flow_hash_config0 = 
3043               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3044
3045           vnet_buffer (p0)->ip.flow_hash = 
3046             ip4_compute_flow_hash (ip0, flow_hash_config0);
3047
3048           ASSERT (adj0->n_adj > 0);
3049           ASSERT (is_pow2 (adj0->n_adj));
3050           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3051
3052           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3053
3054           if (1) /* $$$$$$ HACK FIXME */
3055               vlib_increment_combined_counter 
3056                   (cm, cpu_index, adj_index0, 1,
3057                    vlib_buffer_length_in_chain (vm, p0));
3058
3059           from += 1;
3060           to_next += 1;
3061           n_left_to_next -= 1;
3062           n_left_from -= 1;
3063
3064           if (PREDICT_FALSE (next0 != next))
3065             {
3066               n_left_to_next += 1;
3067               vlib_put_next_frame (vm, node, next, n_left_to_next);
3068               next = next0;
3069               vlib_get_next_frame (vm, node, next,
3070                                    to_next, n_left_to_next);
3071               to_next[0] = pi0;
3072               to_next += 1;
3073               n_left_to_next -= 1;
3074             }
3075         }
3076
3077       vlib_put_next_frame (vm, node, next, n_left_to_next);
3078     }
3079
3080   if (node->flags & VLIB_NODE_FLAG_TRACE)
3081       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
3082
3083   return frame->n_vectors;
3084 }
3085
3086 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3087   .function = ip4_lookup_multicast,
3088   .name = "ip4-lookup-multicast",
3089   .vector_size = sizeof (u32),
3090   .sibling_of = "ip4-lookup",
3091   .format_trace = format_ip4_lookup_trace,
3092
3093   .n_next_nodes = 0,
3094 };
3095
3096 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
3097
3098 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3099   .function = ip4_drop,
3100   .name = "ip4-multicast",
3101   .vector_size = sizeof (u32),
3102
3103   .format_trace = format_ip4_forward_next_trace,
3104
3105   .n_next_nodes = 1,
3106   .next_nodes = {
3107     [0] = "error-drop",
3108   },
3109 };
3110
3111 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3112 {
3113   ip4_main_t * im = &ip4_main;
3114   ip4_fib_mtrie_t * mtrie0;
3115   ip4_fib_mtrie_leaf_t leaf0;
3116   u32 adj_index0;
3117     
3118   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3119
3120   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3121   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3122   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3123   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3124   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3125   
3126   /* Handle default route. */
3127   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3128   
3129   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3130   
3131   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3132                                                   a, 
3133                                                   /* no_default_route */ 0);
3134 }
3135  
3136 static clib_error_t *
3137 test_lookup_command_fn (vlib_main_t * vm,
3138                         unformat_input_t * input,
3139                         vlib_cli_command_t * cmd)
3140 {
3141   u32 table_id = 0;
3142   f64 count = 1;
3143   u32 n;
3144   int i;
3145   ip4_address_t ip4_base_address;
3146   u64 errors = 0;
3147
3148   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3149       if (unformat (input, "table %d", &table_id))
3150         ;
3151       else if (unformat (input, "count %f", &count))
3152         ;
3153
3154       else if (unformat (input, "%U",
3155                          unformat_ip4_address, &ip4_base_address))
3156         ;
3157       else
3158         return clib_error_return (0, "unknown input `%U'",
3159                                   format_unformat_error, input);
3160   }
3161
3162   n = count;
3163
3164   for (i = 0; i < n; i++)
3165     {
3166       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3167         errors++;
3168
3169       ip4_base_address.as_u32 = 
3170         clib_host_to_net_u32 (1 + 
3171                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3172     }
3173
3174   if (errors) 
3175     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3176   else
3177     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3178
3179   return 0;
3180 }
3181
3182 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3183     .path = "test lookup",
3184     .short_help = "test lookup",
3185     .function = test_lookup_command_fn,
3186 };
3187
3188 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3189 {
3190   ip4_main_t * im4 = &ip4_main;
3191   ip4_fib_t * fib;
3192   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3193
3194   if (p == 0)
3195     return VNET_API_ERROR_NO_SUCH_FIB;
3196
3197   fib = vec_elt_at_index (im4->fibs, p[0]);
3198
3199   fib->flow_hash_config = flow_hash_config;
3200   return 0;
3201 }
3202  
3203 static clib_error_t *
3204 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3205                              unformat_input_t * input,
3206                              vlib_cli_command_t * cmd)
3207 {
3208   int matched = 0;
3209   u32 table_id = 0;
3210   u32 flow_hash_config = 0;
3211   int rv;
3212
3213   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3214     if (unformat (input, "table %d", &table_id))
3215       matched = 1;
3216 #define _(a,v) \
3217     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3218     foreach_flow_hash_bit
3219 #undef _
3220     else break;
3221   }
3222   
3223   if (matched == 0)
3224     return clib_error_return (0, "unknown input `%U'",
3225                               format_unformat_error, input);
3226   
3227   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3228   switch (rv)
3229     {
3230     case 0:
3231       break;
3232       
3233     case VNET_API_ERROR_NO_SUCH_FIB:
3234       return clib_error_return (0, "no such FIB table %d", table_id);
3235       
3236     default:
3237       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3238       break;
3239     }
3240   
3241   return 0;
3242 }
3243  
3244 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3245   .path = "set ip flow-hash",
3246   .short_help = 
3247   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3248   .function = set_ip_flow_hash_command_fn,
3249 };
3250  
3251 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3252                                  u32 table_index)
3253 {
3254   vnet_main_t * vnm = vnet_get_main();
3255   vnet_interface_main_t * im = &vnm->interface_main;
3256   ip4_main_t * ipm = &ip4_main;
3257   ip_lookup_main_t * lm = &ipm->lookup_main;
3258   vnet_classify_main_t * cm = &vnet_classify_main;
3259
3260   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3261     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3262
3263   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3264     return VNET_API_ERROR_NO_SUCH_ENTRY;
3265
3266   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3267   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3268
3269   return 0;
3270 }
3271
3272 static clib_error_t *
3273 set_ip_classify_command_fn (vlib_main_t * vm,
3274                             unformat_input_t * input,
3275                             vlib_cli_command_t * cmd)
3276 {
3277   u32 table_index = ~0;
3278   int table_index_set = 0;
3279   u32 sw_if_index = ~0;
3280   int rv;
3281   
3282   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3283     if (unformat (input, "table-index %d", &table_index))
3284       table_index_set = 1;
3285     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3286                        vnet_get_main(), &sw_if_index))
3287       ;
3288     else
3289       break;
3290   }
3291       
3292   if (table_index_set == 0)
3293     return clib_error_return (0, "classify table-index must be specified");
3294
3295   if (sw_if_index == ~0)
3296     return clib_error_return (0, "interface / subif must be specified");
3297
3298   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3299
3300   switch (rv)
3301     {
3302     case 0:
3303       break;
3304
3305     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3306       return clib_error_return (0, "No such interface");
3307
3308     case VNET_API_ERROR_NO_SUCH_ENTRY:
3309       return clib_error_return (0, "No such classifier table");
3310     }
3311   return 0;
3312 }
3313
3314 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3315     .path = "set ip classify",
3316     .short_help = 
3317     "set ip classify intfc <int> table-index <index>",
3318     .function = set_ip_classify_command_fn,
3319 };
3320