Add support for multiple microarchitectures in single binary
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47
48 /* This is really, really simple but stupid fib. */
49 u32
50 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
51                            ip4_address_t * dst,
52                            u32 disable_default_route)
53 {
54   ip_lookup_main_t * lm = &im->lookup_main;
55   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
56   uword * p, * hash, key;
57   i32 i, i_min, dst_address, ai;
58
59   i_min = disable_default_route ? 1 : 0;
60   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
61   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
62     {
63       hash = fib->adj_index_by_dst_address[i];
64       if (! hash)
65         continue;
66
67       key = dst_address & im->fib_masks[i];
68       if ((p = hash_get (hash, key)) != 0)
69         {
70           ai = p[0];
71           goto done;
72         }
73     }
74     
75   /* Nothing matches in table. */
76   ai = lm->miss_adj_index;
77
78  done:
79   return ai;
80 }
81
82 static ip4_fib_t *
83 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
84 {
85   ip4_fib_t * fib;
86   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
87   vec_add2 (im->fibs, fib, 1);
88   fib->table_id = table_id;
89   fib->index = fib - im->fibs;
90   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
91   fib->fwd_classify_table_index = ~0;
92   fib->rev_classify_table_index = ~0;
93   ip4_mtrie_init (&fib->mtrie);
94   return fib;
95 }
96
97 ip4_fib_t *
98 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
99                                    u32 table_index_or_id, u32 flags)
100 {
101   uword * p, fib_index;
102
103   fib_index = table_index_or_id;
104   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
105     {
106       if (table_index_or_id == ~0) {
107         table_index_or_id = 0;
108         while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
109           table_index_or_id++;
110         }
111         return create_fib_with_table_id (im, table_index_or_id);
112       }
113
114       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
115       if (! p)
116         return create_fib_with_table_id (im, table_index_or_id);
117       fib_index = p[0];
118     }
119   return vec_elt_at_index (im->fibs, fib_index);
120 }
121
122 static void
123 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
124                                        ip4_fib_t * fib,
125                                        u32 address_length)
126 {
127   hash_t * h;
128   uword max_index;
129
130   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
131   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
132
133   fib->adj_index_by_dst_address[address_length] =
134     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
135
136   hash_set_flags (fib->adj_index_by_dst_address[address_length],
137                   HASH_FLAG_NO_AUTO_SHRINK);
138
139   h = hash_header (fib->adj_index_by_dst_address[address_length]);
140   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
141
142   /* Initialize new/old hash value vectors. */
143   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
144   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
145 }
146
147 static void
148 ip4_fib_set_adj_index (ip4_main_t * im,
149                        ip4_fib_t * fib,
150                        u32 flags,
151                        u32 dst_address_u32,
152                        u32 dst_address_length,
153                        u32 adj_index)
154 {
155   ip_lookup_main_t * lm = &im->lookup_main;
156   uword * hash;
157
158   if (vec_bytes(fib->old_hash_values))
159     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
160   if (vec_bytes(fib->new_hash_values))
161     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
162   fib->new_hash_values[0] = adj_index;
163
164   /* Make sure adj index is valid. */
165   if (CLIB_DEBUG > 0)
166     (void) ip_get_adjacency (lm, adj_index);
167
168   hash = fib->adj_index_by_dst_address[dst_address_length];
169
170   hash = _hash_set3 (hash, dst_address_u32,
171                      fib->new_hash_values,
172                      fib->old_hash_values);
173
174   fib->adj_index_by_dst_address[dst_address_length] = hash;
175
176   if (vec_len (im->add_del_route_callbacks) > 0)
177     {
178       ip4_add_del_route_callback_t * cb;
179       ip4_address_t d;
180       uword * p;
181
182       d.data_u32 = dst_address_u32;
183       vec_foreach (cb, im->add_del_route_callbacks)
184         if ((flags & cb->required_flags) == cb->required_flags)
185           cb->function (im, cb->function_opaque,
186                         fib, flags,
187                         &d, dst_address_length,
188                         fib->old_hash_values,
189                         fib->new_hash_values);
190
191       p = hash_get (hash, dst_address_u32);
192       clib_memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
193     }
194 }
195
196 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
197 {
198   ip_lookup_main_t * lm = &im->lookup_main;
199   ip4_fib_t * fib;
200   u32 dst_address, dst_address_length, adj_index, old_adj_index;
201   uword * hash, is_del;
202   ip4_add_del_route_callback_t * cb;
203
204   /* Either create new adjacency or use given one depending on arguments. */
205   if (a->n_add_adj > 0)
206     {
207       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
208       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
209     }
210   else
211     adj_index = a->adj_index;
212
213   dst_address = a->dst_address.data_u32;
214   dst_address_length = a->dst_address_length;
215   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
216
217   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
218   dst_address &= im->fib_masks[dst_address_length];
219
220   if (! fib->adj_index_by_dst_address[dst_address_length])
221     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
222
223   hash = fib->adj_index_by_dst_address[dst_address_length];
224
225   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
226
227   if (is_del)
228     {
229       fib->old_hash_values[0] = ~0;
230       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
231       fib->adj_index_by_dst_address[dst_address_length] = hash;
232
233       if (vec_len (im->add_del_route_callbacks) > 0
234           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
235         {
236           fib->new_hash_values[0] = ~0;
237           vec_foreach (cb, im->add_del_route_callbacks)
238             if ((a->flags & cb->required_flags) == cb->required_flags)
239               cb->function (im, cb->function_opaque,
240                             fib, a->flags,
241                             &a->dst_address, dst_address_length,
242                             fib->old_hash_values,
243                             fib->new_hash_values);
244         }
245     }
246   else
247     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
248                            adj_index);
249
250   old_adj_index = fib->old_hash_values[0];
251
252   /* Avoid spurious reference count increments */
253   if (old_adj_index == adj_index
254       && adj_index != ~0
255       && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
256     {
257       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
258       if (adj->share_count > 0)
259         adj->share_count --;
260     }
261
262   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
263                                is_del ? old_adj_index : adj_index,
264                                is_del);
265
266   /* Delete old adjacency index if present and changed. */
267   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
268       && old_adj_index != ~0
269       && old_adj_index != adj_index)
270     ip_del_adjacency (lm, old_adj_index);
271 }
272
273 void
274 ip4_add_del_route_next_hop (ip4_main_t * im,
275                             u32 flags,
276                             ip4_address_t * dst_address,
277                             u32 dst_address_length,
278                             ip4_address_t * next_hop,
279                             u32 next_hop_sw_if_index,
280                             u32 next_hop_weight, u32 adj_index, 
281                             u32 explicit_fib_index)
282 {
283   vnet_main_t * vnm = vnet_get_main();
284   ip_lookup_main_t * lm = &im->lookup_main;
285   u32 fib_index;
286   ip4_fib_t * fib;
287   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
288   u32 dst_adj_index, nh_adj_index;
289   uword * dst_hash, * dst_result;
290   uword * nh_hash, * nh_result;
291   ip_adjacency_t * dst_adj;
292   ip_multipath_adjacency_t * old_mp, * new_mp;
293   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
294   int is_interface_next_hop;
295   clib_error_t * error = 0;
296
297   if (explicit_fib_index == (u32)~0)
298       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
299   else
300       fib_index = explicit_fib_index;
301
302   fib = vec_elt_at_index (im->fibs, fib_index);
303   
304   /* Lookup next hop to be added or deleted. */
305   is_interface_next_hop = next_hop->data_u32 == 0;
306   if (adj_index == (u32)~0)
307     {
308       if (is_interface_next_hop)
309         {
310           nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
311           if (nh_result)
312             nh_adj_index = *nh_result;
313           else
314             {
315               ip_adjacency_t * adj;
316               adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
317                                       &nh_adj_index);
318               ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
319               ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
320               hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
321             }
322         }
323       else
324         {
325           nh_hash = fib->adj_index_by_dst_address[32];
326           nh_result = hash_get (nh_hash, next_hop->data_u32);
327           
328           /* Next hop must be known. */
329           if (! nh_result)
330             {
331               ip_adjacency_t * adj;
332
333               nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
334                                                         next_hop, 0);
335               adj = ip_get_adjacency (lm, nh_adj_index);
336               /* if ARP interface adjacencty is present, we need to
337                  install ARP adjaceny for specific next hop */
338               if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
339                   adj->arp.next_hop.ip4.as_u32 == 0)
340                 {
341                   nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
342                 }
343               else
344                 {
345                   /* Next hop is not known, so create indirect adj */
346                   ip_adjacency_t add_adj;
347                   add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
348                   add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
349                   add_adj.explicit_fib_index = explicit_fib_index;
350                   ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
351                 }
352             }
353           else
354             nh_adj_index = *nh_result;
355         }
356     }
357   else
358     {
359       nh_adj_index = adj_index;
360     }
361   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
362   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
363
364   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
365   dst_result = hash_get (dst_hash, dst_address_u32);
366   if (dst_result)
367     {
368       dst_adj_index = dst_result[0];
369       dst_adj = ip_get_adjacency (lm, dst_adj_index);
370     }
371   else
372     {
373       /* For deletes destination must be known. */
374       if (is_del)
375         {
376           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
377           error = clib_error_return (0, "unknown destination %U/%d",
378                                      format_ip4_address, dst_address,
379                                      dst_address_length);
380           goto done;
381         }
382
383       dst_adj_index = ~0;
384       dst_adj = 0;
385     }
386
387   /* Ignore adds of X/32 with next hop of X. */
388   if (! is_del
389       && dst_address_length == 32
390       && dst_address->data_u32 == next_hop->data_u32 
391       && adj_index != (u32)~0)
392     {
393       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
394       error = clib_error_return (0, "prefix matches next hop %U/%d",
395                                  format_ip4_address, dst_address,
396                                  dst_address_length);
397       goto done;
398     }
399
400   /* Destination is not known and default weight is set so add route
401      to existing non-multipath adjacency */
402   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
403     {
404       /* create new adjacency */
405       ip4_add_del_route_args_t a;
406       a.table_index_or_table_id = fib_index;
407       a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
408                  | IP4_ROUTE_FLAG_FIB_INDEX
409                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
410                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
411                              | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
412       a.dst_address = dst_address[0];
413       a.dst_address_length = dst_address_length;
414       a.adj_index = nh_adj_index;
415       a.add_adj = 0;
416       a.n_add_adj = 0;
417
418       ip4_add_del_route (im, &a);
419
420       goto done;
421     }
422
423   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
424
425   if (! ip_multipath_adjacency_add_del_next_hop
426       (lm, is_del,
427        old_mp_adj_index,
428        nh_adj_index,
429        next_hop_weight,
430        &new_mp_adj_index))
431     {
432       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
433       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
434                                  format_ip4_address, next_hop);
435       goto done;
436     }
437   
438   old_mp = new_mp = 0;
439   if (old_mp_adj_index != ~0)
440     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
441   if (new_mp_adj_index != ~0)
442     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
443
444   if (old_mp != new_mp)
445     {
446       ip4_add_del_route_args_t a;
447       a.table_index_or_table_id = fib_index;
448       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
449                  | IP4_ROUTE_FLAG_FIB_INDEX
450                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
451                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
452       a.dst_address = dst_address[0];
453       a.dst_address_length = dst_address_length;
454       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
455       a.add_adj = 0;
456       a.n_add_adj = 0;
457
458       ip4_add_del_route (im, &a);
459     }
460
461  done:
462   if (error)
463     clib_error_report (error);
464 }
465
466 void *
467 ip4_get_route (ip4_main_t * im,
468                u32 table_index_or_table_id,
469                u32 flags,
470                u8 * address,
471                u32 address_length)
472 {
473   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
474   u32 dst_address = * (u32 *) address;
475   uword * hash, * p;
476
477   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
478   dst_address &= im->fib_masks[address_length];
479
480   hash = fib->adj_index_by_dst_address[address_length];
481   p = hash_get (hash, dst_address);
482   return (void *) p;
483 }
484
485 void
486 ip4_foreach_matching_route (ip4_main_t * im,
487                             u32 table_index_or_table_id,
488                             u32 flags,
489                             ip4_address_t * address,
490                             u32 address_length,
491                             ip4_address_t ** results,
492                             u8 ** result_lengths)
493 {
494   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
495   u32 dst_address = address->data_u32;
496   u32 this_length = address_length;
497   
498   if (*results)
499     _vec_len (*results) = 0;
500   if (*result_lengths)
501     _vec_len (*result_lengths) = 0;
502
503   while (this_length <= 32 && vec_len (results) == 0)
504     {
505       uword k, v;
506       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
507         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
508           {
509             ip4_address_t a;
510             a.data_u32 = k;
511             vec_add1 (*results, a);
512             vec_add1 (*result_lengths, this_length);
513           }
514       }));
515
516       this_length++;
517     }
518 }
519
520 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
521                                   u32 table_index_or_table_id,
522                                   u32 flags)
523 {
524   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
525   ip_lookup_main_t * lm = &im->lookup_main;
526   u32 i, l;
527   ip4_address_t a;
528   ip4_add_del_route_callback_t * cb;
529   static ip4_address_t * to_delete;
530
531   if (lm->n_adjacency_remaps == 0)
532     return;
533
534   for (l = 0; l <= 32; l++)
535     {
536       hash_pair_t * p;
537       uword * hash = fib->adj_index_by_dst_address[l];
538
539       if (hash_elts (hash) == 0)
540         continue;
541
542       if (to_delete)
543         _vec_len (to_delete) = 0;
544
545       hash_foreach_pair (p, hash, ({
546         u32 adj_index = p->value[0];
547         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
548
549         if (m)
550           {
551             /* Record destination address from hash key. */
552             a.data_u32 = p->key;
553
554             /* New adjacency points to nothing: so delete prefix. */
555             if (m == ~0)
556               vec_add1 (to_delete, a);
557             else
558               {
559                 /* Remap to new adjacency. */
560                 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
561
562                 /* Set new adjacency value. */
563                 fib->new_hash_values[0] = p->value[0] = m - 1;
564
565                 vec_foreach (cb, im->add_del_route_callbacks)
566                   if ((flags & cb->required_flags) == cb->required_flags)
567                     cb->function (im, cb->function_opaque,
568                                   fib, flags | IP4_ROUTE_FLAG_ADD,
569                                   &a, l,
570                                   fib->old_hash_values,
571                                   fib->new_hash_values);
572               }
573           }
574       }));
575
576       fib->new_hash_values[0] = ~0;
577       for (i = 0; i < vec_len (to_delete); i++)
578         {
579           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
580           vec_foreach (cb, im->add_del_route_callbacks)
581             if ((flags & cb->required_flags) == cb->required_flags)
582               cb->function (im, cb->function_opaque,
583                             fib, flags | IP4_ROUTE_FLAG_DEL,
584                             &a, l,
585                             fib->old_hash_values,
586                             fib->new_hash_values);
587         }
588     }
589
590   /* Also remap adjacencies in mtrie. */
591   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
592
593   /* Reset mapping table. */
594   vec_zero (lm->adjacency_remap_table);
595
596   /* All remaps have been performed. */
597   lm->n_adjacency_remaps = 0;
598 }
599
600 void ip4_delete_matching_routes (ip4_main_t * im,
601                                  u32 table_index_or_table_id,
602                                  u32 flags,
603                                  ip4_address_t * address,
604                                  u32 address_length)
605 {
606   static ip4_address_t * matching_addresses;
607   static u8 * matching_address_lengths;
608   u32 l, i;
609   ip4_add_del_route_args_t a;
610
611   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
612   a.table_index_or_table_id = table_index_or_table_id;
613   a.adj_index = ~0;
614   a.add_adj = 0;
615   a.n_add_adj = 0;
616
617   for (l = address_length + 1; l <= 32; l++)
618     {
619       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
620                                   address,
621                                   l,
622                                   &matching_addresses,
623                                   &matching_address_lengths);
624       for (i = 0; i < vec_len (matching_addresses); i++)
625         {
626           a.dst_address = matching_addresses[i];
627           a.dst_address_length = matching_address_lengths[i];
628           ip4_add_del_route (im, &a);
629         }
630     }
631
632   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
633 }
634
635 always_inline uword
636 ip4_lookup_inline (vlib_main_t * vm,
637                    vlib_node_runtime_t * node,
638                    vlib_frame_t * frame,
639                    int lookup_for_responses_to_locally_received_packets,
640                    int is_indirect)
641 {
642   ip4_main_t * im = &ip4_main;
643   ip_lookup_main_t * lm = &im->lookup_main;
644   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
645   u32 n_left_from, n_left_to_next, * from, * to_next;
646   ip_lookup_next_t next;
647   u32 cpu_index = os_get_cpu_number();
648
649   from = vlib_frame_vector_args (frame);
650   n_left_from = frame->n_vectors;
651   next = node->cached_next_index;
652
653   while (n_left_from > 0)
654     {
655       vlib_get_next_frame (vm, node, next,
656                            to_next, n_left_to_next);
657
658       while (n_left_from >= 4 && n_left_to_next >= 2)
659         {
660           vlib_buffer_t * p0, * p1;
661           ip4_header_t * ip0, * ip1;
662           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
663           ip_lookup_next_t next0, next1;
664           ip_adjacency_t * adj0, * adj1;
665           ip4_fib_mtrie_t * mtrie0, * mtrie1;
666           ip4_fib_mtrie_leaf_t leaf0, leaf1;
667           ip4_address_t * dst_addr0, *dst_addr1;
668           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
669           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
670           u32 flow_hash_config0, flow_hash_config1;
671           u32 hash_c0, hash_c1;
672           u32 wrong_next;
673
674           /* Prefetch next iteration. */
675           {
676             vlib_buffer_t * p2, * p3;
677
678             p2 = vlib_get_buffer (vm, from[2]);
679             p3 = vlib_get_buffer (vm, from[3]);
680
681             vlib_prefetch_buffer_header (p2, LOAD);
682             vlib_prefetch_buffer_header (p3, LOAD);
683
684             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
685             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
686           }
687
688           pi0 = to_next[0] = from[0];
689           pi1 = to_next[1] = from[1];
690
691           p0 = vlib_get_buffer (vm, pi0);
692           p1 = vlib_get_buffer (vm, pi1);
693
694           ip0 = vlib_buffer_get_current (p0);
695           ip1 = vlib_buffer_get_current (p1);
696
697           if (is_indirect)
698             {
699               ip_adjacency_t * iadj0, * iadj1;
700               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
701               iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
702               dst_addr0 = &iadj0->indirect.next_hop.ip4;
703               dst_addr1 = &iadj1->indirect.next_hop.ip4;
704             }
705           else
706             {
707               dst_addr0 = &ip0->dst_address;
708               dst_addr1 = &ip1->dst_address;
709             }
710
711           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
712           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
713           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
714             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
715           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
716             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
717
718
719           if (! lookup_for_responses_to_locally_received_packets)
720             {
721               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
722               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
723
724               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
725
726               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
727               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
728             }
729
730           tcp0 = (void *) (ip0 + 1);
731           tcp1 = (void *) (ip1 + 1);
732
733           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
734                          || ip0->protocol == IP_PROTOCOL_UDP);
735           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
736                          || ip1->protocol == IP_PROTOCOL_UDP);
737
738           if (! lookup_for_responses_to_locally_received_packets)
739             {
740               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
741               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
742             }
743
744           if (! lookup_for_responses_to_locally_received_packets)
745             {
746               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
747               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
748             }
749
750           if (! lookup_for_responses_to_locally_received_packets)
751             {
752               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
753               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
754             }
755
756           if (lookup_for_responses_to_locally_received_packets)
757             {
758               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
759               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
760             }
761           else
762             {
763               /* Handle default route. */
764               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
765               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
766
767               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
768               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
769             }
770
771           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
772                                                            dst_addr0,
773                                                            /* no_default_route */ 0));
774           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
775                                                            dst_addr1,
776                                                            /* no_default_route */ 0));
777           adj0 = ip_get_adjacency (lm, adj_index0);
778           adj1 = ip_get_adjacency (lm, adj_index1);
779
780           next0 = adj0->lookup_next_index;
781           next1 = adj1->lookup_next_index;
782
783           /* Use flow hash to compute multipath adjacency. */
784           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
785           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
786           if (PREDICT_FALSE (adj0->n_adj > 1))
787             {
788               flow_hash_config0 = 
789                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
790               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
791                 ip4_compute_flow_hash (ip0, flow_hash_config0);
792             }
793           if (PREDICT_FALSE(adj1->n_adj > 1))
794             {
795               flow_hash_config1 = 
796                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
797               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
798                 ip4_compute_flow_hash (ip1, flow_hash_config1);
799             }
800
801           ASSERT (adj0->n_adj > 0);
802           ASSERT (adj1->n_adj > 0);
803           ASSERT (is_pow2 (adj0->n_adj));
804           ASSERT (is_pow2 (adj1->n_adj));
805           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
806           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
807
808           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
809           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
810
811           vlib_increment_combined_counter 
812               (cm, cpu_index, adj_index0, 1,
813                vlib_buffer_length_in_chain (vm, p0) 
814                + sizeof(ethernet_header_t));
815           vlib_increment_combined_counter 
816               (cm, cpu_index, adj_index1, 1,
817                vlib_buffer_length_in_chain (vm, p1)
818                + sizeof(ethernet_header_t));
819
820           from += 2;
821           to_next += 2;
822           n_left_to_next -= 2;
823           n_left_from -= 2;
824
825           wrong_next = (next0 != next) + 2*(next1 != next);
826           if (PREDICT_FALSE (wrong_next != 0))
827             {
828               switch (wrong_next)
829                 {
830                 case 1:
831                   /* A B A */
832                   to_next[-2] = pi1;
833                   to_next -= 1;
834                   n_left_to_next += 1;
835                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
836                   break;
837
838                 case 2:
839                   /* A A B */
840                   to_next -= 1;
841                   n_left_to_next += 1;
842                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
843                   break;
844
845                 case 3:
846                   /* A B C */
847                   to_next -= 2;
848                   n_left_to_next += 2;
849                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
850                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
851                   if (next0 == next1)
852                     {
853                       /* A B B */
854                       vlib_put_next_frame (vm, node, next, n_left_to_next);
855                       next = next1;
856                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
857                     }
858                 }
859             }
860         }
861     
862       while (n_left_from > 0 && n_left_to_next > 0)
863         {
864           vlib_buffer_t * p0;
865           ip4_header_t * ip0;
866           __attribute__((unused)) tcp_header_t * tcp0;
867           ip_lookup_next_t next0;
868           ip_adjacency_t * adj0;
869           ip4_fib_mtrie_t * mtrie0;
870           ip4_fib_mtrie_leaf_t leaf0;
871           ip4_address_t * dst_addr0;
872           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
873           u32 flow_hash_config0, hash_c0;
874
875           pi0 = from[0];
876           to_next[0] = pi0;
877
878           p0 = vlib_get_buffer (vm, pi0);
879
880           ip0 = vlib_buffer_get_current (p0);
881
882           if (is_indirect)
883             {
884               ip_adjacency_t * iadj0;
885               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
886               dst_addr0 = &iadj0->indirect.next_hop.ip4;
887             }
888           else
889             {
890               dst_addr0 = &ip0->dst_address;
891             }
892
893           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
894           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
895             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
896
897           if (! lookup_for_responses_to_locally_received_packets)
898             {
899               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
900
901               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
902
903               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
904             }
905
906           tcp0 = (void *) (ip0 + 1);
907
908           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
909                          || ip0->protocol == IP_PROTOCOL_UDP);
910
911           if (! lookup_for_responses_to_locally_received_packets)
912             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
913
914           if (! lookup_for_responses_to_locally_received_packets)
915             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
916
917           if (! lookup_for_responses_to_locally_received_packets)
918             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
919
920           if (lookup_for_responses_to_locally_received_packets)
921             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
922           else
923             {
924               /* Handle default route. */
925               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
926               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
927             }
928
929           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
930                                                            dst_addr0,
931                                                            /* no_default_route */ 0));
932
933           adj0 = ip_get_adjacency (lm, adj_index0);
934
935           next0 = adj0->lookup_next_index;
936
937           /* Use flow hash to compute multipath adjacency. */
938           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
939           if (PREDICT_FALSE(adj0->n_adj > 1))
940             {
941               flow_hash_config0 = 
942                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
943
944               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
945                 ip4_compute_flow_hash (ip0, flow_hash_config0);
946             }
947
948           ASSERT (adj0->n_adj > 0);
949           ASSERT (is_pow2 (adj0->n_adj));
950           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
951
952           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
953
954           vlib_increment_combined_counter 
955               (cm, cpu_index, adj_index0, 1,
956                vlib_buffer_length_in_chain (vm, p0)
957                + sizeof(ethernet_header_t));
958
959           from += 1;
960           to_next += 1;
961           n_left_to_next -= 1;
962           n_left_from -= 1;
963
964           if (PREDICT_FALSE (next0 != next))
965             {
966               n_left_to_next += 1;
967               vlib_put_next_frame (vm, node, next, n_left_to_next);
968               next = next0;
969               vlib_get_next_frame (vm, node, next,
970                                    to_next, n_left_to_next);
971               to_next[0] = pi0;
972               to_next += 1;
973               n_left_to_next -= 1;
974             }
975         }
976
977       vlib_put_next_frame (vm, node, next, n_left_to_next);
978     }
979
980   return frame->n_vectors;
981 }
982
983 static uword
984 ip4_lookup (vlib_main_t * vm,
985             vlib_node_runtime_t * node,
986             vlib_frame_t * frame)
987 {
988   return ip4_lookup_inline (vm, node, frame,
989                             /* lookup_for_responses_to_locally_received_packets */ 0,
990                             /* is_indirect */ 0);
991
992 }
993
994 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
995                                         ip_adjacency_t * adj,
996                                         u32 sw_if_index,
997                                         u32 if_address_index)
998 {
999   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1000   ip_lookup_next_t n;
1001   vnet_l3_packet_type_t packet_type;
1002   u32 node_index;
1003
1004   if (hw->hw_class_index == ethernet_hw_interface_class.index
1005       || hw->hw_class_index == srp_hw_interface_class.index)
1006     {
1007       /* 
1008        * We have a bit of a problem in this case. ip4-arp uses
1009        * the rewrite_header.next_index to hand pkts to the
1010        * indicated inteface output node. We can end up in
1011        * ip4_rewrite_local, too, which also pays attention to 
1012        * rewrite_header.next index. Net result: a hack in
1013        * ip4_rewrite_local...
1014        */
1015       n = IP_LOOKUP_NEXT_ARP;
1016       node_index = ip4_arp_node.index;
1017       adj->if_address_index = if_address_index;
1018       adj->arp.next_hop.ip4.as_u32 = 0;
1019       ip46_address_reset(&adj->arp.next_hop);
1020       packet_type = VNET_L3_PACKET_TYPE_ARP;
1021     }
1022   else
1023     {
1024       n = IP_LOOKUP_NEXT_REWRITE;
1025       node_index = ip4_rewrite_node.index;
1026       packet_type = VNET_L3_PACKET_TYPE_IP4;
1027     }
1028
1029   adj->lookup_next_index = n;
1030   vnet_rewrite_for_sw_interface
1031     (vnm,
1032      packet_type,
1033      sw_if_index,
1034      node_index,
1035      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1036      &adj->rewrite_header,
1037      sizeof (adj->rewrite_data));
1038 }
1039
1040 static void
1041 ip4_add_interface_routes (u32 sw_if_index,
1042                           ip4_main_t * im, u32 fib_index,
1043                           ip_interface_address_t * a)
1044 {
1045   vnet_main_t * vnm = vnet_get_main();
1046   ip_lookup_main_t * lm = &im->lookup_main;
1047   ip_adjacency_t * adj;
1048   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1049   ip4_add_del_route_args_t x;
1050   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1051   u32 classify_table_index;
1052
1053   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1054   x.table_index_or_table_id = fib_index;
1055   x.flags = (IP4_ROUTE_FLAG_ADD
1056              | IP4_ROUTE_FLAG_FIB_INDEX
1057              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1058   x.dst_address = address[0];
1059   x.dst_address_length = a->address_length;
1060   x.n_add_adj = 0;
1061   x.add_adj = 0;
1062
1063   a->neighbor_probe_adj_index = ~0;
1064   if (a->address_length < 32)
1065     {
1066       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1067                               &x.adj_index);
1068       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1069       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1070       ip4_add_del_route (im, &x);
1071       a->neighbor_probe_adj_index = x.adj_index;
1072     }
1073   
1074   /* Add e.g. 1.1.1.1/32 as local to this host. */
1075   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1076                           &x.adj_index);
1077   
1078   classify_table_index = ~0;
1079   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1080     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1081   if (classify_table_index != (u32) ~0)
1082     {
1083       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1084       adj->classify.table_index = classify_table_index;
1085     }
1086   else
1087     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1088   
1089   adj->if_address_index = a - lm->if_address_pool;
1090   adj->rewrite_header.sw_if_index = sw_if_index;
1091   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1092   /* 
1093    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1094    * fail an RPF-ish check, but still go thru the rewrite code...
1095    */
1096   adj->rewrite_header.data_bytes = 0;
1097
1098   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1099   x.dst_address_length = 32;
1100   ip4_add_del_route (im, &x);
1101 }
1102
1103 static void
1104 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1105 {
1106   ip4_add_del_route_args_t x;
1107
1108   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1109   x.table_index_or_table_id = fib_index;
1110   x.flags = (IP4_ROUTE_FLAG_DEL
1111              | IP4_ROUTE_FLAG_FIB_INDEX
1112              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1113   x.dst_address = address[0];
1114   x.dst_address_length = address_length;
1115   x.adj_index = ~0;
1116   x.n_add_adj = 0;
1117   x.add_adj = 0;
1118
1119   if (address_length < 32)
1120     ip4_add_del_route (im, &x);
1121
1122   x.dst_address_length = 32;
1123   ip4_add_del_route (im, &x);
1124
1125   ip4_delete_matching_routes (im,
1126                               fib_index,
1127                               IP4_ROUTE_FLAG_FIB_INDEX,
1128                               address,
1129                               address_length);
1130 }
1131
1132 typedef struct {
1133     u32 sw_if_index;
1134     ip4_address_t address;
1135     u32 length;
1136 } ip4_interface_address_t;
1137
1138 static clib_error_t *
1139 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1140                                         u32 sw_if_index,
1141                                         ip4_address_t * new_address,
1142                                         u32 new_length,
1143                                         u32 redistribute,
1144                                         u32 insert_routes,
1145                                         u32 is_del);
1146
1147 static clib_error_t *
1148 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1149                                         u32 sw_if_index,
1150                                         ip4_address_t * address,
1151                                         u32 address_length,
1152                                         u32 redistribute,
1153                                         u32 insert_routes,
1154                                         u32 is_del)
1155 {
1156   vnet_main_t * vnm = vnet_get_main();
1157   ip4_main_t * im = &ip4_main;
1158   ip_lookup_main_t * lm = &im->lookup_main;
1159   clib_error_t * error = 0;
1160   u32 if_address_index, elts_before;
1161   ip4_address_fib_t ip4_af, * addr_fib = 0;
1162
1163   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1164   ip4_addr_fib_init (&ip4_af, address,
1165                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1166   vec_add1 (addr_fib, ip4_af);
1167
1168   /* When adding an address check that it does not conflict with an existing address. */
1169   if (! is_del)
1170     {
1171       ip_interface_address_t * ia;
1172       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1173                                     0 /* honor unnumbered */,
1174       ({
1175         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1176
1177         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1178             || ip4_destination_matches_route (im, x, address, address_length))
1179           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1180                                     format_ip4_address_and_length, address, address_length,
1181                                     format_ip4_address_and_length, x, ia->address_length,
1182                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1183       }));
1184     }
1185
1186   elts_before = pool_elts (lm->if_address_pool);
1187
1188   error = ip_interface_address_add_del
1189     (lm,
1190      sw_if_index,
1191      addr_fib,
1192      address_length,
1193      is_del,
1194      &if_address_index);
1195   if (error)
1196     goto done;
1197   
1198   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1199     {
1200       if (is_del)
1201         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1202                                   address_length);
1203       
1204       else
1205           ip4_add_interface_routes (sw_if_index,
1206                                     im, ip4_af.fib_index,
1207                                     pool_elt_at_index 
1208                                     (lm->if_address_pool, if_address_index));
1209     }
1210
1211   /* If pool did not grow/shrink: add duplicate address. */
1212   if (elts_before != pool_elts (lm->if_address_pool))
1213     {
1214       ip4_add_del_interface_address_callback_t * cb;
1215       vec_foreach (cb, im->add_del_interface_address_callbacks)
1216         cb->function (im, cb->function_opaque, sw_if_index,
1217                       address, address_length,
1218                       if_address_index,
1219                       is_del);
1220     }
1221
1222  done:
1223   vec_free (addr_fib);
1224   return error;
1225 }
1226
1227 clib_error_t *
1228 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1229                                ip4_address_t * address, u32 address_length,
1230                                u32 is_del)
1231 {
1232   return ip4_add_del_interface_address_internal
1233     (vm, sw_if_index, address, address_length,
1234      /* redistribute */ 1,
1235      /* insert_routes */ 1,
1236      is_del);
1237 }
1238
1239 static clib_error_t *
1240 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1241                                 u32 sw_if_index,
1242                                 u32 flags)
1243 {
1244   ip4_main_t * im = &ip4_main;
1245   ip_interface_address_t * ia;
1246   ip4_address_t * a;
1247   u32 is_admin_up, fib_index;
1248   
1249   /* Fill in lookup tables with default table (0). */
1250   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1251   
1252   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1253   
1254   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1255   
1256   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1257
1258   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1259                                 0 /* honor unnumbered */,
1260   ({
1261     a = ip_interface_address_get_address (&im->lookup_main, ia);
1262     if (is_admin_up)
1263       ip4_add_interface_routes (sw_if_index,
1264                                 im, fib_index,
1265                                 ia);
1266     else
1267       ip4_del_interface_routes (im, fib_index,
1268                                 a, ia->address_length);
1269   }));
1270
1271   return 0;
1272 }
1273  
1274 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1275
1276 static clib_error_t *
1277 ip4_sw_interface_add_del (vnet_main_t * vnm,
1278                           u32 sw_if_index,
1279                           u32 is_add)
1280 {
1281   vlib_main_t * vm = vnm->vlib_main;
1282   ip4_main_t * im = &ip4_main;
1283   ip_lookup_main_t * lm = &im->lookup_main;
1284   u32 ci, cast;
1285
1286   for (cast = 0; cast < VNET_N_CAST; cast++)
1287     {
1288       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1289       vnet_config_main_t * vcm = &cm->config_main;
1290
1291       if (! vcm->node_index_by_feature_index)
1292         {
1293           if (cast == VNET_UNICAST)
1294             {
1295               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1296               static char * feature_nodes[] = {
1297                 [IP4_RX_FEATURE_CHECK_ACCESS] = "ip4-inacl",
1298                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX] = "ip4-source-check-via-rx",
1299                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY] = "ip4-source-check-via-any",
1300                 [IP4_RX_FEATURE_IPSEC] = "ipsec-input-ip4",
1301                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1302                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup",
1303               };
1304
1305               vnet_config_init (vm, vcm,
1306                                 start_nodes, ARRAY_LEN (start_nodes),
1307                                 feature_nodes, ARRAY_LEN (feature_nodes));
1308             }
1309           else
1310             {
1311               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1312               static char * feature_nodes[] = {
1313                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1314                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup-multicast",
1315               };
1316
1317               vnet_config_init (vm, vcm,
1318                                 start_nodes, ARRAY_LEN (start_nodes),
1319                                 feature_nodes, ARRAY_LEN (feature_nodes));
1320             }
1321         }
1322
1323       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1324       ci = cm->config_index_by_sw_if_index[sw_if_index];
1325
1326       if (is_add)
1327         ci = vnet_config_add_feature (vm, vcm,
1328                                       ci,
1329                                       IP4_RX_FEATURE_LOOKUP,
1330                                       /* config data */ 0,
1331                                       /* # bytes of config data */ 0);
1332       else
1333         ci = vnet_config_del_feature (vm, vcm,
1334                                       ci,
1335                                       IP4_RX_FEATURE_LOOKUP,
1336                                       /* config data */ 0,
1337                                       /* # bytes of config data */ 0);
1338
1339       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1340     }
1341
1342   return /* no error */ 0;
1343 }
1344
1345 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1346
1347
1348 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1349   .function = ip4_lookup,
1350   .name = "ip4-lookup",
1351   .vector_size = sizeof (u32),
1352
1353   .n_next_nodes = IP_LOOKUP_N_NEXT,
1354   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1355 };
1356
1357 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
1358
1359 static uword
1360 ip4_indirect (vlib_main_t * vm,
1361                vlib_node_runtime_t * node,
1362                vlib_frame_t * frame)
1363 {
1364   return ip4_lookup_inline (vm, node, frame,
1365                             /* lookup_for_responses_to_locally_received_packets */ 0,
1366                             /* is_indirect */ 1);
1367 }
1368
1369 VLIB_REGISTER_NODE (ip4_indirect_node) = {
1370   .function = ip4_indirect,
1371   .name = "ip4-indirect",
1372   .vector_size = sizeof (u32),
1373
1374   .n_next_nodes = IP_LOOKUP_N_NEXT,
1375   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1376 };
1377
1378 VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect)
1379
1380
1381 /* Global IP4 main. */
1382 ip4_main_t ip4_main;
1383
1384 clib_error_t *
1385 ip4_lookup_init (vlib_main_t * vm)
1386 {
1387   ip4_main_t * im = &ip4_main;
1388   uword i;
1389
1390   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1391     {
1392       u32 m;
1393
1394       if (i < 32)
1395         m = pow2_mask (i) << (32 - i);
1396       else 
1397         m = ~0;
1398       im->fib_masks[i] = clib_host_to_net_u32 (m);
1399     }
1400
1401   /* Create FIB with index 0 and table id of 0. */
1402   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1403
1404   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1405
1406   {
1407     pg_node_t * pn;
1408     pn = pg_get_node (ip4_lookup_node.index);
1409     pn->unformat_edit = unformat_pg_ip4_header;
1410   }
1411
1412   {
1413     ethernet_arp_header_t h;
1414
1415     memset (&h, 0, sizeof (h));
1416
1417     /* Set target ethernet address to all zeros. */
1418     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1419
1420 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1421 #define _8(f,v) h.f = v;
1422     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1423     _16 (l3_type, ETHERNET_TYPE_IP4);
1424     _8 (n_l2_address_bytes, 6);
1425     _8 (n_l3_address_bytes, 4);
1426     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1427 #undef _16
1428 #undef _8
1429
1430     vlib_packet_template_init (vm,
1431                                &im->ip4_arp_request_packet_template,
1432                                /* data */ &h,
1433                                sizeof (h),
1434                                /* alloc chunk size */ 8,
1435                                "ip4 arp");
1436   }
1437
1438   return 0;
1439 }
1440
1441 VLIB_INIT_FUNCTION (ip4_lookup_init);
1442
1443 typedef struct {
1444   /* Adjacency taken. */
1445   u32 adj_index;
1446   u32 flow_hash;
1447   u32 fib_index;
1448
1449   /* Packet data, possibly *after* rewrite. */
1450   u8 packet_data[64 - 1*sizeof(u32)];
1451 } ip4_forward_next_trace_t;
1452
1453 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1454 {
1455   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1456   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1457   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1458   vnet_main_t * vnm = vnet_get_main();
1459   ip4_main_t * im = &ip4_main;
1460   ip_adjacency_t * adj;
1461   uword indent = format_get_indent (s);
1462
1463   adj = ip_get_adjacency (&im->lookup_main, t->adj_index);
1464   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1465               t->fib_index, t->adj_index, format_ip_adjacency,
1466               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1467   switch (adj->lookup_next_index)
1468     {
1469     case IP_LOOKUP_NEXT_REWRITE:
1470       s = format (s, "\n%U%U",
1471                   format_white_space, indent,
1472                   format_ip_adjacency_packet_data,
1473                   vnm, &im->lookup_main, t->adj_index,
1474                   t->packet_data, sizeof (t->packet_data));
1475       break;
1476
1477     default:
1478       break;
1479     }
1480
1481   return s;
1482 }
1483
1484 /* Common trace function for all ip4-forward next nodes. */
1485 void
1486 ip4_forward_next_trace (vlib_main_t * vm,
1487                         vlib_node_runtime_t * node,
1488                         vlib_frame_t * frame,
1489                         vlib_rx_or_tx_t which_adj_index)
1490 {
1491   u32 * from, n_left;
1492   ip4_main_t * im = &ip4_main;
1493
1494   n_left = frame->n_vectors;
1495   from = vlib_frame_vector_args (frame);
1496   
1497   while (n_left >= 4)
1498     {
1499       u32 bi0, bi1;
1500       vlib_buffer_t * b0, * b1;
1501       ip4_forward_next_trace_t * t0, * t1;
1502
1503       /* Prefetch next iteration. */
1504       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1505       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1506
1507       bi0 = from[0];
1508       bi1 = from[1];
1509
1510       b0 = vlib_get_buffer (vm, bi0);
1511       b1 = vlib_get_buffer (vm, bi1);
1512
1513       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1514         {
1515           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1516           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1517           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1518           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1519                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1520           clib_memcpy (t0->packet_data,
1521                   vlib_buffer_get_current (b0),
1522                   sizeof (t0->packet_data));
1523         }
1524       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1525         {
1526           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1527           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1528           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1529           t1->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1530                              vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1531           clib_memcpy (t1->packet_data,
1532                   vlib_buffer_get_current (b1),
1533                   sizeof (t1->packet_data));
1534         }
1535       from += 2;
1536       n_left -= 2;
1537     }
1538
1539   while (n_left >= 1)
1540     {
1541       u32 bi0;
1542       vlib_buffer_t * b0;
1543       ip4_forward_next_trace_t * t0;
1544
1545       bi0 = from[0];
1546
1547       b0 = vlib_get_buffer (vm, bi0);
1548
1549       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1550         {
1551           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1552           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1553           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1554           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1555                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1556           clib_memcpy (t0->packet_data,
1557                   vlib_buffer_get_current (b0),
1558                   sizeof (t0->packet_data));
1559         }
1560       from += 1;
1561       n_left -= 1;
1562     }
1563 }
1564
1565 static uword
1566 ip4_drop_or_punt (vlib_main_t * vm,
1567                   vlib_node_runtime_t * node,
1568                   vlib_frame_t * frame,
1569                   ip4_error_t error_code)
1570 {
1571   u32 * buffers = vlib_frame_vector_args (frame);
1572   uword n_packets = frame->n_vectors;
1573
1574   vlib_error_drop_buffers (vm, node,
1575                            buffers,
1576                            /* stride */ 1,
1577                            n_packets,
1578                            /* next */ 0,
1579                            ip4_input_node.index,
1580                            error_code);
1581
1582   if (node->flags & VLIB_NODE_FLAG_TRACE)
1583     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1584
1585   return n_packets;
1586 }
1587
1588 static uword
1589 ip4_drop (vlib_main_t * vm,
1590           vlib_node_runtime_t * node,
1591           vlib_frame_t * frame)
1592 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1593
1594 static uword
1595 ip4_punt (vlib_main_t * vm,
1596           vlib_node_runtime_t * node,
1597           vlib_frame_t * frame)
1598 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1599
1600 static uword
1601 ip4_miss (vlib_main_t * vm,
1602           vlib_node_runtime_t * node,
1603           vlib_frame_t * frame)
1604 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1605
1606 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1607   .function = ip4_drop,
1608   .name = "ip4-drop",
1609   .vector_size = sizeof (u32),
1610
1611   .format_trace = format_ip4_forward_next_trace,
1612
1613   .n_next_nodes = 1,
1614   .next_nodes = {
1615     [0] = "error-drop",
1616   },
1617 };
1618
1619 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1620
1621 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1622   .function = ip4_punt,
1623   .name = "ip4-punt",
1624   .vector_size = sizeof (u32),
1625
1626   .format_trace = format_ip4_forward_next_trace,
1627
1628   .n_next_nodes = 1,
1629   .next_nodes = {
1630     [0] = "error-punt",
1631   },
1632 };
1633
1634 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1635
1636 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1637   .function = ip4_miss,
1638   .name = "ip4-miss",
1639   .vector_size = sizeof (u32),
1640
1641   .format_trace = format_ip4_forward_next_trace,
1642
1643   .n_next_nodes = 1,
1644   .next_nodes = {
1645     [0] = "error-drop",
1646   },
1647 };
1648
1649 VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss)
1650
1651 /* Compute TCP/UDP/ICMP4 checksum in software. */
1652 u16
1653 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1654                               ip4_header_t * ip0)
1655 {
1656   ip_csum_t sum0;
1657   u32 ip_header_length, payload_length_host_byte_order;
1658   u32 n_this_buffer, n_bytes_left;
1659   u16 sum16;
1660   void * data_this_buffer;
1661   
1662   /* Initialize checksum with ip header. */
1663   ip_header_length = ip4_header_bytes (ip0);
1664   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1665   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1666
1667   if (BITS (uword) == 32)
1668     {
1669       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1670       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1671     }
1672   else
1673     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1674
1675   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1676   data_this_buffer = (void *) ip0 + ip_header_length;
1677   if (n_this_buffer + ip_header_length > p0->current_length)
1678     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1679   while (1)
1680     {
1681       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1682       n_bytes_left -= n_this_buffer;
1683       if (n_bytes_left == 0)
1684         break;
1685
1686       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1687       p0 = vlib_get_buffer (vm, p0->next_buffer);
1688       data_this_buffer = vlib_buffer_get_current (p0);
1689       n_this_buffer = p0->current_length;
1690     }
1691
1692   sum16 = ~ ip_csum_fold (sum0);
1693
1694   return sum16;
1695 }
1696
1697 static u32
1698 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1699 {
1700   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1701   udp_header_t * udp0;
1702   u16 sum16;
1703
1704   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1705           || ip0->protocol == IP_PROTOCOL_UDP);
1706
1707   udp0 = (void *) (ip0 + 1);
1708   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1709     {
1710       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1711                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1712       return p0->flags;
1713     }
1714
1715   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1716
1717   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1718                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1719
1720   return p0->flags;
1721 }
1722
1723 static uword
1724 ip4_local (vlib_main_t * vm,
1725            vlib_node_runtime_t * node,
1726            vlib_frame_t * frame)
1727 {
1728   ip4_main_t * im = &ip4_main;
1729   ip_lookup_main_t * lm = &im->lookup_main;
1730   ip_local_next_t next_index;
1731   u32 * from, * to_next, n_left_from, n_left_to_next;
1732   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1733
1734   from = vlib_frame_vector_args (frame);
1735   n_left_from = frame->n_vectors;
1736   next_index = node->cached_next_index;
1737   
1738   if (node->flags & VLIB_NODE_FLAG_TRACE)
1739     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1740
1741   while (n_left_from > 0)
1742     {
1743       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1744
1745       while (n_left_from >= 4 && n_left_to_next >= 2)
1746         {
1747           vlib_buffer_t * p0, * p1;
1748           ip4_header_t * ip0, * ip1;
1749           udp_header_t * udp0, * udp1;
1750           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1751           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1752           ip_adjacency_t * adj0, * adj1;
1753           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
1754           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
1755           i32 len_diff0, len_diff1;
1756           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1757           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1758           u8 enqueue_code;
1759       
1760           pi0 = to_next[0] = from[0];
1761           pi1 = to_next[1] = from[1];
1762           from += 2;
1763           n_left_from -= 2;
1764           to_next += 2;
1765           n_left_to_next -= 2;
1766       
1767           p0 = vlib_get_buffer (vm, pi0);
1768           p1 = vlib_get_buffer (vm, pi1);
1769
1770           ip0 = vlib_buffer_get_current (p0);
1771           ip1 = vlib_buffer_get_current (p1);
1772
1773           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1774                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1775           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1776                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1777
1778           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1779           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
1780
1781           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1782
1783           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1784           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1785
1786           proto0 = ip0->protocol;
1787           proto1 = ip1->protocol;
1788           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1789           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1790           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1791           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1792
1793           flags0 = p0->flags;
1794           flags1 = p1->flags;
1795
1796           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1797           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1798
1799           udp0 = ip4_next_header (ip0);
1800           udp1 = ip4_next_header (ip1);
1801
1802           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1803           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1804           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1805
1806           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1807           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1808
1809           /* Verify UDP length. */
1810           ip_len0 = clib_net_to_host_u16 (ip0->length);
1811           ip_len1 = clib_net_to_host_u16 (ip1->length);
1812           udp_len0 = clib_net_to_host_u16 (udp0->length);
1813           udp_len1 = clib_net_to_host_u16 (udp1->length);
1814
1815           len_diff0 = ip_len0 - udp_len0;
1816           len_diff1 = ip_len1 - udp_len1;
1817
1818           len_diff0 = is_udp0 ? len_diff0 : 0;
1819           len_diff1 = is_udp1 ? len_diff1 : 0;
1820
1821           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1822                                 & good_tcp_udp0 & good_tcp_udp1)))
1823             {
1824               if (is_tcp_udp0)
1825                 {
1826                   if (is_tcp_udp0
1827                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1828                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1829                   good_tcp_udp0 =
1830                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1831                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1832                 }
1833               if (is_tcp_udp1)
1834                 {
1835                   if (is_tcp_udp1
1836                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1837                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1838                   good_tcp_udp1 =
1839                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1840                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1841                 }
1842             }
1843
1844           good_tcp_udp0 &= len_diff0 >= 0;
1845           good_tcp_udp1 &= len_diff1 >= 0;
1846
1847           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1848           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1849
1850           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1851
1852           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1853           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1854
1855           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1856           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1857                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1858                     : error0);
1859           error1 = (is_tcp_udp1 && ! good_tcp_udp1
1860                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1861                     : error1);
1862
1863           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1864           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1865
1866           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1867           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1868
1869           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1870           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
1871
1872           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1873                                                            &ip0->src_address,
1874                                                            /* no_default_route */ 1));
1875           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
1876                                                            &ip1->src_address,
1877                                                            /* no_default_route */ 1));
1878
1879           adj0 = ip_get_adjacency (lm, adj_index0);
1880           adj1 = ip_get_adjacency (lm, adj_index1);
1881
1882           /* 
1883            * Must have a route to source otherwise we drop the packet.
1884            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1885            */
1886           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1887                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1888                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
1889                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1890                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1891                     ? IP4_ERROR_SRC_LOOKUP_MISS
1892                     : error0);
1893           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
1894                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1895                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
1896                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1897                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1898                     ? IP4_ERROR_SRC_LOOKUP_MISS
1899                     : error1);
1900
1901           next0 = lm->local_next_by_ip_protocol[proto0];
1902           next1 = lm->local_next_by_ip_protocol[proto1];
1903
1904           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1905           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1906
1907           p0->error = error0 ? error_node->errors[error0] : 0;
1908           p1->error = error1 ? error_node->errors[error1] : 0;
1909
1910           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1911
1912           if (PREDICT_FALSE (enqueue_code != 0))
1913             {
1914               switch (enqueue_code)
1915                 {
1916                 case 1:
1917                   /* A B A */
1918                   to_next[-2] = pi1;
1919                   to_next -= 1;
1920                   n_left_to_next += 1;
1921                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1922                   break;
1923
1924                 case 2:
1925                   /* A A B */
1926                   to_next -= 1;
1927                   n_left_to_next += 1;
1928                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1929                   break;
1930
1931                 case 3:
1932                   /* A B B or A B C */
1933                   to_next -= 2;
1934                   n_left_to_next += 2;
1935                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1936                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1937                   if (next0 == next1)
1938                     {
1939                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1940                       next_index = next1;
1941                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1942                     }
1943                   break;
1944                 }
1945             }
1946         }
1947
1948       while (n_left_from > 0 && n_left_to_next > 0)
1949         {
1950           vlib_buffer_t * p0;
1951           ip4_header_t * ip0;
1952           udp_header_t * udp0;
1953           ip4_fib_mtrie_t * mtrie0;
1954           ip4_fib_mtrie_leaf_t leaf0;
1955           ip_adjacency_t * adj0;
1956           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
1957           i32 len_diff0;
1958           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1959       
1960           pi0 = to_next[0] = from[0];
1961           from += 1;
1962           n_left_from -= 1;
1963           to_next += 1;
1964           n_left_to_next -= 1;
1965       
1966           p0 = vlib_get_buffer (vm, pi0);
1967
1968           ip0 = vlib_buffer_get_current (p0);
1969
1970           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1971                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1972
1973           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1974
1975           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1976
1977           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1978
1979           proto0 = ip0->protocol;
1980           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1981           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1982
1983           flags0 = p0->flags;
1984
1985           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1986
1987           udp0 = ip4_next_header (ip0);
1988
1989           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1990           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1991
1992           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1993
1994           /* Verify UDP length. */
1995           ip_len0 = clib_net_to_host_u16 (ip0->length);
1996           udp_len0 = clib_net_to_host_u16 (udp0->length);
1997
1998           len_diff0 = ip_len0 - udp_len0;
1999
2000           len_diff0 = is_udp0 ? len_diff0 : 0;
2001
2002           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
2003             {
2004               if (is_tcp_udp0)
2005                 {
2006                   if (is_tcp_udp0
2007                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2008                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2009                   good_tcp_udp0 =
2010                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2011                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2012                 }
2013             }
2014
2015           good_tcp_udp0 &= len_diff0 >= 0;
2016
2017           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2018
2019           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2020
2021           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2022
2023           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2024           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2025                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2026                     : error0);
2027
2028           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2029
2030           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2031           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2032
2033           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2034                                                            &ip0->src_address,
2035                                                            /* no_default_route */ 1));
2036
2037           adj0 = ip_get_adjacency (lm, adj_index0);
2038
2039           /* Must have a route to source otherwise we drop the packet. */
2040           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2041                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2042                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2043                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2044                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2045                     ? IP4_ERROR_SRC_LOOKUP_MISS
2046                     : error0);
2047
2048           next0 = lm->local_next_by_ip_protocol[proto0];
2049
2050           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2051
2052           p0->error = error0? error_node->errors[error0] : 0;
2053
2054           if (PREDICT_FALSE (next0 != next_index))
2055             {
2056               n_left_to_next += 1;
2057               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2058
2059               next_index = next0;
2060               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2061               to_next[0] = pi0;
2062               to_next += 1;
2063               n_left_to_next -= 1;
2064             }
2065         }
2066   
2067       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2068     }
2069
2070   return frame->n_vectors;
2071 }
2072
/* Registration of the "ip4-local" graph node: ip4_local is the node function,
   vector elements are u32-sized, and traces are rendered with
   format_ip4_forward_next_trace. */
2073 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2074   .function = ip4_local,
2075   .name = "ip4-local",
2076   .vector_size = sizeof (u32),
2077
2078   .format_trace = format_ip4_forward_next_trace,
2079
/* Statically known next nodes, indexed by IP_LOCAL_NEXT_*.
   ip4_register_protocol adds further next nodes at run time. */
2080   .n_next_nodes = IP_LOCAL_N_NEXT,
2081   .next_nodes = {
2082     [IP_LOCAL_NEXT_DROP] = "error-drop",
2083     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2084     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2085     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2086   },
2087 };
2088
/* NOTE(review): presumably emits per-microarchitecture variants of ip4_local
   for this node -- confirm against the macro definition. */
2089 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
2090
2091 void ip4_register_protocol (u32 protocol, u32 node_index)
2092 {
2093   vlib_main_t * vm = vlib_get_main();
2094   ip4_main_t * im = &ip4_main;
2095   ip_lookup_main_t * lm = &im->lookup_main;
2096
2097   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2098   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2099 }
2100
2101 static clib_error_t *
2102 show_ip_local_command_fn (vlib_main_t * vm,
2103                           unformat_input_t * input,
2104                          vlib_cli_command_t * cmd)
2105 {
2106   ip4_main_t * im = &ip4_main;
2107   ip_lookup_main_t * lm = &im->lookup_main;
2108   int i;
2109
2110   vlib_cli_output (vm, "Protocols handled by ip4_local");
2111   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2112     {
2113       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2114         vlib_cli_output (vm, "%d", i);
2115     }
2116   return 0;
2117 }
2118
2119
2120
/* CLI registration: the "show ip local" command is served by
   show_ip_local_command_fn. */
2121 VLIB_CLI_COMMAND (show_ip_local, static) = {
2122   .path = "show ip local",
2123   .function = show_ip_local_command_fn,
2124   .short_help = "Show ip local protocol table",
2125 };
2126
2127 static uword
2128 ip4_arp (vlib_main_t * vm,
2129          vlib_node_runtime_t * node,
2130          vlib_frame_t * frame)
2131 {
2132   vnet_main_t * vnm = vnet_get_main();
2133   ip4_main_t * im = &ip4_main;
2134   ip_lookup_main_t * lm = &im->lookup_main;
2135   u32 * from, * to_next_drop;
2136   uword n_left_from, n_left_to_next_drop, next_index;
2137   static f64 time_last_seed_change = -1e100;
2138   static u32 hash_seeds[3];
2139   static uword hash_bitmap[256 / BITS (uword)]; 
2140   f64 time_now;
2141
2142   if (node->flags & VLIB_NODE_FLAG_TRACE)
2143     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2144
2145   time_now = vlib_time_now (vm);
2146   if (time_now - time_last_seed_change > 1e-3)
2147     {
2148       uword i;
2149       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2150                                              sizeof (hash_seeds));
2151       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2152         hash_seeds[i] = r[i];
2153
2154       /* Mark all hash keys as been no-seen before. */
2155       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2156         hash_bitmap[i] = 0;
2157
2158       time_last_seed_change = time_now;
2159     }
2160
2161   from = vlib_frame_vector_args (frame);
2162   n_left_from = frame->n_vectors;
2163   next_index = node->cached_next_index;
2164   if (next_index == IP4_ARP_NEXT_DROP)
2165     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2166
2167   while (n_left_from > 0)
2168     {
2169       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2170                            to_next_drop, n_left_to_next_drop);
2171
2172       while (n_left_from > 0 && n_left_to_next_drop > 0)
2173         {
2174           vlib_buffer_t * p0;
2175           ip4_header_t * ip0;
2176           ethernet_header_t * eh0;
2177           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2178           uword bm0;
2179           ip_adjacency_t * adj0;
2180
2181           pi0 = from[0];
2182
2183           p0 = vlib_get_buffer (vm, pi0);
2184
2185           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2186           adj0 = ip_get_adjacency (lm, adj_index0);
2187           ip0 = vlib_buffer_get_current (p0);
2188
2189           /* If packet destination is not local, send ARP to next hop */
2190           if (adj0->arp.next_hop.ip4.as_u32)
2191             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2192
2193           /* 
2194            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2195            * rewrite to this packet, we need to skip it here.
2196            * Note, to distinguish from src IP addr *.8.6.*, we
2197            * check for a bcast eth dest instead of IPv4 version.
2198            */
2199           eh0 = (ethernet_header_t*)ip0;
2200           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2201             {
2202               u32 vlan_num = 0;
2203               u16 * etype = &eh0->type;
2204               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2205                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2206                 {
2207                   vlan_num += 1;
2208                   etype += 2; //vlan tag also 16 bits, same as etype
2209                 }
2210               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2211                 {
2212                   vlib_buffer_advance (
2213                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2214                   ip0 = vlib_buffer_get_current (p0);
2215                 }
2216             }
2217
2218           a0 = hash_seeds[0];
2219           b0 = hash_seeds[1];
2220           c0 = hash_seeds[2];
2221
2222           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2223           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2224
2225           a0 ^= ip0->dst_address.data_u32;
2226           b0 ^= sw_if_index0;
2227
2228           hash_v3_finalize32 (a0, b0, c0);
2229
2230           c0 &= BITS (hash_bitmap) - 1;
2231           c0 = c0 / BITS (uword);
2232           m0 = (uword) 1 << (c0 % BITS (uword));
2233
2234           bm0 = hash_bitmap[c0];
2235           drop0 = (bm0 & m0) != 0;
2236
2237           /* Mark it as seen. */
2238           hash_bitmap[c0] = bm0 | m0;
2239
2240           from += 1;
2241           n_left_from -= 1;
2242           to_next_drop[0] = pi0;
2243           to_next_drop += 1;
2244           n_left_to_next_drop -= 1;
2245
2246           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2247
2248           if (drop0)
2249             continue;
2250
2251           /* 
2252            * Can happen if the control-plane is programming tables
2253            * with traffic flowing; at least that's today's lame excuse.
2254            */
2255           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2256             {
2257               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2258             }
2259           else
2260           /* Send ARP request. */
2261           {
2262             u32 bi0 = 0;
2263             vlib_buffer_t * b0;
2264             ethernet_arp_header_t * h0;
2265             vnet_hw_interface_t * hw_if0;
2266
2267             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2268
2269             /* Add rewrite/encap string for ARP packet. */
2270             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2271
2272             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2273
2274             /* Src ethernet address in ARP header. */
2275             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2276                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2277
2278             ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0);
2279
2280             /* Copy in destination address we are requesting. */
2281             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2282
2283             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2284             b0 = vlib_get_buffer (vm, bi0);
2285             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2286
2287             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2288
2289             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2290           }
2291         }
2292
2293       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2294     }
2295
2296   return frame->n_vectors;
2297 }
2298
/* Counter names for the ip4-arp node, indexed by the IP4_ARP_ERROR_* values;
   referenced by the ip4_arp_node registration as its error_strings. */
2299 static char * ip4_arp_error_strings[] = {
2300   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2301   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2302   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2303   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2304   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2305 };
2306
/* Registration of the "ip4-arp" graph node: ip4_arp is the node function,
   vector elements are u32-sized, and traces are rendered with
   format_ip4_forward_next_trace. */
2307 VLIB_REGISTER_NODE (ip4_arp_node) = {
2308   .function = ip4_arp,
2309   .name = "ip4-arp",
2310   .vector_size = sizeof (u32),
2311
2312   .format_trace = format_ip4_forward_next_trace,
2313
/* Error counters are named by ip4_arp_error_strings. */
2314   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2315   .error_strings = ip4_arp_error_strings,
2316
/* Only the drop next is named here.  ip4_arp also enqueues ARP requests to
   adj->rewrite_header.next_index.
   NOTE(review): those next slots are presumably added at run time -- confirm. */
2317   .n_next_nodes = IP4_ARP_N_NEXT,
2318   .next_nodes = {
2319     [IP4_ARP_NEXT_DROP] = "error-drop",
2320   },
2321 };
2322
2323 #define foreach_notrace_ip4_arp_error           \
2324 _(DROP)                                         \
2325 _(REQUEST_SENT)                                 \
2326 _(REPLICATE_DROP)                               \
2327 _(REPLICATE_FAIL)
2328
2329 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2330 {
2331   vlib_node_runtime_t *rt = 
2332     vlib_node_get_runtime (vm, ip4_arp_node.index);
2333
2334   /* don't trace ARP request packets */
2335 #define _(a)                                    \
2336     vnet_pcap_drop_trace_filter_add_del         \
2337         (rt->errors[IP4_ARP_ERROR_##a],         \
2338          1 /* is_add */);
2339     foreach_notrace_ip4_arp_error;
2340 #undef _
2341   return 0;
2342 }
2343
2344 VLIB_INIT_FUNCTION(arp_notrace_init);
2345
2346
2347 /* Send an ARP request to see if given destination is reachable on given interface. */
2348 clib_error_t *
2349 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2350 {
2351   vnet_main_t * vnm = vnet_get_main();
2352   ip4_main_t * im = &ip4_main;
2353   ethernet_arp_header_t * h;
2354   ip4_address_t * src;
2355   ip_interface_address_t * ia;
2356   ip_adjacency_t * adj;
2357   vnet_hw_interface_t * hi;
2358   vnet_sw_interface_t * si;
2359   vlib_buffer_t * b;
2360   u32 bi = 0;
2361
2362   si = vnet_get_sw_interface (vnm, sw_if_index);
2363
2364   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2365     {
2366       return clib_error_return (0, "%U: interface %U down",
2367                                 format_ip4_address, dst, 
2368                                 format_vnet_sw_if_index_name, vnm, 
2369                                 sw_if_index);
2370     }
2371
2372   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2373   if (! src)
2374     {
2375       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2376       return clib_error_return 
2377         (0, "no matching interface address for destination %U (interface %U)",
2378          format_ip4_address, dst,
2379          format_vnet_sw_if_index_name, vnm, sw_if_index);
2380     }
2381
2382   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2383
2384   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2385
2386   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2387
2388   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2389
2390   h->ip4_over_ethernet[0].ip4 = src[0];
2391   h->ip4_over_ethernet[1].ip4 = dst[0];
2392
2393   b = vlib_get_buffer (vm, bi);
2394   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2395
2396   /* Add encapsulation string for software interface (e.g. ethernet header). */
2397   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2398   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2399
2400   {
2401     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2402     u32 * to_next = vlib_frame_vector_args (f);
2403     to_next[0] = bi;
2404     f->n_vectors = 1;
2405     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2406   }
2407
2408   return /* no error */ 0;
2409 }
2410
/* Values of ip4_rewrite_next_t.
   NOTE(review): presumably the fixed next-node slots of the ip4 rewrite
   nodes -- confirm against their registrations. */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ARP = 1,
} ip4_rewrite_next_t;
2415
2416 always_inline uword
2417 ip4_rewrite_inline (vlib_main_t * vm,
2418                     vlib_node_runtime_t * node,
2419                     vlib_frame_t * frame,
2420                     int rewrite_for_locally_received_packets)
2421 {
2422   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2423   u32 * from = vlib_frame_vector_args (frame);
2424   u32 n_left_from, n_left_to_next, * to_next, next_index;
2425   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2426   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2427
2428   n_left_from = frame->n_vectors;
2429   next_index = node->cached_next_index;
2430   u32 cpu_index = os_get_cpu_number();
2431   
2432   while (n_left_from > 0)
2433     {
2434       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2435
2436       while (n_left_from >= 4 && n_left_to_next >= 2)
2437         {
2438           ip_adjacency_t * adj0, * adj1;
2439           vlib_buffer_t * p0, * p1;
2440           ip4_header_t * ip0, * ip1;
2441           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2442           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2443           u32 next0_override, next1_override;
2444       
2445           if (rewrite_for_locally_received_packets)
2446               next0_override = next1_override = 0;
2447
2448           /* Prefetch next iteration. */
2449           {
2450             vlib_buffer_t * p2, * p3;
2451
2452             p2 = vlib_get_buffer (vm, from[2]);
2453             p3 = vlib_get_buffer (vm, from[3]);
2454
2455             vlib_prefetch_buffer_header (p2, STORE);
2456             vlib_prefetch_buffer_header (p3, STORE);
2457
2458             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2459             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2460           }
2461
2462           pi0 = to_next[0] = from[0];
2463           pi1 = to_next[1] = from[1];
2464
2465           from += 2;
2466           n_left_from -= 2;
2467           to_next += 2;
2468           n_left_to_next -= 2;
2469       
2470           p0 = vlib_get_buffer (vm, pi0);
2471           p1 = vlib_get_buffer (vm, pi1);
2472
2473           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2474           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2475
2476           /* We should never rewrite a pkt using the MISS adjacency */
2477           ASSERT(adj_index0 && adj_index1);
2478
2479           ip0 = vlib_buffer_get_current (p0);
2480           ip1 = vlib_buffer_get_current (p1);
2481
2482           error0 = error1 = IP4_ERROR_NONE;
2483
2484           /* Decrement TTL & update checksum.
2485              Works either endian, so no need for byte swap. */
2486           if (! rewrite_for_locally_received_packets)
2487             {
2488               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2489
2490               /* Input node should have rejected packets with ttl 0. */
2491               ASSERT (ip0->ttl > 0);
2492               ASSERT (ip1->ttl > 0);
2493
2494               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2495               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2496
2497               checksum0 += checksum0 >= 0xffff;
2498               checksum1 += checksum1 >= 0xffff;
2499
2500               ip0->checksum = checksum0;
2501               ip1->checksum = checksum1;
2502
2503               ttl0 -= 1;
2504               ttl1 -= 1;
2505
2506               ip0->ttl = ttl0;
2507               ip1->ttl = ttl1;
2508
2509               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2510               error1 = ttl1 <= 0 ? IP4_ERROR_TIME_EXPIRED : error1;
2511
2512               /* Verify checksum. */
2513               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2514               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2515             }
2516
2517           /* Rewrite packet header and updates lengths. */
2518           adj0 = ip_get_adjacency (lm, adj_index0);
2519           adj1 = ip_get_adjacency (lm, adj_index1);
2520       
2521           if (rewrite_for_locally_received_packets)
2522             {
2523               /*
2524                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2525                * we end up here with a local adjacency in hand
2526                * The local adj rewrite data is 0xfefe on purpose.
2527                * Bad engineer, no donut for you.
2528                */
2529               if (PREDICT_FALSE(adj0->lookup_next_index 
2530                                 == IP_LOOKUP_NEXT_LOCAL))
2531                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2532               if (PREDICT_FALSE(adj0->lookup_next_index
2533                                 == IP_LOOKUP_NEXT_ARP))
2534                 next0_override = IP4_REWRITE_NEXT_ARP;
2535               if (PREDICT_FALSE(adj1->lookup_next_index 
2536                                 == IP_LOOKUP_NEXT_LOCAL))
2537                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2538               if (PREDICT_FALSE(adj1->lookup_next_index
2539                                 == IP_LOOKUP_NEXT_ARP))
2540                 next1_override = IP4_REWRITE_NEXT_ARP;
2541             }
2542
2543           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2544           rw_len0 = adj0[0].rewrite_header.data_bytes;
2545           rw_len1 = adj1[0].rewrite_header.data_bytes;
2546           next0 = (error0 == IP4_ERROR_NONE) 
2547             ? adj0[0].rewrite_header.next_index : 0;
2548
2549           if (rewrite_for_locally_received_packets)
2550               next0 = next0 && next0_override ? next0_override : next0;
2551
2552           next1 = (error1 == IP4_ERROR_NONE)
2553             ? adj1[0].rewrite_header.next_index : 0;
2554
2555           if (rewrite_for_locally_received_packets)
2556               next1 = next1 && next1_override ? next1_override : next1;
2557
2558           /* 
2559            * We've already accounted for an ethernet_header_t elsewhere
2560            */
2561           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2562               vlib_increment_combined_counter 
2563                   (&lm->adjacency_counters,
2564                    cpu_index, adj_index0, 
2565                    /* packet increment */ 0,
2566                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2567
2568           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2569               vlib_increment_combined_counter 
2570                   (&lm->adjacency_counters,
2571                    cpu_index, adj_index1, 
2572                    /* packet increment */ 0,
2573                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2574
2575           /* Check MTU of outgoing interface. */
2576           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2577                     ? IP4_ERROR_MTU_EXCEEDED
2578                     : error0);
2579           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2580                     ? IP4_ERROR_MTU_EXCEEDED
2581                     : error1);
2582
2583           p0->current_data -= rw_len0;
2584           p1->current_data -= rw_len1;
2585
2586           p0->current_length += rw_len0;
2587           p1->current_length += rw_len1;
2588
2589           vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
2590           vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index;
2591       
2592           p0->error = error_node->errors[error0];
2593           p1->error = error_node->errors[error1];
2594
2595           /* Guess we are only writing on simple Ethernet header. */
2596           vnet_rewrite_two_headers (adj0[0], adj1[0],
2597                                     ip0, ip1,
2598                                     sizeof (ethernet_header_t));
2599       
2600           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2601                                            to_next, n_left_to_next,
2602                                            pi0, pi1, next0, next1);
2603         }
2604
2605       while (n_left_from > 0 && n_left_to_next > 0)
2606         {
2607           ip_adjacency_t * adj0;
2608           vlib_buffer_t * p0;
2609           ip4_header_t * ip0;
2610           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2611           u32 next0_override;
2612       
2613           if (rewrite_for_locally_received_packets)
2614               next0_override = 0;
2615
2616           pi0 = to_next[0] = from[0];
2617
2618           p0 = vlib_get_buffer (vm, pi0);
2619
2620           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2621
2622           /* We should never rewrite a pkt using the MISS adjacency */
2623           ASSERT(adj_index0);
2624
2625           adj0 = ip_get_adjacency (lm, adj_index0);
2626       
2627           ip0 = vlib_buffer_get_current (p0);
2628
2629           error0 = IP4_ERROR_NONE;
2630           next0 = 0;            /* drop on error */
2631
2632           /* Decrement TTL & update checksum. */
2633           if (! rewrite_for_locally_received_packets)
2634             {
2635               i32 ttl0 = ip0->ttl;
2636
2637               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2638
2639               checksum0 += checksum0 >= 0xffff;
2640
2641               ip0->checksum = checksum0;
2642
2643               ASSERT (ip0->ttl > 0);
2644
2645               ttl0 -= 1;
2646
2647               ip0->ttl = ttl0;
2648
2649               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2650
2651               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2652             }
2653
2654           if (rewrite_for_locally_received_packets)
2655             {
2656               /*
2657                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2658                * we end up here with a local adjacency in hand
2659                * The local adj rewrite data is 0xfefe on purpose.
2660                * Bad engineer, no donut for you.
2661                */
2662               if (PREDICT_FALSE(adj0->lookup_next_index 
2663                                 == IP_LOOKUP_NEXT_LOCAL))
2664                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2665               /* 
2666                * We have to override the next_index in ARP adjacencies,
2667                * because they're set up for ip4-arp, not this node...
2668                */
2669               if (PREDICT_FALSE(adj0->lookup_next_index
2670                                 == IP_LOOKUP_NEXT_ARP))
2671                 next0_override = IP4_REWRITE_NEXT_ARP;
2672             }
2673
2674           /* Guess we are only writing on simple Ethernet header. */
2675           vnet_rewrite_one_header (adj0[0], ip0, 
2676                                    sizeof (ethernet_header_t));
2677           
2678           /* Update packet buffer attributes/set output interface. */
2679           rw_len0 = adj0[0].rewrite_header.data_bytes;
2680           
2681           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2682               vlib_increment_combined_counter 
2683                   (&lm->adjacency_counters,
2684                    cpu_index, adj_index0, 
2685                    /* packet increment */ 0,
2686                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2687           
2688           /* Check MTU of outgoing interface. */
2689           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2690                     > adj0[0].rewrite_header.max_l3_packet_bytes
2691                     ? IP4_ERROR_MTU_EXCEEDED
2692                     : error0);
2693           
2694           p0->error = error_node->errors[error0];
2695           p0->current_data -= rw_len0;
2696           p0->current_length += rw_len0;
2697           vnet_buffer (p0)->sw_if_index[VLIB_TX] = 
2698             adj0[0].rewrite_header.sw_if_index;
2699           
2700           next0 = (error0 == IP4_ERROR_NONE)
2701             ? adj0[0].rewrite_header.next_index : 0;
2702
2703           if (rewrite_for_locally_received_packets)
2704               next0 = next0 && next0_override ? next0_override : next0;
2705
2706           from += 1;
2707           n_left_from -= 1;
2708           to_next += 1;
2709           n_left_to_next -= 1;
2710       
2711           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2712                                            to_next, n_left_to_next,
2713                                            pi0, next0);
2714         }
2715   
2716       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2717     }
2718
2719   /* Need to do trace after rewrites to pick up new packet data. */
2720   if (node->flags & VLIB_NODE_FLAG_TRACE)
2721     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2722
2723   return frame->n_vectors;
2724 }
2725
2726 static uword
2727 ip4_rewrite_transit (vlib_main_t * vm,
2728                      vlib_node_runtime_t * node,
2729                      vlib_frame_t * frame)
2730 {
2731   return ip4_rewrite_inline (vm, node, frame,
2732                              /* rewrite_for_locally_received_packets */ 0);
2733 }
2734
2735 static uword
2736 ip4_rewrite_local (vlib_main_t * vm,
2737                    vlib_node_runtime_t * node,
2738                    vlib_frame_t * frame)
2739 {
2740   return ip4_rewrite_inline (vm, node, frame,
2741                              /* rewrite_for_locally_received_packets */ 1);
2742 }
2743
2744 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2745   .function = ip4_rewrite_transit,
2746   .name = "ip4-rewrite-transit",
2747   .vector_size = sizeof (u32),
2748
2749   .format_trace = format_ip4_forward_next_trace,
2750
2751   .n_next_nodes = 2,
2752   .next_nodes = {
2753     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2754     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2755   },
2756 };
2757
2758 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
2759
2760 VLIB_REGISTER_NODE (ip4_rewrite_local_node,static) = {
2761   .function = ip4_rewrite_local,
2762   .name = "ip4-rewrite-local",
2763   .vector_size = sizeof (u32),
2764
2765   .sibling_of = "ip4-rewrite-transit",
2766
2767   .format_trace = format_ip4_forward_next_trace,
2768
2769   .n_next_nodes = 2,
2770   .next_nodes = {
2771     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2772     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2773   },
2774 };
2775
2776 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
2777
2778 static clib_error_t *
2779 add_del_interface_table (vlib_main_t * vm,
2780                          unformat_input_t * input,
2781                          vlib_cli_command_t * cmd)
2782 {
2783   vnet_main_t * vnm = vnet_get_main();
2784   clib_error_t * error = 0;
2785   u32 sw_if_index, table_id;
2786
2787   sw_if_index = ~0;
2788
2789   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2790     {
2791       error = clib_error_return (0, "unknown interface `%U'",
2792                                  format_unformat_error, input);
2793       goto done;
2794     }
2795
2796   if (unformat (input, "%d", &table_id))
2797     ;
2798   else
2799     {
2800       error = clib_error_return (0, "expected table id `%U'",
2801                                  format_unformat_error, input);
2802       goto done;
2803     }
2804
2805   {
2806     ip4_main_t * im = &ip4_main;
2807     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
2808
2809     if (fib) 
2810       {
2811         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2812         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
2813     }
2814   }
2815
2816  done:
2817   return error;
2818 }
2819
2820 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2821   .path = "set interface ip table",
2822   .function = add_del_interface_table,
2823   .short_help = "Add/delete FIB table id for interface",
2824 };
2825
2826
/*
 * ip4_lookup_multicast: node function registered for the
 * "ip4-lookup-multicast" graph node.
 *
 * For every buffer in the frame:
 *   - pick the FIB index from the RX interface, unless
 *     sw_if_index[VLIB_TX] is set (!= ~0), in which case that value is
 *     used as the FIB index;
 *   - look up the destination address to get an adjacency index;
 *   - compute the flow hash with the FIB's flow_hash_config, store it
 *     in the buffer and use it to select one of the adjacency's n_adj
 *     entries (n_adj is asserted to be a power of two);
 *   - store the chosen adjacency index in ip.adj_index[VLIB_TX] and
 *     bump that adjacency's combined counter by one packet and the
 *     buffer chain length in bytes;
 *   - hand the buffer to the next node given by the adjacency's
 *     lookup_next_index.
 *
 * Buffers are enqueued speculatively to the current next frame and
 * moved afterwards when their next index turns out to differ.
 *
 * Returns the number of vectors in the input frame.
 */
2827 static uword
2828 ip4_lookup_multicast (vlib_main_t * vm,
2829                       vlib_node_runtime_t * node,
2830                       vlib_frame_t * frame)
2831 {
2832   ip4_main_t * im = &ip4_main;
2833   ip_lookup_main_t * lm = &im->lookup_main;
2834   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
2835   u32 n_left_from, n_left_to_next, * from, * to_next;
2836   ip_lookup_next_t next;
2837   u32 cpu_index = os_get_cpu_number();
2838
2839   from = vlib_frame_vector_args (frame);
2840   n_left_from = frame->n_vectors;
       /* Start with the next index this node used last time. */
2841   next = node->cached_next_index;
2842
2843   while (n_left_from > 0)
2844     {
2845       vlib_get_next_frame (vm, node, next,
2846                            to_next, n_left_to_next);
2847
           /* Dual loop: two buffers per iteration; needs four buffers left
              because buffers 2 and 3 are prefetched. */
2848       while (n_left_from >= 4 && n_left_to_next >= 2)
2849         {
2850           vlib_buffer_t * p0, * p1;
2851           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
2852           ip_lookup_next_t next0, next1;
2853           ip4_header_t * ip0, * ip1;
2854           ip_adjacency_t * adj0, * adj1;
2855           u32 fib_index0, fib_index1;
2856           u32 flow_hash_config0, flow_hash_config1;
2857
2858           /* Prefetch next iteration. */
2859           {
2860             vlib_buffer_t * p2, * p3;
2861
2862             p2 = vlib_get_buffer (vm, from[2]);
2863             p3 = vlib_get_buffer (vm, from[3]);
2864
2865             vlib_prefetch_buffer_header (p2, LOAD);
2866             vlib_prefetch_buffer_header (p3, LOAD);
2867
2868             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2869             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2870           }
2871
               /* Speculatively enqueue both buffers to the current next frame. */
2872           pi0 = to_next[0] = from[0];
2873           pi1 = to_next[1] = from[1];
2874
2875           p0 = vlib_get_buffer (vm, pi0);
2876           p1 = vlib_get_buffer (vm, pi1);
2877
2878           ip0 = vlib_buffer_get_current (p0);
2879           ip1 = vlib_buffer_get_current (p1);
2880
               /* FIB index: taken from the RX interface, overridden by
                  sw_if_index[VLIB_TX] when that field is not ~0. */
2881           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2882           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2883           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2884             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2885           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2886             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2887
2888           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
2889                                               &ip0->dst_address, p0);
2890           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
2891                                               &ip1->dst_address, p1);
2892
2893           adj0 = ip_get_adjacency (lm, adj_index0);
2894           adj1 = ip_get_adjacency (lm, adj_index1);
2895
2896           next0 = adj0->lookup_next_index;
2897           next1 = adj1->lookup_next_index;
2898
2899           flow_hash_config0 = 
2900               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
2901
2902           flow_hash_config1 = 
2903               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
2904
2905           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
2906               (ip0, flow_hash_config0);
2907                                                                   
2908           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
2909               (ip1, flow_hash_config1);
2910
               /* Use the flow hash to pick one of the n_adj adjacencies that
                  start at the looked-up index; the mask needs n_adj to be a
                  power of two. */
2911           ASSERT (adj0->n_adj > 0);
2912           ASSERT (adj1->n_adj > 0);
2913           ASSERT (is_pow2 (adj0->n_adj));
2914           ASSERT (is_pow2 (adj1->n_adj));
2915           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
2916           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
2917
2918           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2919           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2920
               /* Count one packet plus its chain length in bytes against the
                  selected adjacency. */
2921           if (1) /* $$$$$$ HACK FIXME */
2922           vlib_increment_combined_counter 
2923               (cm, cpu_index, adj_index0, 1,
2924                vlib_buffer_length_in_chain (vm, p0));
2925           if (1) /* $$$$$$ HACK FIXME */
2926           vlib_increment_combined_counter 
2927               (cm, cpu_index, adj_index1, 1,
2928                vlib_buffer_length_in_chain (vm, p1));
2929
2930           from += 2;
2931           to_next += 2;
2932           n_left_to_next -= 2;
2933           n_left_from -= 2;
2934
               /* Bit 0 set: buffer 0 belongs to another next node;
                  bit 1 set: buffer 1 belongs to another next node. */
2935           wrong_next = (next0 != next) + 2*(next1 != next);
2936           if (PREDICT_FALSE (wrong_next != 0))
2937             {
2938               switch (wrong_next)
2939                 {
2940                 case 1:
2941                   /* A B A */
                       /* Only buffer 0 is wrong: slide buffer 1 into its slot,
                          give one slot back and send buffer 0 to next0. */
2942                   to_next[-2] = pi1;
2943                   to_next -= 1;
2944                   n_left_to_next += 1;
2945                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2946                   break;
2947
2948                 case 2:
2949                   /* A A B */
                       /* Only buffer 1 is wrong: give its slot back and send
                          it to next1. */
2950                   to_next -= 1;
2951                   n_left_to_next += 1;
2952                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2953                   break;
2954
2955                 case 3:
2956                   /* A B C */
                       /* Both are wrong: give both slots back and send each
                          buffer to its own next node. */
2957                   to_next -= 2;
2958                   n_left_to_next += 2;
2959                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2960                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2961                   if (next0 == next1)
2962                     {
2963                       /* A B B */
                           /* Both went to the same node: make it the current
                              next frame from here on. */
2964                       vlib_put_next_frame (vm, node, next, n_left_to_next);
2965                       next = next1;
2966                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2967                     }
2968                 }
2969             }
2970         }
2971     
           /* Single loop: same processing, one buffer per iteration. */
2972       while (n_left_from > 0 && n_left_to_next > 0)
2973         {
2974           vlib_buffer_t * p0;
2975           ip4_header_t * ip0;
2976           u32 pi0, adj_index0;
2977           ip_lookup_next_t next0;
2978           ip_adjacency_t * adj0;
2979           u32 fib_index0;
2980           u32 flow_hash_config0;
2981
2982           pi0 = from[0];
2983           to_next[0] = pi0;
2984
2985           p0 = vlib_get_buffer (vm, pi0);
2986
2987           ip0 = vlib_buffer_get_current (p0);
2988
2989           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2990                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2991           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2992               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2993           
2994           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
2995                                               &ip0->dst_address, p0);
2996
2997           adj0 = ip_get_adjacency (lm, adj_index0);
2998
2999           next0 = adj0->lookup_next_index;
3000
3001           flow_hash_config0 = 
3002               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3003
3004           vnet_buffer (p0)->ip.flow_hash = 
3005             ip4_compute_flow_hash (ip0, flow_hash_config0);
3006
3007           ASSERT (adj0->n_adj > 0);
3008           ASSERT (is_pow2 (adj0->n_adj));
3009           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3010
3011           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3012
3013           if (1) /* $$$$$$ HACK FIXME */
3014               vlib_increment_combined_counter 
3015                   (cm, cpu_index, adj_index0, 1,
3016                    vlib_buffer_length_in_chain (vm, p0));
3017
3018           from += 1;
3019           to_next += 1;
3020           n_left_to_next -= 1;
3021           n_left_from -= 1;
3022
               /* Wrong guess: undo the speculative enqueue, close the current
                  frame, switch to next0 and enqueue the buffer there. */
3023           if (PREDICT_FALSE (next0 != next))
3024             {
3025               n_left_to_next += 1;
3026               vlib_put_next_frame (vm, node, next, n_left_to_next);
3027               next = next0;
3028               vlib_get_next_frame (vm, node, next,
3029                                    to_next, n_left_to_next);
3030               to_next[0] = pi0;
3031               to_next += 1;
3032               n_left_to_next -= 1;
3033             }
3034         }
3035
3036       vlib_put_next_frame (vm, node, next, n_left_to_next);
3037     }
3038
3039   return frame->n_vectors;
3040 }
3041
3042 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3043   .function = ip4_lookup_multicast,
3044   .name = "ip4-lookup-multicast",
3045   .vector_size = sizeof (u32),
3046
3047   .n_next_nodes = IP_LOOKUP_N_NEXT,
3048   .next_nodes = IP4_LOOKUP_NEXT_NODES,
3049 };
3050
3051 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
3052
3053 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3054   .function = ip4_drop,
3055   .name = "ip4-multicast",
3056   .vector_size = sizeof (u32),
3057
3058   .format_trace = format_ip4_forward_next_trace,
3059
3060   .n_next_nodes = 1,
3061   .next_nodes = {
3062     [0] = "error-drop",
3063   },
3064 };
3065
3066 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3067 {
3068   ip4_main_t * im = &ip4_main;
3069   ip4_fib_mtrie_t * mtrie0;
3070   ip4_fib_mtrie_leaf_t leaf0;
3071   u32 adj_index0;
3072     
3073   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3074
3075   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3076   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3077   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3078   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3079   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3080   
3081   /* Handle default route. */
3082   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3083   
3084   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3085   
3086   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3087                                                   a, 
3088                                                   /* no_default_route */ 0);
3089 }
3090  
3091 static clib_error_t *
3092 test_lookup_command_fn (vlib_main_t * vm,
3093                         unformat_input_t * input,
3094                         vlib_cli_command_t * cmd)
3095 {
3096   u32 table_id = 0;
3097   f64 count = 1;
3098   u32 n;
3099   int i;
3100   ip4_address_t ip4_base_address;
3101   u64 errors = 0;
3102
3103   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3104       if (unformat (input, "table %d", &table_id))
3105         ;
3106       else if (unformat (input, "count %f", &count))
3107         ;
3108
3109       else if (unformat (input, "%U",
3110                          unformat_ip4_address, &ip4_base_address))
3111         ;
3112       else
3113         return clib_error_return (0, "unknown input `%U'",
3114                                   format_unformat_error, input);
3115   }
3116
3117   n = count;
3118
3119   for (i = 0; i < n; i++)
3120     {
3121       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3122         errors++;
3123
3124       ip4_base_address.as_u32 = 
3125         clib_host_to_net_u32 (1 + 
3126                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3127     }
3128
3129   if (errors) 
3130     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3131   else
3132     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3133
3134   return 0;
3135 }
3136
3137 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3138     .path = "test lookup",
3139     .short_help = "test lookup",
3140     .function = test_lookup_command_fn,
3141 };
3142
3143 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3144 {
3145   ip4_main_t * im4 = &ip4_main;
3146   ip4_fib_t * fib;
3147   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3148
3149   if (p == 0)
3150     return VNET_API_ERROR_NO_SUCH_FIB;
3151
3152   fib = vec_elt_at_index (im4->fibs, p[0]);
3153
3154   fib->flow_hash_config = flow_hash_config;
3155   return 0;
3156 }
3157  
3158 static clib_error_t *
3159 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3160                              unformat_input_t * input,
3161                              vlib_cli_command_t * cmd)
3162 {
3163   int matched = 0;
3164   u32 table_id = 0;
3165   u32 flow_hash_config = 0;
3166   int rv;
3167
3168   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3169     if (unformat (input, "table %d", &table_id))
3170       matched = 1;
3171 #define _(a,v) \
3172     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3173     foreach_flow_hash_bit
3174 #undef _
3175     else break;
3176   }
3177   
3178   if (matched == 0)
3179     return clib_error_return (0, "unknown input `%U'",
3180                               format_unformat_error, input);
3181   
3182   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3183   switch (rv)
3184     {
3185     case 0:
3186       break;
3187       
3188     case VNET_API_ERROR_NO_SUCH_FIB:
3189       return clib_error_return (0, "no such FIB table %d", table_id);
3190       
3191     default:
3192       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3193       break;
3194     }
3195   
3196   return 0;
3197 }
3198  
3199 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3200   .path = "set ip flow-hash",
3201   .short_help = 
3202   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3203   .function = set_ip_flow_hash_command_fn,
3204 };
3205  
3206 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3207                                  u32 table_index)
3208 {
3209   vnet_main_t * vnm = vnet_get_main();
3210   vnet_interface_main_t * im = &vnm->interface_main;
3211   ip4_main_t * ipm = &ip4_main;
3212   ip_lookup_main_t * lm = &ipm->lookup_main;
3213   vnet_classify_main_t * cm = &vnet_classify_main;
3214
3215   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3216     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3217
3218   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3219     return VNET_API_ERROR_NO_SUCH_ENTRY;
3220
3221   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3222   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3223
3224   return 0;
3225 }
3226
3227 static clib_error_t *
3228 set_ip_classify_command_fn (vlib_main_t * vm,
3229                             unformat_input_t * input,
3230                             vlib_cli_command_t * cmd)
3231 {
3232   u32 table_index = ~0;
3233   int table_index_set = 0;
3234   u32 sw_if_index = ~0;
3235   int rv;
3236   
3237   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3238     if (unformat (input, "table-index %d", &table_index))
3239       table_index_set = 1;
3240     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3241                        vnet_get_main(), &sw_if_index))
3242       ;
3243     else
3244       break;
3245   }
3246       
3247   if (table_index_set == 0)
3248     return clib_error_return (0, "classify table-index must be specified");
3249
3250   if (sw_if_index == ~0)
3251     return clib_error_return (0, "interface / subif must be specified");
3252
3253   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3254
3255   switch (rv)
3256     {
3257     case 0:
3258       break;
3259
3260     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3261       return clib_error_return (0, "No such interface");
3262
3263     case VNET_API_ERROR_NO_SUCH_ENTRY:
3264       return clib_error_return (0, "No such classifier table");
3265     }
3266   return 0;
3267 }
3268
3269 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3270     .path = "set ip classify",
3271     .short_help = 
3272     "set ip classify intfc <int> table-index <index>",
3273     .function = set_ip_classify_command_fn,
3274 };
3275