cb29d30ad37881f45a2f4eba3f6e57d5567ebbd0
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47
48 /* This is really, really simple but stupid fib. */
49 u32
50 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
51                            ip4_address_t * dst,
52                            u32 disable_default_route)
53 {
54   ip_lookup_main_t * lm = &im->lookup_main;
55   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
56   uword * p, * hash, key;
57   i32 i, i_min, dst_address, ai;
58
59   i_min = disable_default_route ? 1 : 0;
60   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
61   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
62     {
63       hash = fib->adj_index_by_dst_address[i];
64       if (! hash)
65         continue;
66
67       key = dst_address & im->fib_masks[i];
68       if ((p = hash_get (hash, key)) != 0)
69         {
70           ai = p[0];
71           goto done;
72         }
73     }
74     
75   /* Nothing matches in table. */
76   ai = lm->miss_adj_index;
77
78  done:
79   return ai;
80 }
81
82 static ip4_fib_t *
83 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
84 {
85   ip4_fib_t * fib;
86   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
87   vec_add2 (im->fibs, fib, 1);
88   fib->table_id = table_id;
89   fib->index = fib - im->fibs;
90   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
91   fib->fwd_classify_table_index = ~0;
92   fib->rev_classify_table_index = ~0;
93   ip4_mtrie_init (&fib->mtrie);
94   return fib;
95 }
96
97 ip4_fib_t *
98 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
99                                    u32 table_index_or_id, u32 flags)
100 {
101   uword * p, fib_index;
102
103   fib_index = table_index_or_id;
104   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
105     {
106       if (table_index_or_id == ~0) {
107         table_index_or_id = 0;
108         while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
109           table_index_or_id++;
110         }
111         return create_fib_with_table_id (im, table_index_or_id);
112       }
113
114       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
115       if (! p)
116         return create_fib_with_table_id (im, table_index_or_id);
117       fib_index = p[0];
118     }
119   return vec_elt_at_index (im->fibs, fib_index);
120 }
121
122 static void
123 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
124                                        ip4_fib_t * fib,
125                                        u32 address_length)
126 {
127   hash_t * h;
128   uword max_index;
129
130   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
131   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
132
133   fib->adj_index_by_dst_address[address_length] =
134     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
135
136   hash_set_flags (fib->adj_index_by_dst_address[address_length],
137                   HASH_FLAG_NO_AUTO_SHRINK);
138
139   h = hash_header (fib->adj_index_by_dst_address[address_length]);
140   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
141
142   /* Initialize new/old hash value vectors. */
143   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
144   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
145 }
146
147 static void
148 ip4_fib_set_adj_index (ip4_main_t * im,
149                        ip4_fib_t * fib,
150                        u32 flags,
151                        u32 dst_address_u32,
152                        u32 dst_address_length,
153                        u32 adj_index)
154 {
155   ip_lookup_main_t * lm = &im->lookup_main;
156   uword * hash;
157
158   if (vec_bytes(fib->old_hash_values))
159     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
160   if (vec_bytes(fib->new_hash_values))
161     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
162   fib->new_hash_values[0] = adj_index;
163
164   /* Make sure adj index is valid. */
165   if (CLIB_DEBUG > 0)
166     (void) ip_get_adjacency (lm, adj_index);
167
168   hash = fib->adj_index_by_dst_address[dst_address_length];
169
170   hash = _hash_set3 (hash, dst_address_u32,
171                      fib->new_hash_values,
172                      fib->old_hash_values);
173
174   fib->adj_index_by_dst_address[dst_address_length] = hash;
175
176   if (vec_len (im->add_del_route_callbacks) > 0)
177     {
178       ip4_add_del_route_callback_t * cb;
179       ip4_address_t d;
180       uword * p;
181
182       d.data_u32 = dst_address_u32;
183       vec_foreach (cb, im->add_del_route_callbacks)
184         if ((flags & cb->required_flags) == cb->required_flags)
185           cb->function (im, cb->function_opaque,
186                         fib, flags,
187                         &d, dst_address_length,
188                         fib->old_hash_values,
189                         fib->new_hash_values);
190
191       p = hash_get (hash, dst_address_u32);
192       clib_memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
193     }
194 }
195
196 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
197 {
198   ip_lookup_main_t * lm = &im->lookup_main;
199   ip4_fib_t * fib;
200   u32 dst_address, dst_address_length, adj_index, old_adj_index;
201   uword * hash, is_del;
202   ip4_add_del_route_callback_t * cb;
203
204   /* Either create new adjacency or use given one depending on arguments. */
205   if (a->n_add_adj > 0)
206     {
207       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
208       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
209     }
210   else
211     adj_index = a->adj_index;
212
213   dst_address = a->dst_address.data_u32;
214   dst_address_length = a->dst_address_length;
215   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
216
217   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
218   dst_address &= im->fib_masks[dst_address_length];
219
220   if (! fib->adj_index_by_dst_address[dst_address_length])
221     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
222
223   hash = fib->adj_index_by_dst_address[dst_address_length];
224
225   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
226
227   if (is_del)
228     {
229       fib->old_hash_values[0] = ~0;
230       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
231       fib->adj_index_by_dst_address[dst_address_length] = hash;
232
233       if (vec_len (im->add_del_route_callbacks) > 0
234           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
235         {
236           fib->new_hash_values[0] = ~0;
237           vec_foreach (cb, im->add_del_route_callbacks)
238             if ((a->flags & cb->required_flags) == cb->required_flags)
239               cb->function (im, cb->function_opaque,
240                             fib, a->flags,
241                             &a->dst_address, dst_address_length,
242                             fib->old_hash_values,
243                             fib->new_hash_values);
244         }
245     }
246   else
247     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
248                            adj_index);
249
250   old_adj_index = fib->old_hash_values[0];
251
252   /* Avoid spurious reference count increments */
253   if (old_adj_index == adj_index
254       && adj_index != ~0
255       && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
256     {
257       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
258       if (adj->share_count > 0)
259         adj->share_count --;
260     }
261
262   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
263                                is_del ? old_adj_index : adj_index,
264                                is_del);
265
266   /* Delete old adjacency index if present and changed. */
267   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
268       && old_adj_index != ~0
269       && old_adj_index != adj_index)
270     ip_del_adjacency (lm, old_adj_index);
271 }
272
273 void
274 ip4_add_del_route_next_hop (ip4_main_t * im,
275                             u32 flags,
276                             ip4_address_t * dst_address,
277                             u32 dst_address_length,
278                             ip4_address_t * next_hop,
279                             u32 next_hop_sw_if_index,
280                             u32 next_hop_weight, u32 adj_index, 
281                             u32 explicit_fib_index)
282 {
283   vnet_main_t * vnm = vnet_get_main();
284   ip_lookup_main_t * lm = &im->lookup_main;
285   u32 fib_index;
286   ip4_fib_t * fib;
287   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
288   u32 dst_adj_index, nh_adj_index;
289   uword * dst_hash, * dst_result;
290   uword * nh_hash, * nh_result;
291   ip_adjacency_t * dst_adj;
292   ip_multipath_adjacency_t * old_mp, * new_mp;
293   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
294   int is_interface_next_hop;
295   clib_error_t * error = 0;
296
297   if (explicit_fib_index == (u32)~0)
298       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
299   else
300       fib_index = explicit_fib_index;
301
302   fib = vec_elt_at_index (im->fibs, fib_index);
303   
304   /* Lookup next hop to be added or deleted. */
305   is_interface_next_hop = next_hop->data_u32 == 0;
306   if (adj_index == (u32)~0)
307     {
308       if (is_interface_next_hop)
309         {
310           nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
311           if (nh_result)
312             nh_adj_index = *nh_result;
313           else
314             {
315               ip_adjacency_t * adj;
316               adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
317                                       &nh_adj_index);
318               ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
319               ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
320               hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
321             }
322         }
323       else
324         {
325           nh_hash = fib->adj_index_by_dst_address[32];
326           nh_result = hash_get (nh_hash, next_hop->data_u32);
327           
328           /* Next hop must be known. */
329           if (! nh_result)
330             {
331               ip_adjacency_t * adj;
332
333               nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
334                                                         next_hop, 0);
335               adj = ip_get_adjacency (lm, nh_adj_index);
336               /* if ARP interface adjacencty is present, we need to
337                  install ARP adjaceny for specific next hop */
338               if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
339                   adj->arp.next_hop.ip4.as_u32 == 0)
340                 {
341                   nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
342                 }
343               else
344                 {
345                   /* Next hop is not known, so create indirect adj */
346                   ip_adjacency_t add_adj;
347                   add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
348                   add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
349                   add_adj.explicit_fib_index = explicit_fib_index;
350                   ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
351                 }
352             }
353           else
354             nh_adj_index = *nh_result;
355         }
356     }
357   else
358     {
359       nh_adj_index = adj_index;
360     }
361   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
362   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
363
364   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
365   dst_result = hash_get (dst_hash, dst_address_u32);
366   if (dst_result)
367     {
368       dst_adj_index = dst_result[0];
369       dst_adj = ip_get_adjacency (lm, dst_adj_index);
370     }
371   else
372     {
373       /* For deletes destination must be known. */
374       if (is_del)
375         {
376           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
377           error = clib_error_return (0, "unknown destination %U/%d",
378                                      format_ip4_address, dst_address,
379                                      dst_address_length);
380           goto done;
381         }
382
383       dst_adj_index = ~0;
384       dst_adj = 0;
385     }
386
387   /* Ignore adds of X/32 with next hop of X. */
388   if (! is_del
389       && dst_address_length == 32
390       && dst_address->data_u32 == next_hop->data_u32 
391       && adj_index != (u32)~0)
392     {
393       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
394       error = clib_error_return (0, "prefix matches next hop %U/%d",
395                                  format_ip4_address, dst_address,
396                                  dst_address_length);
397       goto done;
398     }
399
400   /* Destination is not known and default weight is set so add route
401      to existing non-multipath adjacency */
402   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
403     {
404       /* create new adjacency */
405       ip4_add_del_route_args_t a;
406       a.table_index_or_table_id = fib_index;
407       a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
408                  | IP4_ROUTE_FLAG_FIB_INDEX
409                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
410                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
411                              | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
412       a.dst_address = dst_address[0];
413       a.dst_address_length = dst_address_length;
414       a.adj_index = nh_adj_index;
415       a.add_adj = 0;
416       a.n_add_adj = 0;
417
418       ip4_add_del_route (im, &a);
419
420       goto done;
421     }
422
423   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
424
425   if (! ip_multipath_adjacency_add_del_next_hop
426       (lm, is_del,
427        old_mp_adj_index,
428        nh_adj_index,
429        next_hop_weight,
430        &new_mp_adj_index))
431     {
432       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
433       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
434                                  format_ip4_address, next_hop);
435       goto done;
436     }
437   
438   old_mp = new_mp = 0;
439   if (old_mp_adj_index != ~0)
440     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
441   if (new_mp_adj_index != ~0)
442     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
443
444   if (old_mp != new_mp)
445     {
446       ip4_add_del_route_args_t a;
447       a.table_index_or_table_id = fib_index;
448       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
449                  | IP4_ROUTE_FLAG_FIB_INDEX
450                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
451                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
452       a.dst_address = dst_address[0];
453       a.dst_address_length = dst_address_length;
454       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
455       a.add_adj = 0;
456       a.n_add_adj = 0;
457
458       ip4_add_del_route (im, &a);
459     }
460
461  done:
462   if (error)
463     clib_error_report (error);
464 }
465
466 void *
467 ip4_get_route (ip4_main_t * im,
468                u32 table_index_or_table_id,
469                u32 flags,
470                u8 * address,
471                u32 address_length)
472 {
473   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
474   u32 dst_address = * (u32 *) address;
475   uword * hash, * p;
476
477   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
478   dst_address &= im->fib_masks[address_length];
479
480   hash = fib->adj_index_by_dst_address[address_length];
481   p = hash_get (hash, dst_address);
482   return (void *) p;
483 }
484
485 void
486 ip4_foreach_matching_route (ip4_main_t * im,
487                             u32 table_index_or_table_id,
488                             u32 flags,
489                             ip4_address_t * address,
490                             u32 address_length,
491                             ip4_address_t ** results,
492                             u8 ** result_lengths)
493 {
494   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
495   u32 dst_address = address->data_u32;
496   u32 this_length = address_length;
497   
498   if (*results)
499     _vec_len (*results) = 0;
500   if (*result_lengths)
501     _vec_len (*result_lengths) = 0;
502
503   while (this_length <= 32 && vec_len (results) == 0)
504     {
505       uword k, v;
506       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
507         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
508           {
509             ip4_address_t a;
510             a.data_u32 = k;
511             vec_add1 (*results, a);
512             vec_add1 (*result_lengths, this_length);
513           }
514       }));
515
516       this_length++;
517     }
518 }
519
520 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
521                                   u32 table_index_or_table_id,
522                                   u32 flags)
523 {
524   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
525   ip_lookup_main_t * lm = &im->lookup_main;
526   u32 i, l;
527   ip4_address_t a;
528   ip4_add_del_route_callback_t * cb;
529   static ip4_address_t * to_delete;
530
531   if (lm->n_adjacency_remaps == 0)
532     return;
533
534   for (l = 0; l <= 32; l++)
535     {
536       hash_pair_t * p;
537       uword * hash = fib->adj_index_by_dst_address[l];
538
539       if (hash_elts (hash) == 0)
540         continue;
541
542       if (to_delete)
543         _vec_len (to_delete) = 0;
544
545       hash_foreach_pair (p, hash, ({
546         u32 adj_index = p->value[0];
547         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
548
549         if (m)
550           {
551             /* Record destination address from hash key. */
552             a.data_u32 = p->key;
553
554             /* New adjacency points to nothing: so delete prefix. */
555             if (m == ~0)
556               vec_add1 (to_delete, a);
557             else
558               {
559                 /* Remap to new adjacency. */
560                 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
561
562                 /* Set new adjacency value. */
563                 fib->new_hash_values[0] = p->value[0] = m - 1;
564
565                 vec_foreach (cb, im->add_del_route_callbacks)
566                   if ((flags & cb->required_flags) == cb->required_flags)
567                     cb->function (im, cb->function_opaque,
568                                   fib, flags | IP4_ROUTE_FLAG_ADD,
569                                   &a, l,
570                                   fib->old_hash_values,
571                                   fib->new_hash_values);
572               }
573           }
574       }));
575
576       fib->new_hash_values[0] = ~0;
577       for (i = 0; i < vec_len (to_delete); i++)
578         {
579           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
580           vec_foreach (cb, im->add_del_route_callbacks)
581             if ((flags & cb->required_flags) == cb->required_flags)
582               cb->function (im, cb->function_opaque,
583                             fib, flags | IP4_ROUTE_FLAG_DEL,
584                             &a, l,
585                             fib->old_hash_values,
586                             fib->new_hash_values);
587         }
588     }
589
590   /* Also remap adjacencies in mtrie. */
591   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
592
593   /* Reset mapping table. */
594   vec_zero (lm->adjacency_remap_table);
595
596   /* All remaps have been performed. */
597   lm->n_adjacency_remaps = 0;
598 }
599
600 void ip4_delete_matching_routes (ip4_main_t * im,
601                                  u32 table_index_or_table_id,
602                                  u32 flags,
603                                  ip4_address_t * address,
604                                  u32 address_length)
605 {
606   static ip4_address_t * matching_addresses;
607   static u8 * matching_address_lengths;
608   u32 l, i;
609   ip4_add_del_route_args_t a;
610
611   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
612   a.table_index_or_table_id = table_index_or_table_id;
613   a.adj_index = ~0;
614   a.add_adj = 0;
615   a.n_add_adj = 0;
616
617   for (l = address_length + 1; l <= 32; l++)
618     {
619       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
620                                   address,
621                                   l,
622                                   &matching_addresses,
623                                   &matching_address_lengths);
624       for (i = 0; i < vec_len (matching_addresses); i++)
625         {
626           a.dst_address = matching_addresses[i];
627           a.dst_address_length = matching_address_lengths[i];
628           ip4_add_del_route (im, &a);
629         }
630     }
631
632   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
633 }
634
635 always_inline uword
636 ip4_lookup_inline (vlib_main_t * vm,
637                    vlib_node_runtime_t * node,
638                    vlib_frame_t * frame,
639                    int lookup_for_responses_to_locally_received_packets,
640                    int is_indirect)
641 {
642   ip4_main_t * im = &ip4_main;
643   ip_lookup_main_t * lm = &im->lookup_main;
644   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
645   u32 n_left_from, n_left_to_next, * from, * to_next;
646   ip_lookup_next_t next;
647   u32 cpu_index = os_get_cpu_number();
648
649   from = vlib_frame_vector_args (frame);
650   n_left_from = frame->n_vectors;
651   next = node->cached_next_index;
652
653   while (n_left_from > 0)
654     {
655       vlib_get_next_frame (vm, node, next,
656                            to_next, n_left_to_next);
657
658       while (n_left_from >= 4 && n_left_to_next >= 2)
659         {
660           vlib_buffer_t * p0, * p1;
661           ip4_header_t * ip0, * ip1;
662           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
663           ip_lookup_next_t next0, next1;
664           ip_adjacency_t * adj0, * adj1;
665           ip4_fib_mtrie_t * mtrie0, * mtrie1;
666           ip4_fib_mtrie_leaf_t leaf0, leaf1;
667           ip4_address_t * dst_addr0, *dst_addr1;
668           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
669           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
670           u32 flow_hash_config0, flow_hash_config1;
671           u32 hash_c0, hash_c1;
672           u32 wrong_next;
673
674           /* Prefetch next iteration. */
675           {
676             vlib_buffer_t * p2, * p3;
677
678             p2 = vlib_get_buffer (vm, from[2]);
679             p3 = vlib_get_buffer (vm, from[3]);
680
681             vlib_prefetch_buffer_header (p2, LOAD);
682             vlib_prefetch_buffer_header (p3, LOAD);
683
684             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
685             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
686           }
687
688           pi0 = to_next[0] = from[0];
689           pi1 = to_next[1] = from[1];
690
691           p0 = vlib_get_buffer (vm, pi0);
692           p1 = vlib_get_buffer (vm, pi1);
693
694           ip0 = vlib_buffer_get_current (p0);
695           ip1 = vlib_buffer_get_current (p1);
696
697           if (is_indirect)
698             {
699               ip_adjacency_t * iadj0, * iadj1;
700               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
701               iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
702               dst_addr0 = &iadj0->indirect.next_hop.ip4;
703               dst_addr1 = &iadj1->indirect.next_hop.ip4;
704             }
705           else
706             {
707               dst_addr0 = &ip0->dst_address;
708               dst_addr1 = &ip1->dst_address;
709             }
710
711           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
712           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
713           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
714             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
715           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
716             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
717
718
719           if (! lookup_for_responses_to_locally_received_packets)
720             {
721               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
722               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
723
724               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
725
726               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
727               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
728             }
729
730           tcp0 = (void *) (ip0 + 1);
731           tcp1 = (void *) (ip1 + 1);
732
733           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
734                          || ip0->protocol == IP_PROTOCOL_UDP);
735           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
736                          || ip1->protocol == IP_PROTOCOL_UDP);
737
738           if (! lookup_for_responses_to_locally_received_packets)
739             {
740               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
741               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
742             }
743
744           if (! lookup_for_responses_to_locally_received_packets)
745             {
746               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
747               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
748             }
749
750           if (! lookup_for_responses_to_locally_received_packets)
751             {
752               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
753               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
754             }
755
756           if (lookup_for_responses_to_locally_received_packets)
757             {
758               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
759               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
760             }
761           else
762             {
763               /* Handle default route. */
764               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
765               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
766
767               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
768               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
769             }
770
771           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
772                                                            dst_addr0,
773                                                            /* no_default_route */ 0));
774           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
775                                                            dst_addr1,
776                                                            /* no_default_route */ 0));
777           adj0 = ip_get_adjacency (lm, adj_index0);
778           adj1 = ip_get_adjacency (lm, adj_index1);
779
780           next0 = adj0->lookup_next_index;
781           next1 = adj1->lookup_next_index;
782
783           /* Use flow hash to compute multipath adjacency. */
784           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
785           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
786           if (PREDICT_FALSE (adj0->n_adj > 1))
787             {
788               flow_hash_config0 = 
789                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
790               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
791                 ip4_compute_flow_hash (ip0, flow_hash_config0);
792             }
793           if (PREDICT_FALSE(adj1->n_adj > 1))
794             {
795               flow_hash_config1 = 
796                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
797               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
798                 ip4_compute_flow_hash (ip1, flow_hash_config1);
799             }
800
801           ASSERT (adj0->n_adj > 0);
802           ASSERT (adj1->n_adj > 0);
803           ASSERT (is_pow2 (adj0->n_adj));
804           ASSERT (is_pow2 (adj1->n_adj));
805           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
806           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
807
808           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
809           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
810
811           vlib_increment_combined_counter 
812               (cm, cpu_index, adj_index0, 1,
813                vlib_buffer_length_in_chain (vm, p0) 
814                + sizeof(ethernet_header_t));
815           vlib_increment_combined_counter 
816               (cm, cpu_index, adj_index1, 1,
817                vlib_buffer_length_in_chain (vm, p1)
818                + sizeof(ethernet_header_t));
819
820           from += 2;
821           to_next += 2;
822           n_left_to_next -= 2;
823           n_left_from -= 2;
824
825           wrong_next = (next0 != next) + 2*(next1 != next);
826           if (PREDICT_FALSE (wrong_next != 0))
827             {
828               switch (wrong_next)
829                 {
830                 case 1:
831                   /* A B A */
832                   to_next[-2] = pi1;
833                   to_next -= 1;
834                   n_left_to_next += 1;
835                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
836                   break;
837
838                 case 2:
839                   /* A A B */
840                   to_next -= 1;
841                   n_left_to_next += 1;
842                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
843                   break;
844
845                 case 3:
846                   /* A B C */
847                   to_next -= 2;
848                   n_left_to_next += 2;
849                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
850                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
851                   if (next0 == next1)
852                     {
853                       /* A B B */
854                       vlib_put_next_frame (vm, node, next, n_left_to_next);
855                       next = next1;
856                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
857                     }
858                 }
859             }
860         }
861     
862       while (n_left_from > 0 && n_left_to_next > 0)
863         {
864           vlib_buffer_t * p0;
865           ip4_header_t * ip0;
866           __attribute__((unused)) tcp_header_t * tcp0;
867           ip_lookup_next_t next0;
868           ip_adjacency_t * adj0;
869           ip4_fib_mtrie_t * mtrie0;
870           ip4_fib_mtrie_leaf_t leaf0;
871           ip4_address_t * dst_addr0;
872           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
873           u32 flow_hash_config0, hash_c0;
874
875           pi0 = from[0];
876           to_next[0] = pi0;
877
878           p0 = vlib_get_buffer (vm, pi0);
879
880           ip0 = vlib_buffer_get_current (p0);
881
882           if (is_indirect)
883             {
884               ip_adjacency_t * iadj0;
885               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
886               dst_addr0 = &iadj0->indirect.next_hop.ip4;
887             }
888           else
889             {
890               dst_addr0 = &ip0->dst_address;
891             }
892
893           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
894           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
895             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
896
897           if (! lookup_for_responses_to_locally_received_packets)
898             {
899               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
900
901               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
902
903               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
904             }
905
906           tcp0 = (void *) (ip0 + 1);
907
908           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
909                          || ip0->protocol == IP_PROTOCOL_UDP);
910
911           if (! lookup_for_responses_to_locally_received_packets)
912             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
913
914           if (! lookup_for_responses_to_locally_received_packets)
915             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
916
917           if (! lookup_for_responses_to_locally_received_packets)
918             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
919
920           if (lookup_for_responses_to_locally_received_packets)
921             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
922           else
923             {
924               /* Handle default route. */
925               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
926               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
927             }
928
929           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
930                                                            dst_addr0,
931                                                            /* no_default_route */ 0));
932
933           adj0 = ip_get_adjacency (lm, adj_index0);
934
935           next0 = adj0->lookup_next_index;
936
937           /* Use flow hash to compute multipath adjacency. */
938           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
939           if (PREDICT_FALSE(adj0->n_adj > 1))
940             {
941               flow_hash_config0 = 
942                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
943
944               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
945                 ip4_compute_flow_hash (ip0, flow_hash_config0);
946             }
947
948           ASSERT (adj0->n_adj > 0);
949           ASSERT (is_pow2 (adj0->n_adj));
950           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
951
952           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
953
954           vlib_increment_combined_counter 
955               (cm, cpu_index, adj_index0, 1,
956                vlib_buffer_length_in_chain (vm, p0)
957                + sizeof(ethernet_header_t));
958
959           from += 1;
960           to_next += 1;
961           n_left_to_next -= 1;
962           n_left_from -= 1;
963
964           if (PREDICT_FALSE (next0 != next))
965             {
966               n_left_to_next += 1;
967               vlib_put_next_frame (vm, node, next, n_left_to_next);
968               next = next0;
969               vlib_get_next_frame (vm, node, next,
970                                    to_next, n_left_to_next);
971               to_next[0] = pi0;
972               to_next += 1;
973               n_left_to_next -= 1;
974             }
975         }
976
977       vlib_put_next_frame (vm, node, next, n_left_to_next);
978     }
979
980   return frame->n_vectors;
981 }
982
983 static uword
984 ip4_lookup (vlib_main_t * vm,
985             vlib_node_runtime_t * node,
986             vlib_frame_t * frame)
987 {
988   return ip4_lookup_inline (vm, node, frame,
989                             /* lookup_for_responses_to_locally_received_packets */ 0,
990                             /* is_indirect */ 0);
991
992 }
993
994 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
995                                         ip_adjacency_t * adj,
996                                         u32 sw_if_index,
997                                         u32 if_address_index)
998 {
999   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1000   ip_lookup_next_t n;
1001   vnet_l3_packet_type_t packet_type;
1002   u32 node_index;
1003
1004   if (hw->hw_class_index == ethernet_hw_interface_class.index
1005       || hw->hw_class_index == srp_hw_interface_class.index)
1006     {
1007       /* 
1008        * We have a bit of a problem in this case. ip4-arp uses
1009        * the rewrite_header.next_index to hand pkts to the
1010        * indicated inteface output node. We can end up in
1011        * ip4_rewrite_local, too, which also pays attention to 
1012        * rewrite_header.next index. Net result: a hack in
1013        * ip4_rewrite_local...
1014        */
1015       n = IP_LOOKUP_NEXT_ARP;
1016       node_index = ip4_arp_node.index;
1017       adj->if_address_index = if_address_index;
1018       adj->arp.next_hop.ip4.as_u32 = 0;
1019       ip46_address_reset(&adj->arp.next_hop);
1020       packet_type = VNET_L3_PACKET_TYPE_ARP;
1021     }
1022   else
1023     {
1024       n = IP_LOOKUP_NEXT_REWRITE;
1025       node_index = ip4_rewrite_node.index;
1026       packet_type = VNET_L3_PACKET_TYPE_IP4;
1027     }
1028
1029   adj->lookup_next_index = n;
1030   vnet_rewrite_for_sw_interface
1031     (vnm,
1032      packet_type,
1033      sw_if_index,
1034      node_index,
1035      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1036      &adj->rewrite_header,
1037      sizeof (adj->rewrite_data));
1038 }
1039
1040 static void
1041 ip4_add_interface_routes (u32 sw_if_index,
1042                           ip4_main_t * im, u32 fib_index,
1043                           ip_interface_address_t * a)
1044 {
1045   vnet_main_t * vnm = vnet_get_main();
1046   ip_lookup_main_t * lm = &im->lookup_main;
1047   ip_adjacency_t * adj;
1048   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1049   ip4_add_del_route_args_t x;
1050   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1051   u32 classify_table_index;
1052
1053   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1054   x.table_index_or_table_id = fib_index;
1055   x.flags = (IP4_ROUTE_FLAG_ADD
1056              | IP4_ROUTE_FLAG_FIB_INDEX
1057              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1058   x.dst_address = address[0];
1059   x.dst_address_length = a->address_length;
1060   x.n_add_adj = 0;
1061   x.add_adj = 0;
1062
1063   a->neighbor_probe_adj_index = ~0;
1064   if (a->address_length < 32)
1065     {
1066       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1067                               &x.adj_index);
1068       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1069       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1070       ip4_add_del_route (im, &x);
1071       a->neighbor_probe_adj_index = x.adj_index;
1072     }
1073   
1074   /* Add e.g. 1.1.1.1/32 as local to this host. */
1075   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1076                           &x.adj_index);
1077   
1078   classify_table_index = ~0;
1079   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1080     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1081   if (classify_table_index != (u32) ~0)
1082     {
1083       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1084       adj->classify.table_index = classify_table_index;
1085     }
1086   else
1087     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1088   
1089   adj->if_address_index = a - lm->if_address_pool;
1090   adj->rewrite_header.sw_if_index = sw_if_index;
1091   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1092   /* 
1093    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1094    * fail an RPF-ish check, but still go thru the rewrite code...
1095    */
1096   adj->rewrite_header.data_bytes = 0;
1097
1098   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1099   x.dst_address_length = 32;
1100   ip4_add_del_route (im, &x);
1101 }
1102
1103 static void
1104 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1105 {
1106   ip4_add_del_route_args_t x;
1107
1108   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1109   x.table_index_or_table_id = fib_index;
1110   x.flags = (IP4_ROUTE_FLAG_DEL
1111              | IP4_ROUTE_FLAG_FIB_INDEX
1112              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1113   x.dst_address = address[0];
1114   x.dst_address_length = address_length;
1115   x.adj_index = ~0;
1116   x.n_add_adj = 0;
1117   x.add_adj = 0;
1118
1119   if (address_length < 32)
1120     ip4_add_del_route (im, &x);
1121
1122   x.dst_address_length = 32;
1123   ip4_add_del_route (im, &x);
1124
1125   ip4_delete_matching_routes (im,
1126                               fib_index,
1127                               IP4_ROUTE_FLAG_FIB_INDEX,
1128                               address,
1129                               address_length);
1130 }
1131
1132 typedef struct {
1133     u32 sw_if_index;
1134     ip4_address_t address;
1135     u32 length;
1136 } ip4_interface_address_t;
1137
1138 static clib_error_t *
1139 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1140                                         u32 sw_if_index,
1141                                         ip4_address_t * new_address,
1142                                         u32 new_length,
1143                                         u32 redistribute,
1144                                         u32 insert_routes,
1145                                         u32 is_del);
1146
1147 static clib_error_t *
1148 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1149                                         u32 sw_if_index,
1150                                         ip4_address_t * address,
1151                                         u32 address_length,
1152                                         u32 redistribute,
1153                                         u32 insert_routes,
1154                                         u32 is_del)
1155 {
1156   vnet_main_t * vnm = vnet_get_main();
1157   ip4_main_t * im = &ip4_main;
1158   ip_lookup_main_t * lm = &im->lookup_main;
1159   clib_error_t * error = 0;
1160   u32 if_address_index, elts_before;
1161   ip4_address_fib_t ip4_af, * addr_fib = 0;
1162
1163   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1164   ip4_addr_fib_init (&ip4_af, address,
1165                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1166   vec_add1 (addr_fib, ip4_af);
1167
1168   /* When adding an address check that it does not conflict with an existing address. */
1169   if (! is_del)
1170     {
1171       ip_interface_address_t * ia;
1172       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1173                                     0 /* honor unnumbered */,
1174       ({
1175         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1176
1177         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1178             || ip4_destination_matches_route (im, x, address, address_length))
1179           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1180                                     format_ip4_address_and_length, address, address_length,
1181                                     format_ip4_address_and_length, x, ia->address_length,
1182                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1183       }));
1184     }
1185
1186   elts_before = pool_elts (lm->if_address_pool);
1187
1188   error = ip_interface_address_add_del
1189     (lm,
1190      sw_if_index,
1191      addr_fib,
1192      address_length,
1193      is_del,
1194      &if_address_index);
1195   if (error)
1196     goto done;
1197   
1198   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1199     {
1200       if (is_del)
1201         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1202                                   address_length);
1203       
1204       else
1205           ip4_add_interface_routes (sw_if_index,
1206                                     im, ip4_af.fib_index,
1207                                     pool_elt_at_index 
1208                                     (lm->if_address_pool, if_address_index));
1209     }
1210
1211   /* If pool did not grow/shrink: add duplicate address. */
1212   if (elts_before != pool_elts (lm->if_address_pool))
1213     {
1214       ip4_add_del_interface_address_callback_t * cb;
1215       vec_foreach (cb, im->add_del_interface_address_callbacks)
1216         cb->function (im, cb->function_opaque, sw_if_index,
1217                       address, address_length,
1218                       if_address_index,
1219                       is_del);
1220     }
1221
1222  done:
1223   vec_free (addr_fib);
1224   return error;
1225 }
1226
1227 clib_error_t *
1228 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1229                                ip4_address_t * address, u32 address_length,
1230                                u32 is_del)
1231 {
1232   return ip4_add_del_interface_address_internal
1233     (vm, sw_if_index, address, address_length,
1234      /* redistribute */ 1,
1235      /* insert_routes */ 1,
1236      is_del);
1237 }
1238
1239 static clib_error_t *
1240 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1241                                 u32 sw_if_index,
1242                                 u32 flags)
1243 {
1244   ip4_main_t * im = &ip4_main;
1245   ip_interface_address_t * ia;
1246   ip4_address_t * a;
1247   u32 is_admin_up, fib_index;
1248   
1249   /* Fill in lookup tables with default table (0). */
1250   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1251   
1252   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1253   
1254   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1255   
1256   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1257
1258   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1259                                 0 /* honor unnumbered */,
1260   ({
1261     a = ip_interface_address_get_address (&im->lookup_main, ia);
1262     if (is_admin_up)
1263       ip4_add_interface_routes (sw_if_index,
1264                                 im, fib_index,
1265                                 ia);
1266     else
1267       ip4_del_interface_routes (im, fib_index,
1268                                 a, ia->address_length);
1269   }));
1270
1271   return 0;
1272 }
1273  
1274 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1275
1276 static clib_error_t *
1277 ip4_sw_interface_add_del (vnet_main_t * vnm,
1278                           u32 sw_if_index,
1279                           u32 is_add)
1280 {
1281   vlib_main_t * vm = vnm->vlib_main;
1282   ip4_main_t * im = &ip4_main;
1283   ip_lookup_main_t * lm = &im->lookup_main;
1284   u32 ci, cast;
1285
1286   for (cast = 0; cast < VNET_N_CAST; cast++)
1287     {
1288       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1289       vnet_config_main_t * vcm = &cm->config_main;
1290
1291       if (! vcm->node_index_by_feature_index)
1292         {
1293           if (cast == VNET_UNICAST)
1294             {
1295               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1296               static char * feature_nodes[] = {
1297                 [IP4_RX_FEATURE_CHECK_ACCESS] = "ip4-inacl",
1298                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX] = "ip4-source-check-via-rx",
1299                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY] = "ip4-source-check-via-any",
1300                 [IP4_RX_FEATURE_IPSEC] = "ipsec-input-ip4",
1301                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1302                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup",
1303               };
1304
1305               vnet_config_init (vm, vcm,
1306                                 start_nodes, ARRAY_LEN (start_nodes),
1307                                 feature_nodes, ARRAY_LEN (feature_nodes));
1308             }
1309           else
1310             {
1311               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1312               static char * feature_nodes[] = {
1313                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1314                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup-multicast",
1315               };
1316
1317               vnet_config_init (vm, vcm,
1318                                 start_nodes, ARRAY_LEN (start_nodes),
1319                                 feature_nodes, ARRAY_LEN (feature_nodes));
1320             }
1321         }
1322
1323       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1324       ci = cm->config_index_by_sw_if_index[sw_if_index];
1325
1326       if (is_add)
1327         ci = vnet_config_add_feature (vm, vcm,
1328                                       ci,
1329                                       IP4_RX_FEATURE_LOOKUP,
1330                                       /* config data */ 0,
1331                                       /* # bytes of config data */ 0);
1332       else
1333         ci = vnet_config_del_feature (vm, vcm,
1334                                       ci,
1335                                       IP4_RX_FEATURE_LOOKUP,
1336                                       /* config data */ 0,
1337                                       /* # bytes of config data */ 0);
1338
1339       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1340     }
1341
1342   return /* no error */ 0;
1343 }
1344
1345 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1346
1347
1348 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1349   .function = ip4_lookup,
1350   .name = "ip4-lookup",
1351   .vector_size = sizeof (u32),
1352
1353   .n_next_nodes = IP_LOOKUP_N_NEXT,
1354   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1355 };
1356
1357 static uword
1358 ip4_indirect (vlib_main_t * vm,
1359                vlib_node_runtime_t * node,
1360                vlib_frame_t * frame)
1361 {
1362   return ip4_lookup_inline (vm, node, frame,
1363                             /* lookup_for_responses_to_locally_received_packets */ 0,
1364                             /* is_indirect */ 1);
1365 }
1366
1367 VLIB_REGISTER_NODE (ip4_indirect_node) = {
1368   .function = ip4_indirect,
1369   .name = "ip4-indirect",
1370   .vector_size = sizeof (u32),
1371
1372   .n_next_nodes = IP_LOOKUP_N_NEXT,
1373   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1374 };
1375
1376
1377 /* Global IP4 main. */
1378 ip4_main_t ip4_main;
1379
1380 clib_error_t *
1381 ip4_lookup_init (vlib_main_t * vm)
1382 {
1383   ip4_main_t * im = &ip4_main;
1384   uword i;
1385
1386   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1387     {
1388       u32 m;
1389
1390       if (i < 32)
1391         m = pow2_mask (i) << (32 - i);
1392       else 
1393         m = ~0;
1394       im->fib_masks[i] = clib_host_to_net_u32 (m);
1395     }
1396
1397   /* Create FIB with index 0 and table id of 0. */
1398   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1399
1400   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1401
1402   {
1403     pg_node_t * pn;
1404     pn = pg_get_node (ip4_lookup_node.index);
1405     pn->unformat_edit = unformat_pg_ip4_header;
1406   }
1407
1408   {
1409     ethernet_arp_header_t h;
1410
1411     memset (&h, 0, sizeof (h));
1412
1413     /* Set target ethernet address to all zeros. */
1414     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1415
1416 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1417 #define _8(f,v) h.f = v;
1418     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1419     _16 (l3_type, ETHERNET_TYPE_IP4);
1420     _8 (n_l2_address_bytes, 6);
1421     _8 (n_l3_address_bytes, 4);
1422     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1423 #undef _16
1424 #undef _8
1425
1426     vlib_packet_template_init (vm,
1427                                &im->ip4_arp_request_packet_template,
1428                                /* data */ &h,
1429                                sizeof (h),
1430                                /* alloc chunk size */ 8,
1431                                "ip4 arp");
1432   }
1433
1434   return 0;
1435 }
1436
1437 VLIB_INIT_FUNCTION (ip4_lookup_init);
1438
1439 typedef struct {
1440   /* Adjacency taken. */
1441   u32 adj_index;
1442   u32 flow_hash;
1443   u32 fib_index;
1444
1445   /* Packet data, possibly *after* rewrite. */
1446   u8 packet_data[64 - 1*sizeof(u32)];
1447 } ip4_forward_next_trace_t;
1448
1449 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1450 {
1451   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1452   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1453   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1454   vnet_main_t * vnm = vnet_get_main();
1455   ip4_main_t * im = &ip4_main;
1456   ip_adjacency_t * adj;
1457   uword indent = format_get_indent (s);
1458
1459   adj = ip_get_adjacency (&im->lookup_main, t->adj_index);
1460   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1461               t->fib_index, t->adj_index, format_ip_adjacency,
1462               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1463   switch (adj->lookup_next_index)
1464     {
1465     case IP_LOOKUP_NEXT_REWRITE:
1466       s = format (s, "\n%U%U",
1467                   format_white_space, indent,
1468                   format_ip_adjacency_packet_data,
1469                   vnm, &im->lookup_main, t->adj_index,
1470                   t->packet_data, sizeof (t->packet_data));
1471       break;
1472
1473     default:
1474       break;
1475     }
1476
1477   return s;
1478 }
1479
1480 /* Common trace function for all ip4-forward next nodes. */
1481 void
1482 ip4_forward_next_trace (vlib_main_t * vm,
1483                         vlib_node_runtime_t * node,
1484                         vlib_frame_t * frame,
1485                         vlib_rx_or_tx_t which_adj_index)
1486 {
1487   u32 * from, n_left;
1488   ip4_main_t * im = &ip4_main;
1489
1490   n_left = frame->n_vectors;
1491   from = vlib_frame_vector_args (frame);
1492   
1493   while (n_left >= 4)
1494     {
1495       u32 bi0, bi1;
1496       vlib_buffer_t * b0, * b1;
1497       ip4_forward_next_trace_t * t0, * t1;
1498
1499       /* Prefetch next iteration. */
1500       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1501       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1502
1503       bi0 = from[0];
1504       bi1 = from[1];
1505
1506       b0 = vlib_get_buffer (vm, bi0);
1507       b1 = vlib_get_buffer (vm, bi1);
1508
1509       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1510         {
1511           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1512           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1513           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1514           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1515                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1516           clib_memcpy (t0->packet_data,
1517                   vlib_buffer_get_current (b0),
1518                   sizeof (t0->packet_data));
1519         }
1520       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1521         {
1522           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1523           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1524           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1525           t1->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1526                              vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1527           clib_memcpy (t1->packet_data,
1528                   vlib_buffer_get_current (b1),
1529                   sizeof (t1->packet_data));
1530         }
1531       from += 2;
1532       n_left -= 2;
1533     }
1534
1535   while (n_left >= 1)
1536     {
1537       u32 bi0;
1538       vlib_buffer_t * b0;
1539       ip4_forward_next_trace_t * t0;
1540
1541       bi0 = from[0];
1542
1543       b0 = vlib_get_buffer (vm, bi0);
1544
1545       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1546         {
1547           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1548           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1549           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1550           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1551                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1552           clib_memcpy (t0->packet_data,
1553                   vlib_buffer_get_current (b0),
1554                   sizeof (t0->packet_data));
1555         }
1556       from += 1;
1557       n_left -= 1;
1558     }
1559 }
1560
1561 static uword
1562 ip4_drop_or_punt (vlib_main_t * vm,
1563                   vlib_node_runtime_t * node,
1564                   vlib_frame_t * frame,
1565                   ip4_error_t error_code)
1566 {
1567   u32 * buffers = vlib_frame_vector_args (frame);
1568   uword n_packets = frame->n_vectors;
1569
1570   vlib_error_drop_buffers (vm, node,
1571                            buffers,
1572                            /* stride */ 1,
1573                            n_packets,
1574                            /* next */ 0,
1575                            ip4_input_node.index,
1576                            error_code);
1577
1578   if (node->flags & VLIB_NODE_FLAG_TRACE)
1579     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1580
1581   return n_packets;
1582 }
1583
1584 static uword
1585 ip4_drop (vlib_main_t * vm,
1586           vlib_node_runtime_t * node,
1587           vlib_frame_t * frame)
1588 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1589
1590 static uword
1591 ip4_punt (vlib_main_t * vm,
1592           vlib_node_runtime_t * node,
1593           vlib_frame_t * frame)
1594 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1595
1596 static uword
1597 ip4_miss (vlib_main_t * vm,
1598           vlib_node_runtime_t * node,
1599           vlib_frame_t * frame)
1600 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1601
1602 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1603   .function = ip4_drop,
1604   .name = "ip4-drop",
1605   .vector_size = sizeof (u32),
1606
1607   .format_trace = format_ip4_forward_next_trace,
1608
1609   .n_next_nodes = 1,
1610   .next_nodes = {
1611     [0] = "error-drop",
1612   },
1613 };
1614
1615 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1616   .function = ip4_punt,
1617   .name = "ip4-punt",
1618   .vector_size = sizeof (u32),
1619
1620   .format_trace = format_ip4_forward_next_trace,
1621
1622   .n_next_nodes = 1,
1623   .next_nodes = {
1624     [0] = "error-punt",
1625   },
1626 };
1627
1628 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1629   .function = ip4_miss,
1630   .name = "ip4-miss",
1631   .vector_size = sizeof (u32),
1632
1633   .format_trace = format_ip4_forward_next_trace,
1634
1635   .n_next_nodes = 1,
1636   .next_nodes = {
1637     [0] = "error-drop",
1638   },
1639 };
1640
1641 /* Compute TCP/UDP/ICMP4 checksum in software. */
1642 u16
1643 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1644                               ip4_header_t * ip0)
1645 {
1646   ip_csum_t sum0;
1647   u32 ip_header_length, payload_length_host_byte_order;
1648   u32 n_this_buffer, n_bytes_left;
1649   u16 sum16;
1650   void * data_this_buffer;
1651   
1652   /* Initialize checksum with ip header. */
1653   ip_header_length = ip4_header_bytes (ip0);
1654   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1655   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1656
1657   if (BITS (uword) == 32)
1658     {
1659       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1660       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1661     }
1662   else
1663     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1664
1665   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1666   data_this_buffer = (void *) ip0 + ip_header_length;
1667   if (n_this_buffer + ip_header_length > p0->current_length)
1668     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1669   while (1)
1670     {
1671       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1672       n_bytes_left -= n_this_buffer;
1673       if (n_bytes_left == 0)
1674         break;
1675
1676       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1677       p0 = vlib_get_buffer (vm, p0->next_buffer);
1678       data_this_buffer = vlib_buffer_get_current (p0);
1679       n_this_buffer = p0->current_length;
1680     }
1681
1682   sum16 = ~ ip_csum_fold (sum0);
1683
1684   return sum16;
1685 }
1686
1687 static u32
1688 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1689 {
1690   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1691   udp_header_t * udp0;
1692   u16 sum16;
1693
1694   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1695           || ip0->protocol == IP_PROTOCOL_UDP);
1696
1697   udp0 = (void *) (ip0 + 1);
1698   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1699     {
1700       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1701                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1702       return p0->flags;
1703     }
1704
1705   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1706
1707   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1708                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1709
1710   return p0->flags;
1711 }
1712
1713 static uword
1714 ip4_local (vlib_main_t * vm,
1715            vlib_node_runtime_t * node,
1716            vlib_frame_t * frame)
1717 {
1718   ip4_main_t * im = &ip4_main;
1719   ip_lookup_main_t * lm = &im->lookup_main;
1720   ip_local_next_t next_index;
1721   u32 * from, * to_next, n_left_from, n_left_to_next;
1722   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1723
1724   from = vlib_frame_vector_args (frame);
1725   n_left_from = frame->n_vectors;
1726   next_index = node->cached_next_index;
1727   
1728   if (node->flags & VLIB_NODE_FLAG_TRACE)
1729     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1730
1731   while (n_left_from > 0)
1732     {
1733       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1734
1735       while (n_left_from >= 4 && n_left_to_next >= 2)
1736         {
1737           vlib_buffer_t * p0, * p1;
1738           ip4_header_t * ip0, * ip1;
1739           udp_header_t * udp0, * udp1;
1740           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1741           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1742           ip_adjacency_t * adj0, * adj1;
1743           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
1744           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
1745           i32 len_diff0, len_diff1;
1746           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1747           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1748           u8 enqueue_code;
1749       
1750           pi0 = to_next[0] = from[0];
1751           pi1 = to_next[1] = from[1];
1752           from += 2;
1753           n_left_from -= 2;
1754           to_next += 2;
1755           n_left_to_next -= 2;
1756       
1757           p0 = vlib_get_buffer (vm, pi0);
1758           p1 = vlib_get_buffer (vm, pi1);
1759
1760           ip0 = vlib_buffer_get_current (p0);
1761           ip1 = vlib_buffer_get_current (p1);
1762
1763           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1764                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1765           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1766                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1767
1768           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1769           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
1770
1771           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1772
1773           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1774           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1775
1776           /* Treat IP frag packets as "experimental" protocol for now
1777              until support of IP frag reassembly is implemented */
1778           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1779           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1780           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1781           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1782           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1783           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1784
1785           flags0 = p0->flags;
1786           flags1 = p1->flags;
1787
1788           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1789           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1790
1791           udp0 = ip4_next_header (ip0);
1792           udp1 = ip4_next_header (ip1);
1793
1794           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1795           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1796           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1797
1798           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1799           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1800
1801           /* Verify UDP length. */
1802           ip_len0 = clib_net_to_host_u16 (ip0->length);
1803           ip_len1 = clib_net_to_host_u16 (ip1->length);
1804           udp_len0 = clib_net_to_host_u16 (udp0->length);
1805           udp_len1 = clib_net_to_host_u16 (udp1->length);
1806
1807           len_diff0 = ip_len0 - udp_len0;
1808           len_diff1 = ip_len1 - udp_len1;
1809
1810           len_diff0 = is_udp0 ? len_diff0 : 0;
1811           len_diff1 = is_udp1 ? len_diff1 : 0;
1812
1813           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1814                                 & good_tcp_udp0 & good_tcp_udp1)))
1815             {
1816               if (is_tcp_udp0)
1817                 {
1818                   if (is_tcp_udp0
1819                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1820                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1821                   good_tcp_udp0 =
1822                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1823                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1824                 }
1825               if (is_tcp_udp1)
1826                 {
1827                   if (is_tcp_udp1
1828                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1829                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1830                   good_tcp_udp1 =
1831                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1832                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1833                 }
1834             }
1835
1836           good_tcp_udp0 &= len_diff0 >= 0;
1837           good_tcp_udp1 &= len_diff1 >= 0;
1838
1839           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1840           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1841
1842           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1843
1844           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1845           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1846
1847           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1848           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1849                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1850                     : error0);
1851           error1 = (is_tcp_udp1 && ! good_tcp_udp1
1852                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1853                     : error1);
1854
1855           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1856           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1857
1858           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1859           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1860
1861           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1862           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
1863
1864           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1865                                                            &ip0->src_address,
1866                                                            /* no_default_route */ 1));
1867           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
1868                                                            &ip1->src_address,
1869                                                            /* no_default_route */ 1));
1870
1871           adj0 = ip_get_adjacency (lm, adj_index0);
1872           adj1 = ip_get_adjacency (lm, adj_index1);
1873
1874           /* 
1875            * Must have a route to source otherwise we drop the packet.
1876            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1877            */
1878           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1879                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1880                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
1881                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1882                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1883                     ? IP4_ERROR_SRC_LOOKUP_MISS
1884                     : error0);
1885           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
1886                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1887                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
1888                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1889                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1890                     ? IP4_ERROR_SRC_LOOKUP_MISS
1891                     : error1);
1892
1893           next0 = lm->local_next_by_ip_protocol[proto0];
1894           next1 = lm->local_next_by_ip_protocol[proto1];
1895
1896           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1897           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1898
1899           p0->error = error0 ? error_node->errors[error0] : 0;
1900           p1->error = error1 ? error_node->errors[error1] : 0;
1901
1902           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1903
1904           if (PREDICT_FALSE (enqueue_code != 0))
1905             {
1906               switch (enqueue_code)
1907                 {
1908                 case 1:
1909                   /* A B A */
1910                   to_next[-2] = pi1;
1911                   to_next -= 1;
1912                   n_left_to_next += 1;
1913                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1914                   break;
1915
1916                 case 2:
1917                   /* A A B */
1918                   to_next -= 1;
1919                   n_left_to_next += 1;
1920                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1921                   break;
1922
1923                 case 3:
1924                   /* A B B or A B C */
1925                   to_next -= 2;
1926                   n_left_to_next += 2;
1927                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1928                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1929                   if (next0 == next1)
1930                     {
1931                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1932                       next_index = next1;
1933                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1934                     }
1935                   break;
1936                 }
1937             }
1938         }
1939
1940       while (n_left_from > 0 && n_left_to_next > 0)
1941         {
1942           vlib_buffer_t * p0;
1943           ip4_header_t * ip0;
1944           udp_header_t * udp0;
1945           ip4_fib_mtrie_t * mtrie0;
1946           ip4_fib_mtrie_leaf_t leaf0;
1947           ip_adjacency_t * adj0;
1948           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
1949           i32 len_diff0;
1950           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1951       
1952           pi0 = to_next[0] = from[0];
1953           from += 1;
1954           n_left_from -= 1;
1955           to_next += 1;
1956           n_left_to_next -= 1;
1957       
1958           p0 = vlib_get_buffer (vm, pi0);
1959
1960           ip0 = vlib_buffer_get_current (p0);
1961
1962           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1963                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1964
1965           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1966
1967           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1968
1969           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1970
1971           /* Treat IP frag packets as "experimental" protocol for now
1972              until support of IP frag reassembly is implemented */
1973           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1974           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1975           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1976
1977           flags0 = p0->flags;
1978
1979           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1980
1981           udp0 = ip4_next_header (ip0);
1982
1983           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1984           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1985
1986           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1987
1988           /* Verify UDP length. */
1989           ip_len0 = clib_net_to_host_u16 (ip0->length);
1990           udp_len0 = clib_net_to_host_u16 (udp0->length);
1991
1992           len_diff0 = ip_len0 - udp_len0;
1993
1994           len_diff0 = is_udp0 ? len_diff0 : 0;
1995
1996           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1997             {
1998               if (is_tcp_udp0)
1999                 {
2000                   if (is_tcp_udp0
2001                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2002                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2003                   good_tcp_udp0 =
2004                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2005                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2006                 }
2007             }
2008
2009           good_tcp_udp0 &= len_diff0 >= 0;
2010
2011           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2012
2013           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2014
2015           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2016
2017           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2018           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2019                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2020                     : error0);
2021
2022           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2023
2024           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2025           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2026
2027           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2028                                                            &ip0->src_address,
2029                                                            /* no_default_route */ 1));
2030
2031           adj0 = ip_get_adjacency (lm, adj_index0);
2032
2033           /* Must have a route to source otherwise we drop the packet. */
2034           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2035                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2036                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2037                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2038                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2039                     ? IP4_ERROR_SRC_LOOKUP_MISS
2040                     : error0);
2041
2042           next0 = lm->local_next_by_ip_protocol[proto0];
2043
2044           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2045
2046           p0->error = error0? error_node->errors[error0] : 0;
2047
2048           if (PREDICT_FALSE (next0 != next_index))
2049             {
2050               n_left_to_next += 1;
2051               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2052
2053               next_index = next0;
2054               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2055               to_next[0] = pi0;
2056               to_next += 1;
2057               n_left_to_next -= 1;
2058             }
2059         }
2060   
2061       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2062     }
2063
2064   return frame->n_vectors;
2065 }
2066
2067 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2068   .function = ip4_local,
2069   .name = "ip4-local",
2070   .vector_size = sizeof (u32),
2071
2072   .format_trace = format_ip4_forward_next_trace,
2073
2074   .n_next_nodes = IP_LOCAL_N_NEXT,
2075   .next_nodes = {
2076     [IP_LOCAL_NEXT_DROP] = "error-drop",
2077     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2078     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2079     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2080   },
2081 };
2082
2083 void ip4_register_protocol (u32 protocol, u32 node_index)
2084 {
2085   vlib_main_t * vm = vlib_get_main();
2086   ip4_main_t * im = &ip4_main;
2087   ip_lookup_main_t * lm = &im->lookup_main;
2088
2089   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2090   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2091 }
2092
2093 static clib_error_t *
2094 show_ip_local_command_fn (vlib_main_t * vm,
2095                           unformat_input_t * input,
2096                          vlib_cli_command_t * cmd)
2097 {
2098   ip4_main_t * im = &ip4_main;
2099   ip_lookup_main_t * lm = &im->lookup_main;
2100   int i;
2101
2102   vlib_cli_output (vm, "Protocols handled by ip4_local");
2103   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2104     {
2105       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2106         vlib_cli_output (vm, "%d", i);
2107     }
2108   return 0;
2109 }
2110
2111
2112
2113 VLIB_CLI_COMMAND (show_ip_local, static) = {
2114   .path = "show ip local",
2115   .function = show_ip_local_command_fn,
2116   .short_help = "Show ip local protocol table",
2117 };
2118
2119 static uword
2120 ip4_arp (vlib_main_t * vm,
2121          vlib_node_runtime_t * node,
2122          vlib_frame_t * frame)
2123 {
2124   vnet_main_t * vnm = vnet_get_main();
2125   ip4_main_t * im = &ip4_main;
2126   ip_lookup_main_t * lm = &im->lookup_main;
2127   u32 * from, * to_next_drop;
2128   uword n_left_from, n_left_to_next_drop, next_index;
2129   static f64 time_last_seed_change = -1e100;
2130   static u32 hash_seeds[3];
2131   static uword hash_bitmap[256 / BITS (uword)]; 
2132   f64 time_now;
2133
2134   if (node->flags & VLIB_NODE_FLAG_TRACE)
2135     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2136
2137   time_now = vlib_time_now (vm);
2138   if (time_now - time_last_seed_change > 1e-3)
2139     {
2140       uword i;
2141       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2142                                              sizeof (hash_seeds));
2143       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2144         hash_seeds[i] = r[i];
2145
2146       /* Mark all hash keys as been no-seen before. */
2147       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2148         hash_bitmap[i] = 0;
2149
2150       time_last_seed_change = time_now;
2151     }
2152
2153   from = vlib_frame_vector_args (frame);
2154   n_left_from = frame->n_vectors;
2155   next_index = node->cached_next_index;
2156   if (next_index == IP4_ARP_NEXT_DROP)
2157     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2158
2159   while (n_left_from > 0)
2160     {
2161       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2162                            to_next_drop, n_left_to_next_drop);
2163
2164       while (n_left_from > 0 && n_left_to_next_drop > 0)
2165         {
2166           vlib_buffer_t * p0;
2167           ip4_header_t * ip0;
2168           ethernet_header_t * eh0;
2169           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2170           uword bm0;
2171           ip_adjacency_t * adj0;
2172
2173           pi0 = from[0];
2174
2175           p0 = vlib_get_buffer (vm, pi0);
2176
2177           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2178           adj0 = ip_get_adjacency (lm, adj_index0);
2179           ip0 = vlib_buffer_get_current (p0);
2180
2181           /* If packet destination is not local, send ARP to next hop */
2182           if (adj0->arp.next_hop.ip4.as_u32)
2183             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2184
2185           /* 
2186            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2187            * rewrite to this packet, we need to skip it here.
2188            * Note, to distinguish from src IP addr *.8.6.*, we
2189            * check for a bcast eth dest instead of IPv4 version.
2190            */
2191           eh0 = (ethernet_header_t*)ip0;
2192           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2193             {
2194               u32 vlan_num = 0;
2195               u16 * etype = &eh0->type;
2196               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2197                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2198                 {
2199                   vlan_num += 1;
2200                   etype += 2; //vlan tag also 16 bits, same as etype
2201                 }
2202               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2203                 {
2204                   vlib_buffer_advance (
2205                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2206                   ip0 = vlib_buffer_get_current (p0);
2207                 }
2208             }
2209
2210           a0 = hash_seeds[0];
2211           b0 = hash_seeds[1];
2212           c0 = hash_seeds[2];
2213
2214           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2215           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2216
2217           a0 ^= ip0->dst_address.data_u32;
2218           b0 ^= sw_if_index0;
2219
2220           hash_v3_finalize32 (a0, b0, c0);
2221
2222           c0 &= BITS (hash_bitmap) - 1;
2223           c0 = c0 / BITS (uword);
2224           m0 = (uword) 1 << (c0 % BITS (uword));
2225
2226           bm0 = hash_bitmap[c0];
2227           drop0 = (bm0 & m0) != 0;
2228
2229           /* Mark it as seen. */
2230           hash_bitmap[c0] = bm0 | m0;
2231
2232           from += 1;
2233           n_left_from -= 1;
2234           to_next_drop[0] = pi0;
2235           to_next_drop += 1;
2236           n_left_to_next_drop -= 1;
2237
2238           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2239
2240           if (drop0)
2241             continue;
2242
2243           /* 
2244            * Can happen if the control-plane is programming tables
2245            * with traffic flowing; at least that's today's lame excuse.
2246            */
2247           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2248             {
2249               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2250             }
2251           else
2252           /* Send ARP request. */
2253           {
2254             u32 bi0 = 0;
2255             vlib_buffer_t * b0;
2256             ethernet_arp_header_t * h0;
2257             vnet_hw_interface_t * hw_if0;
2258
2259             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2260
2261             /* Add rewrite/encap string for ARP packet. */
2262             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2263
2264             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2265
2266             /* Src ethernet address in ARP header. */
2267             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2268                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2269
2270             ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0);
2271
2272             /* Copy in destination address we are requesting. */
2273             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2274
2275             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2276             b0 = vlib_get_buffer (vm, bi0);
2277             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2278
2279             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2280
2281             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2282           }
2283         }
2284
2285       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2286     }
2287
2288   return frame->n_vectors;
2289 }
2290
2291 static char * ip4_arp_error_strings[] = {
2292   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2293   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2294   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2295   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2296   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2297 };
2298
2299 VLIB_REGISTER_NODE (ip4_arp_node) = {
2300   .function = ip4_arp,
2301   .name = "ip4-arp",
2302   .vector_size = sizeof (u32),
2303
2304   .format_trace = format_ip4_forward_next_trace,
2305
2306   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2307   .error_strings = ip4_arp_error_strings,
2308
2309   .n_next_nodes = IP4_ARP_N_NEXT,
2310   .next_nodes = {
2311     [IP4_ARP_NEXT_DROP] = "error-drop",
2312   },
2313 };
2314
2315 #define foreach_notrace_ip4_arp_error           \
2316 _(DROP)                                         \
2317 _(REQUEST_SENT)                                 \
2318 _(REPLICATE_DROP)                               \
2319 _(REPLICATE_FAIL)
2320
2321 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2322 {
2323   vlib_node_runtime_t *rt = 
2324     vlib_node_get_runtime (vm, ip4_arp_node.index);
2325
2326   /* don't trace ARP request packets */
2327 #define _(a)                                    \
2328     vnet_pcap_drop_trace_filter_add_del         \
2329         (rt->errors[IP4_ARP_ERROR_##a],         \
2330          1 /* is_add */);
2331     foreach_notrace_ip4_arp_error;
2332 #undef _
2333   return 0;
2334 }
2335
2336 VLIB_INIT_FUNCTION(arp_notrace_init);
2337
2338
2339 /* Send an ARP request to see if given destination is reachable on given interface. */
2340 clib_error_t *
2341 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2342 {
2343   vnet_main_t * vnm = vnet_get_main();
2344   ip4_main_t * im = &ip4_main;
2345   ethernet_arp_header_t * h;
2346   ip4_address_t * src;
2347   ip_interface_address_t * ia;
2348   ip_adjacency_t * adj;
2349   vnet_hw_interface_t * hi;
2350   vnet_sw_interface_t * si;
2351   vlib_buffer_t * b;
2352   u32 bi = 0;
2353
2354   si = vnet_get_sw_interface (vnm, sw_if_index);
2355
2356   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2357     {
2358       return clib_error_return (0, "%U: interface %U down",
2359                                 format_ip4_address, dst, 
2360                                 format_vnet_sw_if_index_name, vnm, 
2361                                 sw_if_index);
2362     }
2363
2364   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2365   if (! src)
2366     {
2367       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2368       return clib_error_return 
2369         (0, "no matching interface address for destination %U (interface %U)",
2370          format_ip4_address, dst,
2371          format_vnet_sw_if_index_name, vnm, sw_if_index);
2372     }
2373
2374   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2375
2376   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2377
2378   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2379
2380   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2381
2382   h->ip4_over_ethernet[0].ip4 = src[0];
2383   h->ip4_over_ethernet[1].ip4 = dst[0];
2384
2385   b = vlib_get_buffer (vm, bi);
2386   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2387
2388   /* Add encapsulation string for software interface (e.g. ethernet header). */
2389   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2390   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2391
2392   {
2393     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2394     u32 * to_next = vlib_frame_vector_args (f);
2395     to_next[0] = bi;
2396     f->n_vectors = 1;
2397     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2398   }
2399
2400   return /* no error */ 0;
2401 }
2402
/* Next indices for the ip4 rewrite path (presumably consumed by
   ip4_rewrite_inline and its node registrations -- their use lies
   outside this excerpt). */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ARP = 1,
} ip4_rewrite_next_t;
2407
2408 always_inline uword
2409 ip4_rewrite_inline (vlib_main_t * vm,
2410                     vlib_node_runtime_t * node,
2411                     vlib_frame_t * frame,
2412                     int rewrite_for_locally_received_packets)
2413 {
2414   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2415   u32 * from = vlib_frame_vector_args (frame);
2416   u32 n_left_from, n_left_to_next, * to_next, next_index;
2417   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2418   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2419
2420   n_left_from = frame->n_vectors;
2421   next_index = node->cached_next_index;
2422   u32 cpu_index = os_get_cpu_number();
2423   
2424   while (n_left_from > 0)
2425     {
2426       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2427
2428       while (n_left_from >= 4 && n_left_to_next >= 2)
2429         {
2430           ip_adjacency_t * adj0, * adj1;
2431           vlib_buffer_t * p0, * p1;
2432           ip4_header_t * ip0, * ip1;
2433           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2434           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2435           u32 next0_override, next1_override;
2436       
2437           if (rewrite_for_locally_received_packets)
2438               next0_override = next1_override = 0;
2439
2440           /* Prefetch next iteration. */
2441           {
2442             vlib_buffer_t * p2, * p3;
2443
2444             p2 = vlib_get_buffer (vm, from[2]);
2445             p3 = vlib_get_buffer (vm, from[3]);
2446
2447             vlib_prefetch_buffer_header (p2, STORE);
2448             vlib_prefetch_buffer_header (p3, STORE);
2449
2450             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2451             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2452           }
2453
2454           pi0 = to_next[0] = from[0];
2455           pi1 = to_next[1] = from[1];
2456
2457           from += 2;
2458           n_left_from -= 2;
2459           to_next += 2;
2460           n_left_to_next -= 2;
2461       
2462           p0 = vlib_get_buffer (vm, pi0);
2463           p1 = vlib_get_buffer (vm, pi1);
2464
2465           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2466           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2467
2468           /* We should never rewrite a pkt using the MISS adjacency */
2469           ASSERT(adj_index0 && adj_index1);
2470
2471           ip0 = vlib_buffer_get_current (p0);
2472           ip1 = vlib_buffer_get_current (p1);
2473
2474           error0 = error1 = IP4_ERROR_NONE;
2475
2476           /* Decrement TTL & update checksum.
2477              Works either endian, so no need for byte swap. */
2478           if (! rewrite_for_locally_received_packets)
2479             {
2480               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2481
2482               /* Input node should have reject packets with ttl 0. */
2483               ASSERT (ip0->ttl > 0);
2484               ASSERT (ip1->ttl > 0);
2485
2486               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2487               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2488
2489               checksum0 += checksum0 >= 0xffff;
2490               checksum1 += checksum1 >= 0xffff;
2491
2492               ip0->checksum = checksum0;
2493               ip1->checksum = checksum1;
2494
2495               ttl0 -= 1;
2496               ttl1 -= 1;
2497
2498               ip0->ttl = ttl0;
2499               ip1->ttl = ttl1;
2500
2501               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2502               error1 = ttl1 <= 0 ? IP4_ERROR_TIME_EXPIRED : error1;
2503
2504               /* Verify checksum. */
2505               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2506               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2507             }
2508
2509           /* Rewrite packet header and updates lengths. */
2510           adj0 = ip_get_adjacency (lm, adj_index0);
2511           adj1 = ip_get_adjacency (lm, adj_index1);
2512       
2513           if (rewrite_for_locally_received_packets)
2514             {
2515               /*
2516                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2517                * we end up here with a local adjacency in hand
2518                * The local adj rewrite data is 0xfefe on purpose.
2519                * Bad engineer, no donut for you.
2520                */
2521               if (PREDICT_FALSE(adj0->lookup_next_index 
2522                                 == IP_LOOKUP_NEXT_LOCAL))
2523                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2524               if (PREDICT_FALSE(adj0->lookup_next_index
2525                                 == IP_LOOKUP_NEXT_ARP))
2526                 next0_override = IP4_REWRITE_NEXT_ARP;
2527               if (PREDICT_FALSE(adj1->lookup_next_index 
2528                                 == IP_LOOKUP_NEXT_LOCAL))
2529                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2530               if (PREDICT_FALSE(adj1->lookup_next_index
2531                                 == IP_LOOKUP_NEXT_ARP))
2532                 next1_override = IP4_REWRITE_NEXT_ARP;
2533             }
2534
2535           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2536           rw_len0 = adj0[0].rewrite_header.data_bytes;
2537           rw_len1 = adj1[0].rewrite_header.data_bytes;
2538           next0 = (error0 == IP4_ERROR_NONE) 
2539             ? adj0[0].rewrite_header.next_index : 0;
2540
2541           if (rewrite_for_locally_received_packets)
2542               next0 = next0 && next0_override ? next0_override : next0;
2543
2544           next1 = (error1 == IP4_ERROR_NONE)
2545             ? adj1[0].rewrite_header.next_index : 0;
2546
2547           if (rewrite_for_locally_received_packets)
2548               next1 = next1 && next1_override ? next1_override : next1;
2549
2550           /* 
2551            * We've already accounted for an ethernet_header_t elsewhere
2552            */
2553           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2554               vlib_increment_combined_counter 
2555                   (&lm->adjacency_counters,
2556                    cpu_index, adj_index0, 
2557                    /* packet increment */ 0,
2558                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2559
2560           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2561               vlib_increment_combined_counter 
2562                   (&lm->adjacency_counters,
2563                    cpu_index, adj_index1, 
2564                    /* packet increment */ 0,
2565                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2566
2567           /* Check MTU of outgoing interface. */
2568           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2569                     ? IP4_ERROR_MTU_EXCEEDED
2570                     : error0);
2571           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2572                     ? IP4_ERROR_MTU_EXCEEDED
2573                     : error1);
2574
2575           p0->current_data -= rw_len0;
2576           p1->current_data -= rw_len1;
2577
2578           p0->current_length += rw_len0;
2579           p1->current_length += rw_len1;
2580
2581           vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
2582           vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index;
2583       
2584           p0->error = error_node->errors[error0];
2585           p1->error = error_node->errors[error1];
2586
2587           /* Guess we are only writing on simple Ethernet header. */
2588           vnet_rewrite_two_headers (adj0[0], adj1[0],
2589                                     ip0, ip1,
2590                                     sizeof (ethernet_header_t));
2591       
2592           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2593                                            to_next, n_left_to_next,
2594                                            pi0, pi1, next0, next1);
2595         }
2596
2597       while (n_left_from > 0 && n_left_to_next > 0)
2598         {
2599           ip_adjacency_t * adj0;
2600           vlib_buffer_t * p0;
2601           ip4_header_t * ip0;
2602           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2603           u32 next0_override;
2604       
2605           if (rewrite_for_locally_received_packets)
2606               next0_override = 0;
2607
2608           pi0 = to_next[0] = from[0];
2609
2610           p0 = vlib_get_buffer (vm, pi0);
2611
2612           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2613
2614           /* We should never rewrite a pkt using the MISS adjacency */
2615           ASSERT(adj_index0);
2616
2617           adj0 = ip_get_adjacency (lm, adj_index0);
2618       
2619           ip0 = vlib_buffer_get_current (p0);
2620
2621           error0 = IP4_ERROR_NONE;
2622           next0 = 0;            /* drop on error */
2623
2624           /* Decrement TTL & update checksum. */
2625           if (! rewrite_for_locally_received_packets)
2626             {
2627               i32 ttl0 = ip0->ttl;
2628
2629               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2630
2631               checksum0 += checksum0 >= 0xffff;
2632
2633               ip0->checksum = checksum0;
2634
2635               ASSERT (ip0->ttl > 0);
2636
2637               ttl0 -= 1;
2638
2639               ip0->ttl = ttl0;
2640
2641               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2642
2643               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2644             }
2645
2646           if (rewrite_for_locally_received_packets)
2647             {
2648               /*
2649                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2650                * we end up here with a local adjacency in hand
2651                * The local adj rewrite data is 0xfefe on purpose.
2652                * Bad engineer, no donut for you.
2653                */
2654               if (PREDICT_FALSE(adj0->lookup_next_index 
2655                                 == IP_LOOKUP_NEXT_LOCAL))
2656                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2657               /* 
2658                * We have to override the next_index in ARP adjacencies,
2659                * because they're set up for ip4-arp, not this node...
2660                */
2661               if (PREDICT_FALSE(adj0->lookup_next_index
2662                                 == IP_LOOKUP_NEXT_ARP))
2663                 next0_override = IP4_REWRITE_NEXT_ARP;
2664             }
2665
2666           /* Guess we are only writing on simple Ethernet header. */
2667           vnet_rewrite_one_header (adj0[0], ip0, 
2668                                    sizeof (ethernet_header_t));
2669           
2670           /* Update packet buffer attributes/set output interface. */
2671           rw_len0 = adj0[0].rewrite_header.data_bytes;
2672           
2673           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2674               vlib_increment_combined_counter 
2675                   (&lm->adjacency_counters,
2676                    cpu_index, adj_index0, 
2677                    /* packet increment */ 0,
2678                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2679           
2680           /* Check MTU of outgoing interface. */
2681           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2682                     > adj0[0].rewrite_header.max_l3_packet_bytes
2683                     ? IP4_ERROR_MTU_EXCEEDED
2684                     : error0);
2685           
2686           p0->error = error_node->errors[error0];
2687           p0->current_data -= rw_len0;
2688           p0->current_length += rw_len0;
2689           vnet_buffer (p0)->sw_if_index[VLIB_TX] = 
2690             adj0[0].rewrite_header.sw_if_index;
2691           
2692           next0 = (error0 == IP4_ERROR_NONE)
2693             ? adj0[0].rewrite_header.next_index : 0;
2694
2695           if (rewrite_for_locally_received_packets)
2696               next0 = next0 && next0_override ? next0_override : next0;
2697
2698           from += 1;
2699           n_left_from -= 1;
2700           to_next += 1;
2701           n_left_to_next -= 1;
2702       
2703           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2704                                            to_next, n_left_to_next,
2705                                            pi0, next0);
2706         }
2707   
2708       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2709     }
2710
2711   /* Need to do trace after rewrites to pick up new packet data. */
2712   if (node->flags & VLIB_NODE_FLAG_TRACE)
2713     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2714
2715   return frame->n_vectors;
2716 }
2717
2718 static uword
2719 ip4_rewrite_transit (vlib_main_t * vm,
2720                      vlib_node_runtime_t * node,
2721                      vlib_frame_t * frame)
2722 {
2723   return ip4_rewrite_inline (vm, node, frame,
2724                              /* rewrite_for_locally_received_packets */ 0);
2725 }
2726
2727 static uword
2728 ip4_rewrite_local (vlib_main_t * vm,
2729                    vlib_node_runtime_t * node,
2730                    vlib_frame_t * frame)
2731 {
2732   return ip4_rewrite_inline (vm, node, frame,
2733                              /* rewrite_for_locally_received_packets */ 1);
2734 }
2735
2736 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2737   .function = ip4_rewrite_transit,
2738   .name = "ip4-rewrite-transit",
2739   .vector_size = sizeof (u32),
2740
2741   .format_trace = format_ip4_forward_next_trace,
2742
2743   .n_next_nodes = 2,
2744   .next_nodes = {
2745     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2746     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2747   },
2748 };
2749
2750 VLIB_REGISTER_NODE (ip4_rewrite_local_node,static) = {
2751   .function = ip4_rewrite_local,
2752   .name = "ip4-rewrite-local",
2753   .vector_size = sizeof (u32),
2754
2755   .sibling_of = "ip4-rewrite-transit",
2756
2757   .format_trace = format_ip4_forward_next_trace,
2758
2759   .n_next_nodes = 2,
2760   .next_nodes = {
2761     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2762     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2763   },
2764 };
2765
2766 static clib_error_t *
2767 add_del_interface_table (vlib_main_t * vm,
2768                          unformat_input_t * input,
2769                          vlib_cli_command_t * cmd)
2770 {
2771   vnet_main_t * vnm = vnet_get_main();
2772   clib_error_t * error = 0;
2773   u32 sw_if_index, table_id;
2774
2775   sw_if_index = ~0;
2776
2777   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2778     {
2779       error = clib_error_return (0, "unknown interface `%U'",
2780                                  format_unformat_error, input);
2781       goto done;
2782     }
2783
2784   if (unformat (input, "%d", &table_id))
2785     ;
2786   else
2787     {
2788       error = clib_error_return (0, "expected table id `%U'",
2789                                  format_unformat_error, input);
2790       goto done;
2791     }
2792
2793   {
2794     ip4_main_t * im = &ip4_main;
2795     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
2796
2797     if (fib) 
2798       {
2799         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2800         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
2801     }
2802   }
2803
2804  done:
2805   return error;
2806 }
2807
2808 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2809   .path = "set interface ip table",
2810   .function = add_del_interface_table,
2811   .short_help = "Add/delete FIB table id for interface",
2812 };
2813
2814
2815 static uword
2816 ip4_lookup_multicast (vlib_main_t * vm,
2817                       vlib_node_runtime_t * node,
2818                       vlib_frame_t * frame)
2819 {
2820   ip4_main_t * im = &ip4_main;
2821   ip_lookup_main_t * lm = &im->lookup_main;
2822   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
2823   u32 n_left_from, n_left_to_next, * from, * to_next;
2824   ip_lookup_next_t next;
2825   u32 cpu_index = os_get_cpu_number();
2826
2827   from = vlib_frame_vector_args (frame);
2828   n_left_from = frame->n_vectors;
2829   next = node->cached_next_index;
2830
2831   while (n_left_from > 0)
2832     {
2833       vlib_get_next_frame (vm, node, next,
2834                            to_next, n_left_to_next);
2835
2836       while (n_left_from >= 4 && n_left_to_next >= 2)
2837         {
2838           vlib_buffer_t * p0, * p1;
2839           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
2840           ip_lookup_next_t next0, next1;
2841           ip4_header_t * ip0, * ip1;
2842           ip_adjacency_t * adj0, * adj1;
2843           u32 fib_index0, fib_index1;
2844           u32 flow_hash_config0, flow_hash_config1;
2845
2846           /* Prefetch next iteration. */
2847           {
2848             vlib_buffer_t * p2, * p3;
2849
2850             p2 = vlib_get_buffer (vm, from[2]);
2851             p3 = vlib_get_buffer (vm, from[3]);
2852
2853             vlib_prefetch_buffer_header (p2, LOAD);
2854             vlib_prefetch_buffer_header (p3, LOAD);
2855
2856             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2857             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2858           }
2859
2860           pi0 = to_next[0] = from[0];
2861           pi1 = to_next[1] = from[1];
2862
2863           p0 = vlib_get_buffer (vm, pi0);
2864           p1 = vlib_get_buffer (vm, pi1);
2865
2866           ip0 = vlib_buffer_get_current (p0);
2867           ip1 = vlib_buffer_get_current (p1);
2868
2869           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2870           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2871           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2872             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2873           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2874             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2875
2876           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
2877                                               &ip0->dst_address, p0);
2878           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
2879                                               &ip1->dst_address, p1);
2880
2881           adj0 = ip_get_adjacency (lm, adj_index0);
2882           adj1 = ip_get_adjacency (lm, adj_index1);
2883
2884           next0 = adj0->lookup_next_index;
2885           next1 = adj1->lookup_next_index;
2886
2887           flow_hash_config0 = 
2888               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
2889
2890           flow_hash_config1 = 
2891               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
2892
2893           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
2894               (ip0, flow_hash_config0);
2895                                                                   
2896           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
2897               (ip1, flow_hash_config1);
2898
2899           ASSERT (adj0->n_adj > 0);
2900           ASSERT (adj1->n_adj > 0);
2901           ASSERT (is_pow2 (adj0->n_adj));
2902           ASSERT (is_pow2 (adj1->n_adj));
2903           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
2904           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
2905
2906           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2907           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2908
2909           if (1) /* $$$$$$ HACK FIXME */
2910           vlib_increment_combined_counter 
2911               (cm, cpu_index, adj_index0, 1,
2912                vlib_buffer_length_in_chain (vm, p0));
2913           if (1) /* $$$$$$ HACK FIXME */
2914           vlib_increment_combined_counter 
2915               (cm, cpu_index, adj_index1, 1,
2916                vlib_buffer_length_in_chain (vm, p1));
2917
2918           from += 2;
2919           to_next += 2;
2920           n_left_to_next -= 2;
2921           n_left_from -= 2;
2922
2923           wrong_next = (next0 != next) + 2*(next1 != next);
2924           if (PREDICT_FALSE (wrong_next != 0))
2925             {
2926               switch (wrong_next)
2927                 {
2928                 case 1:
2929                   /* A B A */
2930                   to_next[-2] = pi1;
2931                   to_next -= 1;
2932                   n_left_to_next += 1;
2933                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2934                   break;
2935
2936                 case 2:
2937                   /* A A B */
2938                   to_next -= 1;
2939                   n_left_to_next += 1;
2940                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2941                   break;
2942
2943                 case 3:
2944                   /* A B C */
2945                   to_next -= 2;
2946                   n_left_to_next += 2;
2947                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2948                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2949                   if (next0 == next1)
2950                     {
2951                       /* A B B */
2952                       vlib_put_next_frame (vm, node, next, n_left_to_next);
2953                       next = next1;
2954                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2955                     }
2956                 }
2957             }
2958         }
2959     
2960       while (n_left_from > 0 && n_left_to_next > 0)
2961         {
2962           vlib_buffer_t * p0;
2963           ip4_header_t * ip0;
2964           u32 pi0, adj_index0;
2965           ip_lookup_next_t next0;
2966           ip_adjacency_t * adj0;
2967           u32 fib_index0;
2968           u32 flow_hash_config0;
2969
2970           pi0 = from[0];
2971           to_next[0] = pi0;
2972
2973           p0 = vlib_get_buffer (vm, pi0);
2974
2975           ip0 = vlib_buffer_get_current (p0);
2976
2977           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2978                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2979           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2980               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2981           
2982           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
2983                                               &ip0->dst_address, p0);
2984
2985           adj0 = ip_get_adjacency (lm, adj_index0);
2986
2987           next0 = adj0->lookup_next_index;
2988
2989           flow_hash_config0 = 
2990               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
2991
2992           vnet_buffer (p0)->ip.flow_hash = 
2993             ip4_compute_flow_hash (ip0, flow_hash_config0);
2994
2995           ASSERT (adj0->n_adj > 0);
2996           ASSERT (is_pow2 (adj0->n_adj));
2997           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
2998
2999           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3000
3001           if (1) /* $$$$$$ HACK FIXME */
3002               vlib_increment_combined_counter 
3003                   (cm, cpu_index, adj_index0, 1,
3004                    vlib_buffer_length_in_chain (vm, p0));
3005
3006           from += 1;
3007           to_next += 1;
3008           n_left_to_next -= 1;
3009           n_left_from -= 1;
3010
3011           if (PREDICT_FALSE (next0 != next))
3012             {
3013               n_left_to_next += 1;
3014               vlib_put_next_frame (vm, node, next, n_left_to_next);
3015               next = next0;
3016               vlib_get_next_frame (vm, node, next,
3017                                    to_next, n_left_to_next);
3018               to_next[0] = pi0;
3019               to_next += 1;
3020               n_left_to_next -= 1;
3021             }
3022         }
3023
3024       vlib_put_next_frame (vm, node, next, n_left_to_next);
3025     }
3026
3027   return frame->n_vectors;
3028 }
3029
3030 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3031   .function = ip4_lookup_multicast,
3032   .name = "ip4-lookup-multicast",
3033   .vector_size = sizeof (u32),
3034
3035   .n_next_nodes = IP_LOOKUP_N_NEXT,
3036   .next_nodes = IP4_LOOKUP_NEXT_NODES,
3037 };
3038
3039 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3040   .function = ip4_drop,
3041   .name = "ip4-multicast",
3042   .vector_size = sizeof (u32),
3043
3044   .format_trace = format_ip4_forward_next_trace,
3045
3046   .n_next_nodes = 1,
3047   .next_nodes = {
3048     [0] = "error-drop",
3049   },
3050 };
3051
3052 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3053 {
3054   ip4_main_t * im = &ip4_main;
3055   ip4_fib_mtrie_t * mtrie0;
3056   ip4_fib_mtrie_leaf_t leaf0;
3057   u32 adj_index0;
3058     
3059   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3060
3061   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3062   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3063   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3064   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3065   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3066   
3067   /* Handle default route. */
3068   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3069   
3070   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3071   
3072   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3073                                                   a, 
3074                                                   /* no_default_route */ 0);
3075 }
3076  
3077 static clib_error_t *
3078 test_lookup_command_fn (vlib_main_t * vm,
3079                         unformat_input_t * input,
3080                         vlib_cli_command_t * cmd)
3081 {
3082   u32 table_id = 0;
3083   f64 count = 1;
3084   u32 n;
3085   int i;
3086   ip4_address_t ip4_base_address;
3087   u64 errors = 0;
3088
3089   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3090       if (unformat (input, "table %d", &table_id))
3091         ;
3092       else if (unformat (input, "count %f", &count))
3093         ;
3094
3095       else if (unformat (input, "%U",
3096                          unformat_ip4_address, &ip4_base_address))
3097         ;
3098       else
3099         return clib_error_return (0, "unknown input `%U'",
3100                                   format_unformat_error, input);
3101   }
3102
3103   n = count;
3104
3105   for (i = 0; i < n; i++)
3106     {
3107       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3108         errors++;
3109
3110       ip4_base_address.as_u32 = 
3111         clib_host_to_net_u32 (1 + 
3112                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3113     }
3114
3115   if (errors) 
3116     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3117   else
3118     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3119
3120   return 0;
3121 }
3122
3123 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3124     .path = "test lookup",
3125     .short_help = "test lookup",
3126     .function = test_lookup_command_fn,
3127 };
3128
3129 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3130 {
3131   ip4_main_t * im4 = &ip4_main;
3132   ip4_fib_t * fib;
3133   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3134
3135   if (p == 0)
3136     return VNET_API_ERROR_NO_SUCH_FIB;
3137
3138   fib = vec_elt_at_index (im4->fibs, p[0]);
3139
3140   fib->flow_hash_config = flow_hash_config;
3141   return 0;
3142 }
3143  
3144 static clib_error_t *
3145 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3146                              unformat_input_t * input,
3147                              vlib_cli_command_t * cmd)
3148 {
3149   int matched = 0;
3150   u32 table_id = 0;
3151   u32 flow_hash_config = 0;
3152   int rv;
3153
3154   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3155     if (unformat (input, "table %d", &table_id))
3156       matched = 1;
3157 #define _(a,v) \
3158     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3159     foreach_flow_hash_bit
3160 #undef _
3161     else break;
3162   }
3163   
3164   if (matched == 0)
3165     return clib_error_return (0, "unknown input `%U'",
3166                               format_unformat_error, input);
3167   
3168   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3169   switch (rv)
3170     {
3171     case 0:
3172       break;
3173       
3174     case VNET_API_ERROR_NO_SUCH_FIB:
3175       return clib_error_return (0, "no such FIB table %d", table_id);
3176       
3177     default:
3178       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3179       break;
3180     }
3181   
3182   return 0;
3183 }
3184  
3185 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3186   .path = "set ip flow-hash",
3187   .short_help = 
3188   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3189   .function = set_ip_flow_hash_command_fn,
3190 };
3191  
3192 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3193                                  u32 table_index)
3194 {
3195   vnet_main_t * vnm = vnet_get_main();
3196   vnet_interface_main_t * im = &vnm->interface_main;
3197   ip4_main_t * ipm = &ip4_main;
3198   ip_lookup_main_t * lm = &ipm->lookup_main;
3199   vnet_classify_main_t * cm = &vnet_classify_main;
3200
3201   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3202     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3203
3204   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3205     return VNET_API_ERROR_NO_SUCH_ENTRY;
3206
3207   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3208   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3209
3210   return 0;
3211 }
3212
3213 static clib_error_t *
3214 set_ip_classify_command_fn (vlib_main_t * vm,
3215                             unformat_input_t * input,
3216                             vlib_cli_command_t * cmd)
3217 {
3218   u32 table_index = ~0;
3219   int table_index_set = 0;
3220   u32 sw_if_index = ~0;
3221   int rv;
3222   
3223   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3224     if (unformat (input, "table-index %d", &table_index))
3225       table_index_set = 1;
3226     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3227                        vnet_get_main(), &sw_if_index))
3228       ;
3229     else
3230       break;
3231   }
3232       
3233   if (table_index_set == 0)
3234     return clib_error_return (0, "classify table-index must be specified");
3235
3236   if (sw_if_index == ~0)
3237     return clib_error_return (0, "interface / subif must be specified");
3238
3239   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3240
3241   switch (rv)
3242     {
3243     case 0:
3244       break;
3245
3246     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3247       return clib_error_return (0, "No such interface");
3248
3249     case VNET_API_ERROR_NO_SUCH_ENTRY:
3250       return clib_error_return (0, "No such classifier table");
3251     }
3252   return 0;
3253 }
3254
3255 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3256     .path = "set ip classify",
3257     .short_help = 
3258     "set ip classify intfc <int> table-index <index>",
3259     .function = set_ip_classify_command_fn,
3260 };
3261