a50664cc6627fbb2616a4c0858ca3d656a22f66e
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47
48 /* This is really, really simple but stupid fib. */
49 u32
50 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
51                            ip4_address_t * dst,
52                            u32 disable_default_route)
53 {
54   ip_lookup_main_t * lm = &im->lookup_main;
55   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
56   uword * p, * hash, key;
57   i32 i, i_min, dst_address, ai;
58
59   i_min = disable_default_route ? 1 : 0;
60   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
61   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
62     {
63       hash = fib->adj_index_by_dst_address[i];
64       if (! hash)
65         continue;
66
67       key = dst_address & im->fib_masks[i];
68       if ((p = hash_get (hash, key)) != 0)
69         {
70           ai = p[0];
71           goto done;
72         }
73     }
74     
75   /* Nothing matches in table. */
76   ai = lm->miss_adj_index;
77
78  done:
79   return ai;
80 }
81
82 static ip4_fib_t *
83 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
84 {
85   ip4_fib_t * fib;
86   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
87   vec_add2 (im->fibs, fib, 1);
88   fib->table_id = table_id;
89   fib->index = fib - im->fibs;
90   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
91   fib->fwd_classify_table_index = ~0;
92   fib->rev_classify_table_index = ~0;
93   ip4_mtrie_init (&fib->mtrie);
94   return fib;
95 }
96
97 ip4_fib_t *
98 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
99                                    u32 table_index_or_id, u32 flags)
100 {
101   uword * p, fib_index;
102
103   fib_index = table_index_or_id;
104   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
105     {
106       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
107       if (! p)
108         return create_fib_with_table_id (im, table_index_or_id);
109       fib_index = p[0];
110     }
111   return vec_elt_at_index (im->fibs, fib_index);
112 }
113
114 static void
115 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
116                                        ip4_fib_t * fib,
117                                        u32 address_length)
118 {
119   hash_t * h;
120   uword max_index;
121
122   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
123   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
124
125   fib->adj_index_by_dst_address[address_length] =
126     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
127
128   hash_set_flags (fib->adj_index_by_dst_address[address_length],
129                   HASH_FLAG_NO_AUTO_SHRINK);
130
131   h = hash_header (fib->adj_index_by_dst_address[address_length]);
132   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
133
134   /* Initialize new/old hash value vectors. */
135   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
136   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
137 }
138
139 static void
140 ip4_fib_set_adj_index (ip4_main_t * im,
141                        ip4_fib_t * fib,
142                        u32 flags,
143                        u32 dst_address_u32,
144                        u32 dst_address_length,
145                        u32 adj_index)
146 {
147   ip_lookup_main_t * lm = &im->lookup_main;
148   uword * hash;
149
150   if (vec_bytes(fib->old_hash_values))
151     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
152   if (vec_bytes(fib->new_hash_values))
153     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
154   fib->new_hash_values[0] = adj_index;
155
156   /* Make sure adj index is valid. */
157   if (CLIB_DEBUG > 0)
158     (void) ip_get_adjacency (lm, adj_index);
159
160   hash = fib->adj_index_by_dst_address[dst_address_length];
161
162   hash = _hash_set3 (hash, dst_address_u32,
163                      fib->new_hash_values,
164                      fib->old_hash_values);
165
166   fib->adj_index_by_dst_address[dst_address_length] = hash;
167
168   if (vec_len (im->add_del_route_callbacks) > 0)
169     {
170       ip4_add_del_route_callback_t * cb;
171       ip4_address_t d;
172       uword * p;
173
174       d.data_u32 = dst_address_u32;
175       vec_foreach (cb, im->add_del_route_callbacks)
176         if ((flags & cb->required_flags) == cb->required_flags)
177           cb->function (im, cb->function_opaque,
178                         fib, flags,
179                         &d, dst_address_length,
180                         fib->old_hash_values,
181                         fib->new_hash_values);
182
183       p = hash_get (hash, dst_address_u32);
184       clib_memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
185     }
186 }
187
188 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
189 {
190   ip_lookup_main_t * lm = &im->lookup_main;
191   ip4_fib_t * fib;
192   u32 dst_address, dst_address_length, adj_index, old_adj_index;
193   uword * hash, is_del;
194   ip4_add_del_route_callback_t * cb;
195
196   /* Either create new adjacency or use given one depending on arguments. */
197   if (a->n_add_adj > 0)
198     {
199       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
200       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
201     }
202   else
203     adj_index = a->adj_index;
204
205   dst_address = a->dst_address.data_u32;
206   dst_address_length = a->dst_address_length;
207   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
208
209   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
210   dst_address &= im->fib_masks[dst_address_length];
211
212   if (! fib->adj_index_by_dst_address[dst_address_length])
213     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
214
215   hash = fib->adj_index_by_dst_address[dst_address_length];
216
217   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
218
219   if (is_del)
220     {
221       fib->old_hash_values[0] = ~0;
222       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
223       fib->adj_index_by_dst_address[dst_address_length] = hash;
224
225       if (vec_len (im->add_del_route_callbacks) > 0
226           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
227         {
228           fib->new_hash_values[0] = ~0;
229           vec_foreach (cb, im->add_del_route_callbacks)
230             if ((a->flags & cb->required_flags) == cb->required_flags)
231               cb->function (im, cb->function_opaque,
232                             fib, a->flags,
233                             &a->dst_address, dst_address_length,
234                             fib->old_hash_values,
235                             fib->new_hash_values);
236         }
237     }
238   else
239     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
240                            adj_index);
241
242   old_adj_index = fib->old_hash_values[0];
243
244   /* Avoid spurious reference count increments */
245   if (old_adj_index == adj_index
246       && adj_index != ~0
247       && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
248     {
249       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
250       if (adj->share_count > 0)
251         adj->share_count --;
252     }
253
254   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
255                                is_del ? old_adj_index : adj_index,
256                                is_del);
257
258   /* Delete old adjacency index if present and changed. */
259   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
260       && old_adj_index != ~0
261       && old_adj_index != adj_index)
262     ip_del_adjacency (lm, old_adj_index);
263 }
264
265 void
266 ip4_add_del_route_next_hop (ip4_main_t * im,
267                             u32 flags,
268                             ip4_address_t * dst_address,
269                             u32 dst_address_length,
270                             ip4_address_t * next_hop,
271                             u32 next_hop_sw_if_index,
272                             u32 next_hop_weight, u32 adj_index, 
273                             u32 explicit_fib_index)
274 {
275   vnet_main_t * vnm = vnet_get_main();
276   ip_lookup_main_t * lm = &im->lookup_main;
277   u32 fib_index;
278   ip4_fib_t * fib;
279   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
280   u32 dst_adj_index, nh_adj_index;
281   uword * dst_hash, * dst_result;
282   uword * nh_hash, * nh_result;
283   ip_adjacency_t * dst_adj;
284   ip_multipath_adjacency_t * old_mp, * new_mp;
285   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
286   int is_interface_next_hop;
287   clib_error_t * error = 0;
288
289   if (explicit_fib_index == (u32)~0)
290       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
291   else
292       fib_index = explicit_fib_index;
293
294   fib = vec_elt_at_index (im->fibs, fib_index);
295   
296   /* Lookup next hop to be added or deleted. */
297   is_interface_next_hop = next_hop->data_u32 == 0;
298   if (adj_index == (u32)~0)
299     {
300       if (is_interface_next_hop)
301         {
302           nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
303           if (nh_result)
304             nh_adj_index = *nh_result;
305           else
306             {
307               ip_adjacency_t * adj;
308               adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
309                                       &nh_adj_index);
310               ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
311               ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
312               hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
313             }
314         }
315       else
316         {
317           nh_hash = fib->adj_index_by_dst_address[32];
318           nh_result = hash_get (nh_hash, next_hop->data_u32);
319           
320           /* Next hop must be known. */
321           if (! nh_result)
322             {
323               ip_adjacency_t * adj;
324
325               nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
326                                                         next_hop, 0);
327               adj = ip_get_adjacency (lm, nh_adj_index);
328               /* if ARP interface adjacencty is present, we need to
329                  install ARP adjaceny for specific next hop */
330               if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
331                   adj->arp.next_hop.ip4.as_u32 == 0)
332                 {
333                   nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
334                 }
335               else
336                 {
337                   /* Next hop is not known, so create indirect adj */
338                   ip_adjacency_t add_adj;
339                   add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
340                   add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
341                   add_adj.explicit_fib_index = explicit_fib_index;
342                   ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
343                 }
344             }
345           else
346             nh_adj_index = *nh_result;
347         }
348     }
349   else
350     {
351       nh_adj_index = adj_index;
352     }
353   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
354   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
355
356   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
357   dst_result = hash_get (dst_hash, dst_address_u32);
358   if (dst_result)
359     {
360       dst_adj_index = dst_result[0];
361       dst_adj = ip_get_adjacency (lm, dst_adj_index);
362     }
363   else
364     {
365       /* For deletes destination must be known. */
366       if (is_del)
367         {
368           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
369           error = clib_error_return (0, "unknown destination %U/%d",
370                                      format_ip4_address, dst_address,
371                                      dst_address_length);
372           goto done;
373         }
374
375       dst_adj_index = ~0;
376       dst_adj = 0;
377     }
378
379   /* Ignore adds of X/32 with next hop of X. */
380   if (! is_del
381       && dst_address_length == 32
382       && dst_address->data_u32 == next_hop->data_u32 
383       && adj_index != (u32)~0)
384     {
385       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
386       error = clib_error_return (0, "prefix matches next hop %U/%d",
387                                  format_ip4_address, dst_address,
388                                  dst_address_length);
389       goto done;
390     }
391
392   /* Destination is not known and default weight is set so add route
393      to existing non-multipath adjacency */
394   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
395     {
396       /* create new adjacency */
397       ip4_add_del_route_args_t a;
398       a.table_index_or_table_id = fib_index;
399       a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
400                  | IP4_ROUTE_FLAG_FIB_INDEX
401                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
402                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
403                              | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
404       a.dst_address = dst_address[0];
405       a.dst_address_length = dst_address_length;
406       a.adj_index = nh_adj_index;
407       a.add_adj = 0;
408       a.n_add_adj = 0;
409
410       ip4_add_del_route (im, &a);
411
412       goto done;
413     }
414
415   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
416
417   if (! ip_multipath_adjacency_add_del_next_hop
418       (lm, is_del,
419        old_mp_adj_index,
420        nh_adj_index,
421        next_hop_weight,
422        &new_mp_adj_index))
423     {
424       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
425       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
426                                  format_ip4_address, next_hop);
427       goto done;
428     }
429   
430   old_mp = new_mp = 0;
431   if (old_mp_adj_index != ~0)
432     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
433   if (new_mp_adj_index != ~0)
434     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
435
436   if (old_mp != new_mp)
437     {
438       ip4_add_del_route_args_t a;
439       a.table_index_or_table_id = fib_index;
440       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
441                  | IP4_ROUTE_FLAG_FIB_INDEX
442                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
443                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
444       a.dst_address = dst_address[0];
445       a.dst_address_length = dst_address_length;
446       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
447       a.add_adj = 0;
448       a.n_add_adj = 0;
449
450       ip4_add_del_route (im, &a);
451     }
452
453  done:
454   if (error)
455     clib_error_report (error);
456 }
457
458 void *
459 ip4_get_route (ip4_main_t * im,
460                u32 table_index_or_table_id,
461                u32 flags,
462                u8 * address,
463                u32 address_length)
464 {
465   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
466   u32 dst_address = * (u32 *) address;
467   uword * hash, * p;
468
469   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
470   dst_address &= im->fib_masks[address_length];
471
472   hash = fib->adj_index_by_dst_address[address_length];
473   p = hash_get (hash, dst_address);
474   return (void *) p;
475 }
476
477 void
478 ip4_foreach_matching_route (ip4_main_t * im,
479                             u32 table_index_or_table_id,
480                             u32 flags,
481                             ip4_address_t * address,
482                             u32 address_length,
483                             ip4_address_t ** results,
484                             u8 ** result_lengths)
485 {
486   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
487   u32 dst_address = address->data_u32;
488   u32 this_length = address_length;
489   
490   if (*results)
491     _vec_len (*results) = 0;
492   if (*result_lengths)
493     _vec_len (*result_lengths) = 0;
494
495   while (this_length <= 32 && vec_len (results) == 0)
496     {
497       uword k, v;
498       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
499         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
500           {
501             ip4_address_t a;
502             a.data_u32 = k;
503             vec_add1 (*results, a);
504             vec_add1 (*result_lengths, this_length);
505           }
506       }));
507
508       this_length++;
509     }
510 }
511
512 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
513                                   u32 table_index_or_table_id,
514                                   u32 flags)
515 {
516   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
517   ip_lookup_main_t * lm = &im->lookup_main;
518   u32 i, l;
519   ip4_address_t a;
520   ip4_add_del_route_callback_t * cb;
521   static ip4_address_t * to_delete;
522
523   if (lm->n_adjacency_remaps == 0)
524     return;
525
526   for (l = 0; l <= 32; l++)
527     {
528       hash_pair_t * p;
529       uword * hash = fib->adj_index_by_dst_address[l];
530
531       if (hash_elts (hash) == 0)
532         continue;
533
534       if (to_delete)
535         _vec_len (to_delete) = 0;
536
537       hash_foreach_pair (p, hash, ({
538         u32 adj_index = p->value[0];
539         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
540
541         if (m)
542           {
543             /* Record destination address from hash key. */
544             a.data_u32 = p->key;
545
546             /* New adjacency points to nothing: so delete prefix. */
547             if (m == ~0)
548               vec_add1 (to_delete, a);
549             else
550               {
551                 /* Remap to new adjacency. */
552                 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
553
554                 /* Set new adjacency value. */
555                 fib->new_hash_values[0] = p->value[0] = m - 1;
556
557                 vec_foreach (cb, im->add_del_route_callbacks)
558                   if ((flags & cb->required_flags) == cb->required_flags)
559                     cb->function (im, cb->function_opaque,
560                                   fib, flags | IP4_ROUTE_FLAG_ADD,
561                                   &a, l,
562                                   fib->old_hash_values,
563                                   fib->new_hash_values);
564               }
565           }
566       }));
567
568       fib->new_hash_values[0] = ~0;
569       for (i = 0; i < vec_len (to_delete); i++)
570         {
571           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
572           vec_foreach (cb, im->add_del_route_callbacks)
573             if ((flags & cb->required_flags) == cb->required_flags)
574               cb->function (im, cb->function_opaque,
575                             fib, flags | IP4_ROUTE_FLAG_DEL,
576                             &a, l,
577                             fib->old_hash_values,
578                             fib->new_hash_values);
579         }
580     }
581
582   /* Also remap adjacencies in mtrie. */
583   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
584
585   /* Reset mapping table. */
586   vec_zero (lm->adjacency_remap_table);
587
588   /* All remaps have been performed. */
589   lm->n_adjacency_remaps = 0;
590 }
591
592 void ip4_delete_matching_routes (ip4_main_t * im,
593                                  u32 table_index_or_table_id,
594                                  u32 flags,
595                                  ip4_address_t * address,
596                                  u32 address_length)
597 {
598   static ip4_address_t * matching_addresses;
599   static u8 * matching_address_lengths;
600   u32 l, i;
601   ip4_add_del_route_args_t a;
602
603   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
604   a.table_index_or_table_id = table_index_or_table_id;
605   a.adj_index = ~0;
606   a.add_adj = 0;
607   a.n_add_adj = 0;
608
609   for (l = address_length + 1; l <= 32; l++)
610     {
611       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
612                                   address,
613                                   l,
614                                   &matching_addresses,
615                                   &matching_address_lengths);
616       for (i = 0; i < vec_len (matching_addresses); i++)
617         {
618           a.dst_address = matching_addresses[i];
619           a.dst_address_length = matching_address_lengths[i];
620           ip4_add_del_route (im, &a);
621         }
622     }
623
624   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
625 }
626
627 always_inline uword
628 ip4_lookup_inline (vlib_main_t * vm,
629                    vlib_node_runtime_t * node,
630                    vlib_frame_t * frame,
631                    int lookup_for_responses_to_locally_received_packets,
632                    int is_indirect)
633 {
634   ip4_main_t * im = &ip4_main;
635   ip_lookup_main_t * lm = &im->lookup_main;
636   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
637   u32 n_left_from, n_left_to_next, * from, * to_next;
638   ip_lookup_next_t next;
639   u32 cpu_index = os_get_cpu_number();
640
641   from = vlib_frame_vector_args (frame);
642   n_left_from = frame->n_vectors;
643   next = node->cached_next_index;
644
645   while (n_left_from > 0)
646     {
647       vlib_get_next_frame (vm, node, next,
648                            to_next, n_left_to_next);
649
650       while (n_left_from >= 4 && n_left_to_next >= 2)
651         {
652           vlib_buffer_t * p0, * p1;
653           ip4_header_t * ip0, * ip1;
654           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
655           ip_lookup_next_t next0, next1;
656           ip_adjacency_t * adj0, * adj1;
657           ip4_fib_mtrie_t * mtrie0, * mtrie1;
658           ip4_fib_mtrie_leaf_t leaf0, leaf1;
659           ip4_address_t * dst_addr0, *dst_addr1;
660           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
661           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
662           u32 flow_hash_config0, flow_hash_config1;
663           u32 hash_c0, hash_c1;
664           u32 wrong_next;
665
666           /* Prefetch next iteration. */
667           {
668             vlib_buffer_t * p2, * p3;
669
670             p2 = vlib_get_buffer (vm, from[2]);
671             p3 = vlib_get_buffer (vm, from[3]);
672
673             vlib_prefetch_buffer_header (p2, LOAD);
674             vlib_prefetch_buffer_header (p3, LOAD);
675
676             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
677             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
678           }
679
680           pi0 = to_next[0] = from[0];
681           pi1 = to_next[1] = from[1];
682
683           p0 = vlib_get_buffer (vm, pi0);
684           p1 = vlib_get_buffer (vm, pi1);
685
686           ip0 = vlib_buffer_get_current (p0);
687           ip1 = vlib_buffer_get_current (p1);
688
689           if (is_indirect)
690             {
691               ip_adjacency_t * iadj0, * iadj1;
692               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
693               iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
694               dst_addr0 = &iadj0->indirect.next_hop.ip4;
695               dst_addr1 = &iadj1->indirect.next_hop.ip4;
696             }
697           else
698             {
699               dst_addr0 = &ip0->dst_address;
700               dst_addr1 = &ip1->dst_address;
701             }
702
703           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
704           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
705           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
706             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
707           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
708             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
709
710
711           if (! lookup_for_responses_to_locally_received_packets)
712             {
713               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
714               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
715
716               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
717
718               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
719               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
720             }
721
722           tcp0 = (void *) (ip0 + 1);
723           tcp1 = (void *) (ip1 + 1);
724
725           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
726                          || ip0->protocol == IP_PROTOCOL_UDP);
727           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
728                          || ip1->protocol == IP_PROTOCOL_UDP);
729
730           if (! lookup_for_responses_to_locally_received_packets)
731             {
732               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
733               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
734             }
735
736           if (! lookup_for_responses_to_locally_received_packets)
737             {
738               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
739               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
740             }
741
742           if (! lookup_for_responses_to_locally_received_packets)
743             {
744               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
745               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
746             }
747
748           if (lookup_for_responses_to_locally_received_packets)
749             {
750               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
751               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
752             }
753           else
754             {
755               /* Handle default route. */
756               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
757               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
758
759               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
760               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
761             }
762
763           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
764                                                            dst_addr0,
765                                                            /* no_default_route */ 0));
766           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
767                                                            dst_addr1,
768                                                            /* no_default_route */ 0));
769           adj0 = ip_get_adjacency (lm, adj_index0);
770           adj1 = ip_get_adjacency (lm, adj_index1);
771
772           next0 = adj0->lookup_next_index;
773           next1 = adj1->lookup_next_index;
774
775           /* Use flow hash to compute multipath adjacency. */
776           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
777           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
778           if (PREDICT_FALSE (adj0->n_adj > 1))
779             {
780               flow_hash_config0 = 
781                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
782               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
783                 ip4_compute_flow_hash (ip0, flow_hash_config0);
784             }
785           if (PREDICT_FALSE(adj1->n_adj > 1))
786             {
787               flow_hash_config1 = 
788                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
789               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
790                 ip4_compute_flow_hash (ip1, flow_hash_config1);
791             }
792
793           ASSERT (adj0->n_adj > 0);
794           ASSERT (adj1->n_adj > 0);
795           ASSERT (is_pow2 (adj0->n_adj));
796           ASSERT (is_pow2 (adj1->n_adj));
797           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
798           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
799
800           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
801           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
802
803           vlib_increment_combined_counter 
804               (cm, cpu_index, adj_index0, 1,
805                vlib_buffer_length_in_chain (vm, p0) 
806                + sizeof(ethernet_header_t));
807           vlib_increment_combined_counter 
808               (cm, cpu_index, adj_index1, 1,
809                vlib_buffer_length_in_chain (vm, p1)
810                + sizeof(ethernet_header_t));
811
812           from += 2;
813           to_next += 2;
814           n_left_to_next -= 2;
815           n_left_from -= 2;
816
817           wrong_next = (next0 != next) + 2*(next1 != next);
818           if (PREDICT_FALSE (wrong_next != 0))
819             {
820               switch (wrong_next)
821                 {
822                 case 1:
823                   /* A B A */
824                   to_next[-2] = pi1;
825                   to_next -= 1;
826                   n_left_to_next += 1;
827                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
828                   break;
829
830                 case 2:
831                   /* A A B */
832                   to_next -= 1;
833                   n_left_to_next += 1;
834                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
835                   break;
836
837                 case 3:
838                   /* A B C */
839                   to_next -= 2;
840                   n_left_to_next += 2;
841                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
842                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
843                   if (next0 == next1)
844                     {
845                       /* A B B */
846                       vlib_put_next_frame (vm, node, next, n_left_to_next);
847                       next = next1;
848                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
849                     }
850                 }
851             }
852         }
853     
854       while (n_left_from > 0 && n_left_to_next > 0)
855         {
856           vlib_buffer_t * p0;
857           ip4_header_t * ip0;
858           __attribute__((unused)) tcp_header_t * tcp0;
859           ip_lookup_next_t next0;
860           ip_adjacency_t * adj0;
861           ip4_fib_mtrie_t * mtrie0;
862           ip4_fib_mtrie_leaf_t leaf0;
863           ip4_address_t * dst_addr0;
864           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
865           u32 flow_hash_config0, hash_c0;
866
867           pi0 = from[0];
868           to_next[0] = pi0;
869
870           p0 = vlib_get_buffer (vm, pi0);
871
872           ip0 = vlib_buffer_get_current (p0);
873
874           if (is_indirect)
875             {
876               ip_adjacency_t * iadj0;
877               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
878               dst_addr0 = &iadj0->indirect.next_hop.ip4;
879             }
880           else
881             {
882               dst_addr0 = &ip0->dst_address;
883             }
884
885           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
886           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
887             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
888
889           if (! lookup_for_responses_to_locally_received_packets)
890             {
891               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
892
893               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
894
895               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
896             }
897
898           tcp0 = (void *) (ip0 + 1);
899
900           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
901                          || ip0->protocol == IP_PROTOCOL_UDP);
902
903           if (! lookup_for_responses_to_locally_received_packets)
904             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
905
906           if (! lookup_for_responses_to_locally_received_packets)
907             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
908
909           if (! lookup_for_responses_to_locally_received_packets)
910             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
911
912           if (lookup_for_responses_to_locally_received_packets)
913             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
914           else
915             {
916               /* Handle default route. */
917               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
918               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
919             }
920
921           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
922                                                            dst_addr0,
923                                                            /* no_default_route */ 0));
924
925           adj0 = ip_get_adjacency (lm, adj_index0);
926
927           next0 = adj0->lookup_next_index;
928
929           /* Use flow hash to compute multipath adjacency. */
930           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
931           if (PREDICT_FALSE(adj0->n_adj > 1))
932             {
933               flow_hash_config0 = 
934                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
935
936               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
937                 ip4_compute_flow_hash (ip0, flow_hash_config0);
938             }
939
940           ASSERT (adj0->n_adj > 0);
941           ASSERT (is_pow2 (adj0->n_adj));
942           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
943
944           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
945
946           vlib_increment_combined_counter 
947               (cm, cpu_index, adj_index0, 1,
948                vlib_buffer_length_in_chain (vm, p0)
949                + sizeof(ethernet_header_t));
950
951           from += 1;
952           to_next += 1;
953           n_left_to_next -= 1;
954           n_left_from -= 1;
955
956           if (PREDICT_FALSE (next0 != next))
957             {
958               n_left_to_next += 1;
959               vlib_put_next_frame (vm, node, next, n_left_to_next);
960               next = next0;
961               vlib_get_next_frame (vm, node, next,
962                                    to_next, n_left_to_next);
963               to_next[0] = pi0;
964               to_next += 1;
965               n_left_to_next -= 1;
966             }
967         }
968
969       vlib_put_next_frame (vm, node, next, n_left_to_next);
970     }
971
972   return frame->n_vectors;
973 }
974
975 static uword
976 ip4_lookup (vlib_main_t * vm,
977             vlib_node_runtime_t * node,
978             vlib_frame_t * frame)
979 {
980   return ip4_lookup_inline (vm, node, frame,
981                             /* lookup_for_responses_to_locally_received_packets */ 0,
982                             /* is_indirect */ 0);
983
984 }
985
986 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
987                                         ip_adjacency_t * adj,
988                                         u32 sw_if_index,
989                                         u32 if_address_index)
990 {
991   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
992   ip_lookup_next_t n;
993   vnet_l3_packet_type_t packet_type;
994   u32 node_index;
995
996   if (hw->hw_class_index == ethernet_hw_interface_class.index
997       || hw->hw_class_index == srp_hw_interface_class.index)
998     {
999       /* 
1000        * We have a bit of a problem in this case. ip4-arp uses
1001        * the rewrite_header.next_index to hand pkts to the
1002        * indicated inteface output node. We can end up in
1003        * ip4_rewrite_local, too, which also pays attention to 
1004        * rewrite_header.next index. Net result: a hack in
1005        * ip4_rewrite_local...
1006        */
1007       n = IP_LOOKUP_NEXT_ARP;
1008       node_index = ip4_arp_node.index;
1009       adj->if_address_index = if_address_index;
1010       adj->arp.next_hop.ip4.as_u32 = 0;
1011       ip46_address_reset(&adj->arp.next_hop);
1012       packet_type = VNET_L3_PACKET_TYPE_ARP;
1013     }
1014   else
1015     {
1016       n = IP_LOOKUP_NEXT_REWRITE;
1017       node_index = ip4_rewrite_node.index;
1018       packet_type = VNET_L3_PACKET_TYPE_IP4;
1019     }
1020
1021   adj->lookup_next_index = n;
1022   vnet_rewrite_for_sw_interface
1023     (vnm,
1024      packet_type,
1025      sw_if_index,
1026      node_index,
1027      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1028      &adj->rewrite_header,
1029      sizeof (adj->rewrite_data));
1030 }
1031
1032 static void
1033 ip4_add_interface_routes (u32 sw_if_index,
1034                           ip4_main_t * im, u32 fib_index,
1035                           ip_interface_address_t * a)
1036 {
1037   vnet_main_t * vnm = vnet_get_main();
1038   ip_lookup_main_t * lm = &im->lookup_main;
1039   ip_adjacency_t * adj;
1040   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1041   ip4_add_del_route_args_t x;
1042   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1043   u32 classify_table_index;
1044
1045   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1046   x.table_index_or_table_id = fib_index;
1047   x.flags = (IP4_ROUTE_FLAG_ADD
1048              | IP4_ROUTE_FLAG_FIB_INDEX
1049              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1050   x.dst_address = address[0];
1051   x.dst_address_length = a->address_length;
1052   x.n_add_adj = 0;
1053   x.add_adj = 0;
1054
1055   a->neighbor_probe_adj_index = ~0;
1056   if (a->address_length < 32)
1057     {
1058       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1059                               &x.adj_index);
1060       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1061       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1062       ip4_add_del_route (im, &x);
1063       a->neighbor_probe_adj_index = x.adj_index;
1064     }
1065   
1066   /* Add e.g. 1.1.1.1/32 as local to this host. */
1067   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1068                           &x.adj_index);
1069   
1070   classify_table_index = ~0;
1071   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1072     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1073   if (classify_table_index != (u32) ~0)
1074     {
1075       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1076       adj->classify.table_index = classify_table_index;
1077     }
1078   else
1079     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1080   
1081   adj->if_address_index = a - lm->if_address_pool;
1082   adj->rewrite_header.sw_if_index = sw_if_index;
1083   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1084   /* 
1085    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1086    * fail an RPF-ish check, but still go thru the rewrite code...
1087    */
1088   adj->rewrite_header.data_bytes = 0;
1089
1090   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1091   x.dst_address_length = 32;
1092   ip4_add_del_route (im, &x);
1093 }
1094
1095 static void
1096 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1097 {
1098   ip4_add_del_route_args_t x;
1099
1100   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1101   x.table_index_or_table_id = fib_index;
1102   x.flags = (IP4_ROUTE_FLAG_DEL
1103              | IP4_ROUTE_FLAG_FIB_INDEX
1104              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1105   x.dst_address = address[0];
1106   x.dst_address_length = address_length;
1107   x.adj_index = ~0;
1108   x.n_add_adj = 0;
1109   x.add_adj = 0;
1110
1111   if (address_length < 32)
1112     ip4_add_del_route (im, &x);
1113
1114   x.dst_address_length = 32;
1115   ip4_add_del_route (im, &x);
1116
1117   ip4_delete_matching_routes (im,
1118                               fib_index,
1119                               IP4_ROUTE_FLAG_FIB_INDEX,
1120                               address,
1121                               address_length);
1122 }
1123
1124 typedef struct {
1125     u32 sw_if_index;
1126     ip4_address_t address;
1127     u32 length;
1128 } ip4_interface_address_t;
1129
1130 static clib_error_t *
1131 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1132                                         u32 sw_if_index,
1133                                         ip4_address_t * new_address,
1134                                         u32 new_length,
1135                                         u32 redistribute,
1136                                         u32 insert_routes,
1137                                         u32 is_del);
1138
1139 static clib_error_t *
1140 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1141                                         u32 sw_if_index,
1142                                         ip4_address_t * address,
1143                                         u32 address_length,
1144                                         u32 redistribute,
1145                                         u32 insert_routes,
1146                                         u32 is_del)
1147 {
1148   vnet_main_t * vnm = vnet_get_main();
1149   ip4_main_t * im = &ip4_main;
1150   ip_lookup_main_t * lm = &im->lookup_main;
1151   clib_error_t * error = 0;
1152   u32 if_address_index, elts_before;
1153   ip4_address_fib_t ip4_af, * addr_fib = 0;
1154
1155   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1156   ip4_addr_fib_init (&ip4_af, address,
1157                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1158   vec_add1 (addr_fib, ip4_af);
1159
1160   /* When adding an address check that it does not conflict with an existing address. */
1161   if (! is_del)
1162     {
1163       ip_interface_address_t * ia;
1164       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1165                                     0 /* honor unnumbered */,
1166       ({
1167         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1168
1169         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1170             || ip4_destination_matches_route (im, x, address, address_length))
1171           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1172                                     format_ip4_address_and_length, address, address_length,
1173                                     format_ip4_address_and_length, x, ia->address_length,
1174                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1175       }));
1176     }
1177
1178   elts_before = pool_elts (lm->if_address_pool);
1179
1180   error = ip_interface_address_add_del
1181     (lm,
1182      sw_if_index,
1183      addr_fib,
1184      address_length,
1185      is_del,
1186      &if_address_index);
1187   if (error)
1188     goto done;
1189   
1190   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1191     {
1192       if (is_del)
1193         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1194                                   address_length);
1195       
1196       else
1197           ip4_add_interface_routes (sw_if_index,
1198                                     im, ip4_af.fib_index,
1199                                     pool_elt_at_index 
1200                                     (lm->if_address_pool, if_address_index));
1201     }
1202
1203   /* If pool did not grow/shrink: add duplicate address. */
1204   if (elts_before != pool_elts (lm->if_address_pool))
1205     {
1206       ip4_add_del_interface_address_callback_t * cb;
1207       vec_foreach (cb, im->add_del_interface_address_callbacks)
1208         cb->function (im, cb->function_opaque, sw_if_index,
1209                       address, address_length,
1210                       if_address_index,
1211                       is_del);
1212     }
1213
1214  done:
1215   vec_free (addr_fib);
1216   return error;
1217 }
1218
1219 clib_error_t *
1220 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1221                                ip4_address_t * address, u32 address_length,
1222                                u32 is_del)
1223 {
1224   return ip4_add_del_interface_address_internal
1225     (vm, sw_if_index, address, address_length,
1226      /* redistribute */ 1,
1227      /* insert_routes */ 1,
1228      is_del);
1229 }
1230
1231 static clib_error_t *
1232 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1233                                 u32 sw_if_index,
1234                                 u32 flags)
1235 {
1236   ip4_main_t * im = &ip4_main;
1237   ip_interface_address_t * ia;
1238   ip4_address_t * a;
1239   u32 is_admin_up, fib_index;
1240   
1241   /* Fill in lookup tables with default table (0). */
1242   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1243   
1244   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1245   
1246   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1247   
1248   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1249
1250   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1251                                 0 /* honor unnumbered */,
1252   ({
1253     a = ip_interface_address_get_address (&im->lookup_main, ia);
1254     if (is_admin_up)
1255       ip4_add_interface_routes (sw_if_index,
1256                                 im, fib_index,
1257                                 ia);
1258     else
1259       ip4_del_interface_routes (im, fib_index,
1260                                 a, ia->address_length);
1261   }));
1262
1263   return 0;
1264 }
1265  
1266 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1267
1268 static clib_error_t *
1269 ip4_sw_interface_add_del (vnet_main_t * vnm,
1270                           u32 sw_if_index,
1271                           u32 is_add)
1272 {
1273   vlib_main_t * vm = vnm->vlib_main;
1274   ip4_main_t * im = &ip4_main;
1275   ip_lookup_main_t * lm = &im->lookup_main;
1276   u32 ci, cast;
1277
1278   for (cast = 0; cast < VNET_N_CAST; cast++)
1279     {
1280       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1281       vnet_config_main_t * vcm = &cm->config_main;
1282
1283       if (! vcm->node_index_by_feature_index)
1284         {
1285           if (cast == VNET_UNICAST)
1286             {
1287               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1288               static char * feature_nodes[] = {
1289                 [IP4_RX_FEATURE_CHECK_ACCESS] = "ip4-inacl",
1290                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX] = "ip4-source-check-via-rx",
1291                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY] = "ip4-source-check-via-any",
1292                 [IP4_RX_FEATURE_IPSEC] = "ipsec-input-ip4",
1293                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1294                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup",
1295               };
1296
1297               vnet_config_init (vm, vcm,
1298                                 start_nodes, ARRAY_LEN (start_nodes),
1299                                 feature_nodes, ARRAY_LEN (feature_nodes));
1300             }
1301           else
1302             {
1303               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1304               static char * feature_nodes[] = {
1305                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1306                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup-multicast",
1307               };
1308
1309               vnet_config_init (vm, vcm,
1310                                 start_nodes, ARRAY_LEN (start_nodes),
1311                                 feature_nodes, ARRAY_LEN (feature_nodes));
1312             }
1313         }
1314
1315       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1316       ci = cm->config_index_by_sw_if_index[sw_if_index];
1317
1318       if (is_add)
1319         ci = vnet_config_add_feature (vm, vcm,
1320                                       ci,
1321                                       IP4_RX_FEATURE_LOOKUP,
1322                                       /* config data */ 0,
1323                                       /* # bytes of config data */ 0);
1324       else
1325         ci = vnet_config_del_feature (vm, vcm,
1326                                       ci,
1327                                       IP4_RX_FEATURE_LOOKUP,
1328                                       /* config data */ 0,
1329                                       /* # bytes of config data */ 0);
1330
1331       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1332     }
1333
1334   return /* no error */ 0;
1335 }
1336
1337 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1338
1339
1340 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1341   .function = ip4_lookup,
1342   .name = "ip4-lookup",
1343   .vector_size = sizeof (u32),
1344
1345   .n_next_nodes = IP_LOOKUP_N_NEXT,
1346   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1347 };
1348
1349 static uword
1350 ip4_indirect (vlib_main_t * vm,
1351                vlib_node_runtime_t * node,
1352                vlib_frame_t * frame)
1353 {
1354   return ip4_lookup_inline (vm, node, frame,
1355                             /* lookup_for_responses_to_locally_received_packets */ 0,
1356                             /* is_indirect */ 1);
1357 }
1358
1359 VLIB_REGISTER_NODE (ip4_indirect_node) = {
1360   .function = ip4_indirect,
1361   .name = "ip4-indirect",
1362   .vector_size = sizeof (u32),
1363
1364   .n_next_nodes = IP_LOOKUP_N_NEXT,
1365   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1366 };
1367
1368
1369 /* Global IP4 main. */
1370 ip4_main_t ip4_main;
1371
1372 clib_error_t *
1373 ip4_lookup_init (vlib_main_t * vm)
1374 {
1375   ip4_main_t * im = &ip4_main;
1376   uword i;
1377
1378   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1379     {
1380       u32 m;
1381
1382       if (i < 32)
1383         m = pow2_mask (i) << (32 - i);
1384       else 
1385         m = ~0;
1386       im->fib_masks[i] = clib_host_to_net_u32 (m);
1387     }
1388
1389   /* Create FIB with index 0 and table id of 0. */
1390   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1391
1392   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1393
1394   {
1395     pg_node_t * pn;
1396     pn = pg_get_node (ip4_lookup_node.index);
1397     pn->unformat_edit = unformat_pg_ip4_header;
1398   }
1399
1400   {
1401     ethernet_arp_header_t h;
1402
1403     memset (&h, 0, sizeof (h));
1404
1405     /* Set target ethernet address to all zeros. */
1406     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1407
1408 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1409 #define _8(f,v) h.f = v;
1410     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1411     _16 (l3_type, ETHERNET_TYPE_IP4);
1412     _8 (n_l2_address_bytes, 6);
1413     _8 (n_l3_address_bytes, 4);
1414     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1415 #undef _16
1416 #undef _8
1417
1418     vlib_packet_template_init (vm,
1419                                &im->ip4_arp_request_packet_template,
1420                                /* data */ &h,
1421                                sizeof (h),
1422                                /* alloc chunk size */ 8,
1423                                "ip4 arp");
1424   }
1425
1426   return 0;
1427 }
1428
1429 VLIB_INIT_FUNCTION (ip4_lookup_init);
1430
1431 typedef struct {
1432   /* Adjacency taken. */
1433   u32 adj_index;
1434   u32 flow_hash;
1435   u32 fib_index;
1436
1437   /* Packet data, possibly *after* rewrite. */
1438   u8 packet_data[64 - 1*sizeof(u32)];
1439 } ip4_forward_next_trace_t;
1440
1441 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1442 {
1443   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1444   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1445   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1446   vnet_main_t * vnm = vnet_get_main();
1447   ip4_main_t * im = &ip4_main;
1448   ip_adjacency_t * adj;
1449   uword indent = format_get_indent (s);
1450
1451   adj = ip_get_adjacency (&im->lookup_main, t->adj_index);
1452   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1453               t->fib_index, t->adj_index, format_ip_adjacency,
1454               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1455   switch (adj->lookup_next_index)
1456     {
1457     case IP_LOOKUP_NEXT_REWRITE:
1458       s = format (s, "\n%U%U",
1459                   format_white_space, indent,
1460                   format_ip_adjacency_packet_data,
1461                   vnm, &im->lookup_main, t->adj_index,
1462                   t->packet_data, sizeof (t->packet_data));
1463       break;
1464
1465     default:
1466       break;
1467     }
1468
1469   return s;
1470 }
1471
1472 /* Common trace function for all ip4-forward next nodes. */
1473 void
1474 ip4_forward_next_trace (vlib_main_t * vm,
1475                         vlib_node_runtime_t * node,
1476                         vlib_frame_t * frame,
1477                         vlib_rx_or_tx_t which_adj_index)
1478 {
1479   u32 * from, n_left;
1480   ip4_main_t * im = &ip4_main;
1481
1482   n_left = frame->n_vectors;
1483   from = vlib_frame_vector_args (frame);
1484   
1485   while (n_left >= 4)
1486     {
1487       u32 bi0, bi1;
1488       vlib_buffer_t * b0, * b1;
1489       ip4_forward_next_trace_t * t0, * t1;
1490
1491       /* Prefetch next iteration. */
1492       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1493       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1494
1495       bi0 = from[0];
1496       bi1 = from[1];
1497
1498       b0 = vlib_get_buffer (vm, bi0);
1499       b1 = vlib_get_buffer (vm, bi1);
1500
1501       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1502         {
1503           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1504           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1505           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1506           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1507                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1508           clib_memcpy (t0->packet_data,
1509                   vlib_buffer_get_current (b0),
1510                   sizeof (t0->packet_data));
1511         }
1512       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1513         {
1514           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1515           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1516           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1517           t1->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1518                              vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1519           clib_memcpy (t1->packet_data,
1520                   vlib_buffer_get_current (b1),
1521                   sizeof (t1->packet_data));
1522         }
1523       from += 2;
1524       n_left -= 2;
1525     }
1526
1527   while (n_left >= 1)
1528     {
1529       u32 bi0;
1530       vlib_buffer_t * b0;
1531       ip4_forward_next_trace_t * t0;
1532
1533       bi0 = from[0];
1534
1535       b0 = vlib_get_buffer (vm, bi0);
1536
1537       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1538         {
1539           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1540           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1541           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1542           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1543                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1544           clib_memcpy (t0->packet_data,
1545                   vlib_buffer_get_current (b0),
1546                   sizeof (t0->packet_data));
1547         }
1548       from += 1;
1549       n_left -= 1;
1550     }
1551 }
1552
1553 static uword
1554 ip4_drop_or_punt (vlib_main_t * vm,
1555                   vlib_node_runtime_t * node,
1556                   vlib_frame_t * frame,
1557                   ip4_error_t error_code)
1558 {
1559   u32 * buffers = vlib_frame_vector_args (frame);
1560   uword n_packets = frame->n_vectors;
1561
1562   vlib_error_drop_buffers (vm, node,
1563                            buffers,
1564                            /* stride */ 1,
1565                            n_packets,
1566                            /* next */ 0,
1567                            ip4_input_node.index,
1568                            error_code);
1569
1570   if (node->flags & VLIB_NODE_FLAG_TRACE)
1571     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1572
1573   return n_packets;
1574 }
1575
1576 static uword
1577 ip4_drop (vlib_main_t * vm,
1578           vlib_node_runtime_t * node,
1579           vlib_frame_t * frame)
1580 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1581
1582 static uword
1583 ip4_punt (vlib_main_t * vm,
1584           vlib_node_runtime_t * node,
1585           vlib_frame_t * frame)
1586 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1587
1588 static uword
1589 ip4_miss (vlib_main_t * vm,
1590           vlib_node_runtime_t * node,
1591           vlib_frame_t * frame)
1592 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1593
1594 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1595   .function = ip4_drop,
1596   .name = "ip4-drop",
1597   .vector_size = sizeof (u32),
1598
1599   .format_trace = format_ip4_forward_next_trace,
1600
1601   .n_next_nodes = 1,
1602   .next_nodes = {
1603     [0] = "error-drop",
1604   },
1605 };
1606
1607 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1608   .function = ip4_punt,
1609   .name = "ip4-punt",
1610   .vector_size = sizeof (u32),
1611
1612   .format_trace = format_ip4_forward_next_trace,
1613
1614   .n_next_nodes = 1,
1615   .next_nodes = {
1616     [0] = "error-punt",
1617   },
1618 };
1619
1620 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1621   .function = ip4_miss,
1622   .name = "ip4-miss",
1623   .vector_size = sizeof (u32),
1624
1625   .format_trace = format_ip4_forward_next_trace,
1626
1627   .n_next_nodes = 1,
1628   .next_nodes = {
1629     [0] = "error-drop",
1630   },
1631 };
1632
1633 /* Compute TCP/UDP/ICMP4 checksum in software. */
1634 u16
1635 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1636                               ip4_header_t * ip0)
1637 {
1638   ip_csum_t sum0;
1639   u32 ip_header_length, payload_length_host_byte_order;
1640   u32 n_this_buffer, n_bytes_left;
1641   u16 sum16;
1642   void * data_this_buffer;
1643   
1644   /* Initialize checksum with ip header. */
1645   ip_header_length = ip4_header_bytes (ip0);
1646   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1647   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1648
1649   if (BITS (uword) == 32)
1650     {
1651       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1652       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1653     }
1654   else
1655     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1656
1657   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1658   data_this_buffer = (void *) ip0 + ip_header_length;
1659   if (n_this_buffer + ip_header_length > p0->current_length)
1660     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1661   while (1)
1662     {
1663       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1664       n_bytes_left -= n_this_buffer;
1665       if (n_bytes_left == 0)
1666         break;
1667
1668       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1669       p0 = vlib_get_buffer (vm, p0->next_buffer);
1670       data_this_buffer = vlib_buffer_get_current (p0);
1671       n_this_buffer = p0->current_length;
1672     }
1673
1674   sum16 = ~ ip_csum_fold (sum0);
1675
1676   return sum16;
1677 }
1678
1679 static u32
1680 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1681 {
1682   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1683   udp_header_t * udp0;
1684   u16 sum16;
1685
1686   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1687           || ip0->protocol == IP_PROTOCOL_UDP);
1688
1689   udp0 = (void *) (ip0 + 1);
1690   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1691     {
1692       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1693                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1694       return p0->flags;
1695     }
1696
1697   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1698
1699   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1700                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1701
1702   return p0->flags;
1703 }
1704
1705 static uword
1706 ip4_local (vlib_main_t * vm,
1707            vlib_node_runtime_t * node,
1708            vlib_frame_t * frame)
1709 {
1710   ip4_main_t * im = &ip4_main;
1711   ip_lookup_main_t * lm = &im->lookup_main;
1712   ip_local_next_t next_index;
1713   u32 * from, * to_next, n_left_from, n_left_to_next;
1714   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1715
1716   from = vlib_frame_vector_args (frame);
1717   n_left_from = frame->n_vectors;
1718   next_index = node->cached_next_index;
1719   
1720   if (node->flags & VLIB_NODE_FLAG_TRACE)
1721     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1722
1723   while (n_left_from > 0)
1724     {
1725       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1726
1727       while (n_left_from >= 4 && n_left_to_next >= 2)
1728         {
1729           vlib_buffer_t * p0, * p1;
1730           ip4_header_t * ip0, * ip1;
1731           udp_header_t * udp0, * udp1;
1732           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1733           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1734           ip_adjacency_t * adj0, * adj1;
1735           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
1736           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
1737           i32 len_diff0, len_diff1;
1738           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1739           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1740           u8 enqueue_code;
1741       
1742           pi0 = to_next[0] = from[0];
1743           pi1 = to_next[1] = from[1];
1744           from += 2;
1745           n_left_from -= 2;
1746           to_next += 2;
1747           n_left_to_next -= 2;
1748       
1749           p0 = vlib_get_buffer (vm, pi0);
1750           p1 = vlib_get_buffer (vm, pi1);
1751
1752           ip0 = vlib_buffer_get_current (p0);
1753           ip1 = vlib_buffer_get_current (p1);
1754
1755           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1756                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1757           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1758                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1759
1760           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1761           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
1762
1763           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1764
1765           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1766           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1767
1768           proto0 = ip0->protocol;
1769           proto1 = ip1->protocol;
1770           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1771           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1772           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1773           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1774
1775           flags0 = p0->flags;
1776           flags1 = p1->flags;
1777
1778           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1779           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1780
1781           udp0 = ip4_next_header (ip0);
1782           udp1 = ip4_next_header (ip1);
1783
1784           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1785           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1786           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1787
1788           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1789           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1790
1791           /* Verify UDP length. */
1792           ip_len0 = clib_net_to_host_u16 (ip0->length);
1793           ip_len1 = clib_net_to_host_u16 (ip1->length);
1794           udp_len0 = clib_net_to_host_u16 (udp0->length);
1795           udp_len1 = clib_net_to_host_u16 (udp1->length);
1796
1797           len_diff0 = ip_len0 - udp_len0;
1798           len_diff1 = ip_len1 - udp_len1;
1799
1800           len_diff0 = is_udp0 ? len_diff0 : 0;
1801           len_diff1 = is_udp1 ? len_diff1 : 0;
1802
1803           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1804                                 & good_tcp_udp0 & good_tcp_udp1)))
1805             {
1806               if (is_tcp_udp0)
1807                 {
1808                   if (is_tcp_udp0
1809                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1810                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1811                   good_tcp_udp0 =
1812                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1813                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1814                 }
1815               if (is_tcp_udp1)
1816                 {
1817                   if (is_tcp_udp1
1818                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1819                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1820                   good_tcp_udp1 =
1821                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1822                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1823                 }
1824             }
1825
1826           good_tcp_udp0 &= len_diff0 >= 0;
1827           good_tcp_udp1 &= len_diff1 >= 0;
1828
1829           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1830           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1831
1832           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1833
1834           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1835           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1836
1837           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1838           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1839                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1840                     : error0);
1841           error1 = (is_tcp_udp1 && ! good_tcp_udp1
1842                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1843                     : error1);
1844
1845           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1846           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1847
1848           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1849           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1850
1851           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1852           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
1853
1854           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1855                                                            &ip0->src_address,
1856                                                            /* no_default_route */ 1));
1857           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
1858                                                            &ip1->src_address,
1859                                                            /* no_default_route */ 1));
1860
1861           adj0 = ip_get_adjacency (lm, adj_index0);
1862           adj1 = ip_get_adjacency (lm, adj_index1);
1863
1864           /* 
1865            * Must have a route to source otherwise we drop the packet.
1866            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1867            */
1868           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1869                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1870                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
1871                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1872                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1873                     ? IP4_ERROR_SRC_LOOKUP_MISS
1874                     : error0);
1875           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
1876                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1877                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
1878                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1879                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1880                     ? IP4_ERROR_SRC_LOOKUP_MISS
1881                     : error1);
1882
1883           next0 = lm->local_next_by_ip_protocol[proto0];
1884           next1 = lm->local_next_by_ip_protocol[proto1];
1885
1886           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1887           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1888
1889           p0->error = error0 ? error_node->errors[error0] : 0;
1890           p1->error = error1 ? error_node->errors[error1] : 0;
1891
1892           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1893
1894           if (PREDICT_FALSE (enqueue_code != 0))
1895             {
1896               switch (enqueue_code)
1897                 {
1898                 case 1:
1899                   /* A B A */
1900                   to_next[-2] = pi1;
1901                   to_next -= 1;
1902                   n_left_to_next += 1;
1903                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1904                   break;
1905
1906                 case 2:
1907                   /* A A B */
1908                   to_next -= 1;
1909                   n_left_to_next += 1;
1910                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1911                   break;
1912
1913                 case 3:
1914                   /* A B B or A B C */
1915                   to_next -= 2;
1916                   n_left_to_next += 2;
1917                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1918                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1919                   if (next0 == next1)
1920                     {
1921                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1922                       next_index = next1;
1923                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1924                     }
1925                   break;
1926                 }
1927             }
1928         }
1929
1930       while (n_left_from > 0 && n_left_to_next > 0)
1931         {
1932           vlib_buffer_t * p0;
1933           ip4_header_t * ip0;
1934           udp_header_t * udp0;
1935           ip4_fib_mtrie_t * mtrie0;
1936           ip4_fib_mtrie_leaf_t leaf0;
1937           ip_adjacency_t * adj0;
1938           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
1939           i32 len_diff0;
1940           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1941       
1942           pi0 = to_next[0] = from[0];
1943           from += 1;
1944           n_left_from -= 1;
1945           to_next += 1;
1946           n_left_to_next -= 1;
1947       
1948           p0 = vlib_get_buffer (vm, pi0);
1949
1950           ip0 = vlib_buffer_get_current (p0);
1951
1952           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1953                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1954
1955           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1956
1957           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1958
1959           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1960
1961           proto0 = ip0->protocol;
1962           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1963           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1964
1965           flags0 = p0->flags;
1966
1967           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1968
1969           udp0 = ip4_next_header (ip0);
1970
1971           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1972           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1973
1974           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1975
1976           /* Verify UDP length. */
1977           ip_len0 = clib_net_to_host_u16 (ip0->length);
1978           udp_len0 = clib_net_to_host_u16 (udp0->length);
1979
1980           len_diff0 = ip_len0 - udp_len0;
1981
1982           len_diff0 = is_udp0 ? len_diff0 : 0;
1983
1984           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1985             {
1986               if (is_tcp_udp0)
1987                 {
1988                   if (is_tcp_udp0
1989                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1990                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1991                   good_tcp_udp0 =
1992                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1993                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1994                 }
1995             }
1996
1997           good_tcp_udp0 &= len_diff0 >= 0;
1998
1999           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2000
2001           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2002
2003           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2004
2005           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2006           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2007                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2008                     : error0);
2009
2010           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2011
2012           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2013           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2014
2015           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2016                                                            &ip0->src_address,
2017                                                            /* no_default_route */ 1));
2018
2019           adj0 = ip_get_adjacency (lm, adj_index0);
2020
2021           /* Must have a route to source otherwise we drop the packet. */
2022           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2023                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2024                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2025                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2026                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2027                     ? IP4_ERROR_SRC_LOOKUP_MISS
2028                     : error0);
2029
2030           next0 = lm->local_next_by_ip_protocol[proto0];
2031
2032           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2033
2034           p0->error = error0? error_node->errors[error0] : 0;
2035
2036           if (PREDICT_FALSE (next0 != next_index))
2037             {
2038               n_left_to_next += 1;
2039               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2040
2041               next_index = next0;
2042               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2043               to_next[0] = pi0;
2044               to_next += 1;
2045               n_left_to_next -= 1;
2046             }
2047         }
2048   
2049       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2050     }
2051
2052   return frame->n_vectors;
2053 }
2054
2055 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2056   .function = ip4_local,
2057   .name = "ip4-local",
2058   .vector_size = sizeof (u32),
2059
2060   .format_trace = format_ip4_forward_next_trace,
2061
2062   .n_next_nodes = IP_LOCAL_N_NEXT,
2063   .next_nodes = {
2064     [IP_LOCAL_NEXT_DROP] = "error-drop",
2065     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2066     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2067     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2068   },
2069 };
2070
2071 void ip4_register_protocol (u32 protocol, u32 node_index)
2072 {
2073   vlib_main_t * vm = vlib_get_main();
2074   ip4_main_t * im = &ip4_main;
2075   ip_lookup_main_t * lm = &im->lookup_main;
2076
2077   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2078   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2079 }
2080
2081 static clib_error_t *
2082 show_ip_local_command_fn (vlib_main_t * vm,
2083                           unformat_input_t * input,
2084                          vlib_cli_command_t * cmd)
2085 {
2086   ip4_main_t * im = &ip4_main;
2087   ip_lookup_main_t * lm = &im->lookup_main;
2088   int i;
2089
2090   vlib_cli_output (vm, "Protocols handled by ip4_local");
2091   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2092     {
2093       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2094         vlib_cli_output (vm, "%d", i);
2095     }
2096   return 0;
2097 }
2098
2099
2100
2101 VLIB_CLI_COMMAND (show_ip_local, static) = {
2102   .path = "show ip local",
2103   .function = show_ip_local_command_fn,
2104   .short_help = "Show ip local protocol table",
2105 };
2106
2107 static uword
2108 ip4_arp (vlib_main_t * vm,
2109          vlib_node_runtime_t * node,
2110          vlib_frame_t * frame)
2111 {
2112   vnet_main_t * vnm = vnet_get_main();
2113   ip4_main_t * im = &ip4_main;
2114   ip_lookup_main_t * lm = &im->lookup_main;
2115   u32 * from, * to_next_drop;
2116   uword n_left_from, n_left_to_next_drop, next_index;
2117   static f64 time_last_seed_change = -1e100;
2118   static u32 hash_seeds[3];
2119   static uword hash_bitmap[256 / BITS (uword)]; 
2120   f64 time_now;
2121
2122   if (node->flags & VLIB_NODE_FLAG_TRACE)
2123     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2124
2125   time_now = vlib_time_now (vm);
2126   if (time_now - time_last_seed_change > 1e-3)
2127     {
2128       uword i;
2129       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2130                                              sizeof (hash_seeds));
2131       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2132         hash_seeds[i] = r[i];
2133
2134       /* Mark all hash keys as been no-seen before. */
2135       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2136         hash_bitmap[i] = 0;
2137
2138       time_last_seed_change = time_now;
2139     }
2140
2141   from = vlib_frame_vector_args (frame);
2142   n_left_from = frame->n_vectors;
2143   next_index = node->cached_next_index;
2144   if (next_index == IP4_ARP_NEXT_DROP)
2145     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2146
2147   while (n_left_from > 0)
2148     {
2149       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2150                            to_next_drop, n_left_to_next_drop);
2151
2152       while (n_left_from > 0 && n_left_to_next_drop > 0)
2153         {
2154           vlib_buffer_t * p0;
2155           ip4_header_t * ip0;
2156           ethernet_header_t * eh0;
2157           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2158           uword bm0;
2159           ip_adjacency_t * adj0;
2160
2161           pi0 = from[0];
2162
2163           p0 = vlib_get_buffer (vm, pi0);
2164
2165           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2166           adj0 = ip_get_adjacency (lm, adj_index0);
2167           ip0 = vlib_buffer_get_current (p0);
2168
2169           /* If packet destination is not local, send ARP to next hop */
2170           if (adj0->arp.next_hop.ip4.as_u32)
2171             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2172
2173           /* 
2174            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2175            * rewrite to this packet, we need to skip it here.
2176            * Note, to distinguish from src IP addr *.8.6.*, we
2177            * check for a bcast eth dest instead of IPv4 version.
2178            */
2179           eh0 = (ethernet_header_t*)ip0;
2180           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2181             {
2182               u32 vlan_num = 0;
2183               u16 * etype = &eh0->type;
2184               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2185                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2186                 {
2187                   vlan_num += 1;
2188                   etype += 2; //vlan tag also 16 bits, same as etype
2189                 }
2190               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2191                 {
2192                   vlib_buffer_advance (
2193                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2194                   ip0 = vlib_buffer_get_current (p0);
2195                 }
2196             }
2197
2198           a0 = hash_seeds[0];
2199           b0 = hash_seeds[1];
2200           c0 = hash_seeds[2];
2201
2202           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2203           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2204
2205           a0 ^= ip0->dst_address.data_u32;
2206           b0 ^= sw_if_index0;
2207
2208           hash_v3_finalize32 (a0, b0, c0);
2209
2210           c0 &= BITS (hash_bitmap) - 1;
2211           c0 = c0 / BITS (uword);
2212           m0 = (uword) 1 << (c0 % BITS (uword));
2213
2214           bm0 = hash_bitmap[c0];
2215           drop0 = (bm0 & m0) != 0;
2216
2217           /* Mark it as seen. */
2218           hash_bitmap[c0] = bm0 | m0;
2219
2220           from += 1;
2221           n_left_from -= 1;
2222           to_next_drop[0] = pi0;
2223           to_next_drop += 1;
2224           n_left_to_next_drop -= 1;
2225
2226           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2227
2228           if (drop0)
2229             continue;
2230
2231           /* 
2232            * Can happen if the control-plane is programming tables
2233            * with traffic flowing; at least that's today's lame excuse.
2234            */
2235           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2236             {
2237               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2238             }
2239           else
2240           /* Send ARP request. */
2241           {
2242             u32 bi0 = 0;
2243             vlib_buffer_t * b0;
2244             ethernet_arp_header_t * h0;
2245             vnet_hw_interface_t * hw_if0;
2246
2247             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2248
2249             /* Add rewrite/encap string for ARP packet. */
2250             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2251
2252             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2253
2254             /* Src ethernet address in ARP header. */
2255             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2256                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2257
2258             ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0);
2259
2260             /* Copy in destination address we are requesting. */
2261             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2262
2263             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2264             b0 = vlib_get_buffer (vm, bi0);
2265             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2266
2267             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2268
2269             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2270           }
2271         }
2272
2273       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2274     }
2275
2276   return frame->n_vectors;
2277 }
2278
2279 static char * ip4_arp_error_strings[] = {
2280   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2281   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2282   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2283   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2284   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2285 };
2286
2287 VLIB_REGISTER_NODE (ip4_arp_node) = {
2288   .function = ip4_arp,
2289   .name = "ip4-arp",
2290   .vector_size = sizeof (u32),
2291
2292   .format_trace = format_ip4_forward_next_trace,
2293
2294   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2295   .error_strings = ip4_arp_error_strings,
2296
2297   .n_next_nodes = IP4_ARP_N_NEXT,
2298   .next_nodes = {
2299     [IP4_ARP_NEXT_DROP] = "error-drop",
2300   },
2301 };
2302
2303 #define foreach_notrace_ip4_arp_error           \
2304 _(DROP)                                         \
2305 _(REQUEST_SENT)                                 \
2306 _(REPLICATE_DROP)                               \
2307 _(REPLICATE_FAIL)
2308
2309 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2310 {
2311   vlib_node_runtime_t *rt = 
2312     vlib_node_get_runtime (vm, ip4_arp_node.index);
2313
2314   /* don't trace ARP request packets */
2315 #define _(a)                                    \
2316     vnet_pcap_drop_trace_filter_add_del         \
2317         (rt->errors[IP4_ARP_ERROR_##a],         \
2318          1 /* is_add */);
2319     foreach_notrace_ip4_arp_error;
2320 #undef _
2321   return 0;
2322 }
2323
2324 VLIB_INIT_FUNCTION(arp_notrace_init);
2325
2326
2327 /* Send an ARP request to see if given destination is reachable on given interface. */
2328 clib_error_t *
2329 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2330 {
2331   vnet_main_t * vnm = vnet_get_main();
2332   ip4_main_t * im = &ip4_main;
2333   ethernet_arp_header_t * h;
2334   ip4_address_t * src;
2335   ip_interface_address_t * ia;
2336   ip_adjacency_t * adj;
2337   vnet_hw_interface_t * hi;
2338   vnet_sw_interface_t * si;
2339   vlib_buffer_t * b;
2340   u32 bi = 0;
2341
2342   si = vnet_get_sw_interface (vnm, sw_if_index);
2343
2344   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2345     {
2346       return clib_error_return (0, "%U: interface %U down",
2347                                 format_ip4_address, dst, 
2348                                 format_vnet_sw_if_index_name, vnm, 
2349                                 sw_if_index);
2350     }
2351
2352   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2353   if (! src)
2354     {
2355       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2356       return clib_error_return 
2357         (0, "no matching interface address for destination %U (interface %U)",
2358          format_ip4_address, dst,
2359          format_vnet_sw_if_index_name, vnm, sw_if_index);
2360     }
2361
2362   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2363
2364   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2365
2366   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2367
2368   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2369
2370   h->ip4_over_ethernet[0].ip4 = src[0];
2371   h->ip4_over_ethernet[1].ip4 = dst[0];
2372
2373   b = vlib_get_buffer (vm, bi);
2374   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2375
2376   /* Add encapsulation string for software interface (e.g. ethernet header). */
2377   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2378   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2379
2380   {
2381     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2382     u32 * to_next = vlib_frame_vector_args (f);
2383     to_next[0] = bi;
2384     f->n_vectors = 1;
2385     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2386   }
2387
2388   return /* no error */ 0;
2389 }
2390
/* Next-node slots referenced by the ip4 rewrite code. */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ARP = 1,
} ip4_rewrite_next_t;
2395
2396 always_inline uword
2397 ip4_rewrite_inline (vlib_main_t * vm,
2398                     vlib_node_runtime_t * node,
2399                     vlib_frame_t * frame,
2400                     int rewrite_for_locally_received_packets)
2401 {
2402   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2403   u32 * from = vlib_frame_vector_args (frame);
2404   u32 n_left_from, n_left_to_next, * to_next, next_index;
2405   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2406   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2407
2408   n_left_from = frame->n_vectors;
2409   next_index = node->cached_next_index;
2410   u32 cpu_index = os_get_cpu_number();
2411   
2412   while (n_left_from > 0)
2413     {
2414       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2415
2416       while (n_left_from >= 4 && n_left_to_next >= 2)
2417         {
2418           ip_adjacency_t * adj0, * adj1;
2419           vlib_buffer_t * p0, * p1;
2420           ip4_header_t * ip0, * ip1;
2421           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2422           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2423           u32 next0_override, next1_override;
2424       
2425           if (rewrite_for_locally_received_packets)
2426               next0_override = next1_override = 0;
2427
2428           /* Prefetch next iteration. */
2429           {
2430             vlib_buffer_t * p2, * p3;
2431
2432             p2 = vlib_get_buffer (vm, from[2]);
2433             p3 = vlib_get_buffer (vm, from[3]);
2434
2435             vlib_prefetch_buffer_header (p2, STORE);
2436             vlib_prefetch_buffer_header (p3, STORE);
2437
2438             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2439             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2440           }
2441
2442           pi0 = to_next[0] = from[0];
2443           pi1 = to_next[1] = from[1];
2444
2445           from += 2;
2446           n_left_from -= 2;
2447           to_next += 2;
2448           n_left_to_next -= 2;
2449       
2450           p0 = vlib_get_buffer (vm, pi0);
2451           p1 = vlib_get_buffer (vm, pi1);
2452
2453           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2454           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2455
2456           /* We should never rewrite a pkt using the MISS adjacency */
2457           ASSERT(adj_index0 && adj_index1);
2458
2459           ip0 = vlib_buffer_get_current (p0);
2460           ip1 = vlib_buffer_get_current (p1);
2461
2462           error0 = error1 = IP4_ERROR_NONE;
2463
2464           /* Decrement TTL & update checksum.
2465              Works either endian, so no need for byte swap. */
2466           if (! rewrite_for_locally_received_packets)
2467             {
2468               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2469
2470               /* Input node should have reject packets with ttl 0. */
2471               ASSERT (ip0->ttl > 0);
2472               ASSERT (ip1->ttl > 0);
2473
2474               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2475               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2476
2477               checksum0 += checksum0 >= 0xffff;
2478               checksum1 += checksum1 >= 0xffff;
2479
2480               ip0->checksum = checksum0;
2481               ip1->checksum = checksum1;
2482
2483               ttl0 -= 1;
2484               ttl1 -= 1;
2485
2486               ip0->ttl = ttl0;
2487               ip1->ttl = ttl1;
2488
2489               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2490               error1 = ttl1 <= 0 ? IP4_ERROR_TIME_EXPIRED : error1;
2491
2492               /* Verify checksum. */
2493               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2494               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2495             }
2496
2497           /* Rewrite packet header and updates lengths. */
2498           adj0 = ip_get_adjacency (lm, adj_index0);
2499           adj1 = ip_get_adjacency (lm, adj_index1);
2500       
2501           if (rewrite_for_locally_received_packets)
2502             {
2503               /*
2504                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2505                * we end up here with a local adjacency in hand
2506                * The local adj rewrite data is 0xfefe on purpose.
2507                * Bad engineer, no donut for you.
2508                */
2509               if (PREDICT_FALSE(adj0->lookup_next_index 
2510                                 == IP_LOOKUP_NEXT_LOCAL))
2511                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2512               if (PREDICT_FALSE(adj0->lookup_next_index
2513                                 == IP_LOOKUP_NEXT_ARP))
2514                 next0_override = IP4_REWRITE_NEXT_ARP;
2515               if (PREDICT_FALSE(adj1->lookup_next_index 
2516                                 == IP_LOOKUP_NEXT_LOCAL))
2517                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2518               if (PREDICT_FALSE(adj1->lookup_next_index
2519                                 == IP_LOOKUP_NEXT_ARP))
2520                 next1_override = IP4_REWRITE_NEXT_ARP;
2521             }
2522
2523           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2524           rw_len0 = adj0[0].rewrite_header.data_bytes;
2525           rw_len1 = adj1[0].rewrite_header.data_bytes;
2526           next0 = (error0 == IP4_ERROR_NONE) 
2527             ? adj0[0].rewrite_header.next_index : 0;
2528
2529           if (rewrite_for_locally_received_packets)
2530               next0 = next0 && next0_override ? next0_override : next0;
2531
2532           next1 = (error1 == IP4_ERROR_NONE)
2533             ? adj1[0].rewrite_header.next_index : 0;
2534
2535           if (rewrite_for_locally_received_packets)
2536               next1 = next1 && next1_override ? next1_override : next1;
2537
2538           /* 
2539            * We've already accounted for an ethernet_header_t elsewhere
2540            */
2541           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2542               vlib_increment_combined_counter 
2543                   (&lm->adjacency_counters,
2544                    cpu_index, adj_index0, 
2545                    /* packet increment */ 0,
2546                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2547
2548           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2549               vlib_increment_combined_counter 
2550                   (&lm->adjacency_counters,
2551                    cpu_index, adj_index1, 
2552                    /* packet increment */ 0,
2553                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2554
2555           /* Check MTU of outgoing interface. */
2556           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2557                     ? IP4_ERROR_MTU_EXCEEDED
2558                     : error0);
2559           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2560                     ? IP4_ERROR_MTU_EXCEEDED
2561                     : error1);
2562
2563           p0->current_data -= rw_len0;
2564           p1->current_data -= rw_len1;
2565
2566           p0->current_length += rw_len0;
2567           p1->current_length += rw_len1;
2568
2569           vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
2570           vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index;
2571       
2572           p0->error = error_node->errors[error0];
2573           p1->error = error_node->errors[error1];
2574
2575           /* Guess we are only writing on simple Ethernet header. */
2576           vnet_rewrite_two_headers (adj0[0], adj1[0],
2577                                     ip0, ip1,
2578                                     sizeof (ethernet_header_t));
2579       
2580           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2581                                            to_next, n_left_to_next,
2582                                            pi0, pi1, next0, next1);
2583         }
2584
2585       while (n_left_from > 0 && n_left_to_next > 0)
2586         {
2587           ip_adjacency_t * adj0;
2588           vlib_buffer_t * p0;
2589           ip4_header_t * ip0;
2590           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2591           u32 next0_override;
2592       
2593           if (rewrite_for_locally_received_packets)
2594               next0_override = 0;
2595
2596           pi0 = to_next[0] = from[0];
2597
2598           p0 = vlib_get_buffer (vm, pi0);
2599
2600           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2601
2602           /* We should never rewrite a pkt using the MISS adjacency */
2603           ASSERT(adj_index0);
2604
2605           adj0 = ip_get_adjacency (lm, adj_index0);
2606       
2607           ip0 = vlib_buffer_get_current (p0);
2608
2609           error0 = IP4_ERROR_NONE;
2610           next0 = 0;            /* drop on error */
2611
2612           /* Decrement TTL & update checksum. */
2613           if (! rewrite_for_locally_received_packets)
2614             {
2615               i32 ttl0 = ip0->ttl;
2616
2617               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2618
2619               checksum0 += checksum0 >= 0xffff;
2620
2621               ip0->checksum = checksum0;
2622
2623               ASSERT (ip0->ttl > 0);
2624
2625               ttl0 -= 1;
2626
2627               ip0->ttl = ttl0;
2628
2629               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2630
2631               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2632             }
2633
2634           if (rewrite_for_locally_received_packets)
2635             {
2636               /*
2637                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2638                * we end up here with a local adjacency in hand
2639                * The local adj rewrite data is 0xfefe on purpose.
2640                * Bad engineer, no donut for you.
2641                */
2642               if (PREDICT_FALSE(adj0->lookup_next_index 
2643                                 == IP_LOOKUP_NEXT_LOCAL))
2644                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2645               /* 
2646                * We have to override the next_index in ARP adjacencies,
2647                * because they're set up for ip4-arp, not this node...
2648                */
2649               if (PREDICT_FALSE(adj0->lookup_next_index
2650                                 == IP_LOOKUP_NEXT_ARP))
2651                 next0_override = IP4_REWRITE_NEXT_ARP;
2652             }
2653
2654           /* Guess we are only writing on simple Ethernet header. */
2655           vnet_rewrite_one_header (adj0[0], ip0, 
2656                                    sizeof (ethernet_header_t));
2657           
2658           /* Update packet buffer attributes/set output interface. */
2659           rw_len0 = adj0[0].rewrite_header.data_bytes;
2660           
2661           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2662               vlib_increment_combined_counter 
2663                   (&lm->adjacency_counters,
2664                    cpu_index, adj_index0, 
2665                    /* packet increment */ 0,
2666                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2667           
2668           /* Check MTU of outgoing interface. */
2669           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2670                     > adj0[0].rewrite_header.max_l3_packet_bytes
2671                     ? IP4_ERROR_MTU_EXCEEDED
2672                     : error0);
2673           
2674           p0->error = error_node->errors[error0];
2675           p0->current_data -= rw_len0;
2676           p0->current_length += rw_len0;
2677           vnet_buffer (p0)->sw_if_index[VLIB_TX] = 
2678             adj0[0].rewrite_header.sw_if_index;
2679           
2680           next0 = (error0 == IP4_ERROR_NONE)
2681             ? adj0[0].rewrite_header.next_index : 0;
2682
2683           if (rewrite_for_locally_received_packets)
2684               next0 = next0 && next0_override ? next0_override : next0;
2685
2686           from += 1;
2687           n_left_from -= 1;
2688           to_next += 1;
2689           n_left_to_next -= 1;
2690       
2691           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2692                                            to_next, n_left_to_next,
2693                                            pi0, next0);
2694         }
2695   
2696       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2697     }
2698
2699   /* Need to do trace after rewrites to pick up new packet data. */
2700   if (node->flags & VLIB_NODE_FLAG_TRACE)
2701     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2702
2703   return frame->n_vectors;
2704 }
2705
2706 static uword
2707 ip4_rewrite_transit (vlib_main_t * vm,
2708                      vlib_node_runtime_t * node,
2709                      vlib_frame_t * frame)
2710 {
2711   return ip4_rewrite_inline (vm, node, frame,
2712                              /* rewrite_for_locally_received_packets */ 0);
2713 }
2714
2715 static uword
2716 ip4_rewrite_local (vlib_main_t * vm,
2717                    vlib_node_runtime_t * node,
2718                    vlib_frame_t * frame)
2719 {
2720   return ip4_rewrite_inline (vm, node, frame,
2721                              /* rewrite_for_locally_received_packets */ 1);
2722 }
2723
2724 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2725   .function = ip4_rewrite_transit,
2726   .name = "ip4-rewrite-transit",
2727   .vector_size = sizeof (u32),
2728
2729   .format_trace = format_ip4_forward_next_trace,
2730
2731   .n_next_nodes = 2,
2732   .next_nodes = {
2733     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2734     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2735   },
2736 };
2737
2738 VLIB_REGISTER_NODE (ip4_rewrite_local_node,static) = {
2739   .function = ip4_rewrite_local,
2740   .name = "ip4-rewrite-local",
2741   .vector_size = sizeof (u32),
2742
2743   .sibling_of = "ip4-rewrite-transit",
2744
2745   .format_trace = format_ip4_forward_next_trace,
2746
2747   .n_next_nodes = 2,
2748   .next_nodes = {
2749     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2750     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2751   },
2752 };
2753
2754 static clib_error_t *
2755 add_del_interface_table (vlib_main_t * vm,
2756                          unformat_input_t * input,
2757                          vlib_cli_command_t * cmd)
2758 {
2759   vnet_main_t * vnm = vnet_get_main();
2760   clib_error_t * error = 0;
2761   u32 sw_if_index, table_id;
2762
2763   sw_if_index = ~0;
2764
2765   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2766     {
2767       error = clib_error_return (0, "unknown interface `%U'",
2768                                  format_unformat_error, input);
2769       goto done;
2770     }
2771
2772   if (unformat (input, "%d", &table_id))
2773     ;
2774   else
2775     {
2776       error = clib_error_return (0, "expected table id `%U'",
2777                                  format_unformat_error, input);
2778       goto done;
2779     }
2780
2781   {
2782     ip4_main_t * im = &ip4_main;
2783     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
2784
2785     if (fib) 
2786       {
2787         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2788         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
2789     }
2790   }
2791
2792  done:
2793   return error;
2794 }
2795
2796 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2797   .path = "set interface ip table",
2798   .function = add_del_interface_table,
2799   .short_help = "Add/delete FIB table id for interface",
2800 };
2801
2802
2803 static uword
2804 ip4_lookup_multicast (vlib_main_t * vm,
2805                       vlib_node_runtime_t * node,
2806                       vlib_frame_t * frame)
2807 {
2808   ip4_main_t * im = &ip4_main;
2809   ip_lookup_main_t * lm = &im->lookup_main;
2810   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
2811   u32 n_left_from, n_left_to_next, * from, * to_next;
2812   ip_lookup_next_t next;
2813   u32 cpu_index = os_get_cpu_number();
2814
2815   from = vlib_frame_vector_args (frame);
2816   n_left_from = frame->n_vectors;
2817   next = node->cached_next_index;
2818
2819   while (n_left_from > 0)
2820     {
2821       vlib_get_next_frame (vm, node, next,
2822                            to_next, n_left_to_next);
2823
2824       while (n_left_from >= 4 && n_left_to_next >= 2)
2825         {
2826           vlib_buffer_t * p0, * p1;
2827           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
2828           ip_lookup_next_t next0, next1;
2829           ip4_header_t * ip0, * ip1;
2830           ip_adjacency_t * adj0, * adj1;
2831           u32 fib_index0, fib_index1;
2832           u32 flow_hash_config0, flow_hash_config1;
2833
2834           /* Prefetch next iteration. */
2835           {
2836             vlib_buffer_t * p2, * p3;
2837
2838             p2 = vlib_get_buffer (vm, from[2]);
2839             p3 = vlib_get_buffer (vm, from[3]);
2840
2841             vlib_prefetch_buffer_header (p2, LOAD);
2842             vlib_prefetch_buffer_header (p3, LOAD);
2843
2844             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2845             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2846           }
2847
2848           pi0 = to_next[0] = from[0];
2849           pi1 = to_next[1] = from[1];
2850
2851           p0 = vlib_get_buffer (vm, pi0);
2852           p1 = vlib_get_buffer (vm, pi1);
2853
2854           ip0 = vlib_buffer_get_current (p0);
2855           ip1 = vlib_buffer_get_current (p1);
2856
2857           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2858           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2859           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2860             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2861           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2862             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2863
2864           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
2865                                               &ip0->dst_address, p0);
2866           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
2867                                               &ip1->dst_address, p1);
2868
2869           adj0 = ip_get_adjacency (lm, adj_index0);
2870           adj1 = ip_get_adjacency (lm, adj_index1);
2871
2872           next0 = adj0->lookup_next_index;
2873           next1 = adj1->lookup_next_index;
2874
2875           flow_hash_config0 = 
2876               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
2877
2878           flow_hash_config1 = 
2879               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
2880
2881           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
2882               (ip0, flow_hash_config0);
2883                                                                   
2884           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
2885               (ip1, flow_hash_config1);
2886
2887           ASSERT (adj0->n_adj > 0);
2888           ASSERT (adj1->n_adj > 0);
2889           ASSERT (is_pow2 (adj0->n_adj));
2890           ASSERT (is_pow2 (adj1->n_adj));
2891           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
2892           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
2893
2894           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2895           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2896
2897           if (1) /* $$$$$$ HACK FIXME */
2898           vlib_increment_combined_counter 
2899               (cm, cpu_index, adj_index0, 1,
2900                vlib_buffer_length_in_chain (vm, p0));
2901           if (1) /* $$$$$$ HACK FIXME */
2902           vlib_increment_combined_counter 
2903               (cm, cpu_index, adj_index1, 1,
2904                vlib_buffer_length_in_chain (vm, p1));
2905
2906           from += 2;
2907           to_next += 2;
2908           n_left_to_next -= 2;
2909           n_left_from -= 2;
2910
2911           wrong_next = (next0 != next) + 2*(next1 != next);
2912           if (PREDICT_FALSE (wrong_next != 0))
2913             {
2914               switch (wrong_next)
2915                 {
2916                 case 1:
2917                   /* A B A */
2918                   to_next[-2] = pi1;
2919                   to_next -= 1;
2920                   n_left_to_next += 1;
2921                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2922                   break;
2923
2924                 case 2:
2925                   /* A A B */
2926                   to_next -= 1;
2927                   n_left_to_next += 1;
2928                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2929                   break;
2930
2931                 case 3:
2932                   /* A B C */
2933                   to_next -= 2;
2934                   n_left_to_next += 2;
2935                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2936                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2937                   if (next0 == next1)
2938                     {
2939                       /* A B B */
2940                       vlib_put_next_frame (vm, node, next, n_left_to_next);
2941                       next = next1;
2942                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2943                     }
2944                 }
2945             }
2946         }
2947     
2948       while (n_left_from > 0 && n_left_to_next > 0)
2949         {
2950           vlib_buffer_t * p0;
2951           ip4_header_t * ip0;
2952           u32 pi0, adj_index0;
2953           ip_lookup_next_t next0;
2954           ip_adjacency_t * adj0;
2955           u32 fib_index0;
2956           u32 flow_hash_config0;
2957
2958           pi0 = from[0];
2959           to_next[0] = pi0;
2960
2961           p0 = vlib_get_buffer (vm, pi0);
2962
2963           ip0 = vlib_buffer_get_current (p0);
2964
2965           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2966                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2967           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2968               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2969           
2970           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
2971                                               &ip0->dst_address, p0);
2972
2973           adj0 = ip_get_adjacency (lm, adj_index0);
2974
2975           next0 = adj0->lookup_next_index;
2976
2977           flow_hash_config0 = 
2978               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
2979
2980           vnet_buffer (p0)->ip.flow_hash = 
2981             ip4_compute_flow_hash (ip0, flow_hash_config0);
2982
2983           ASSERT (adj0->n_adj > 0);
2984           ASSERT (is_pow2 (adj0->n_adj));
2985           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
2986
2987           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2988
2989           if (1) /* $$$$$$ HACK FIXME */
2990               vlib_increment_combined_counter 
2991                   (cm, cpu_index, adj_index0, 1,
2992                    vlib_buffer_length_in_chain (vm, p0));
2993
2994           from += 1;
2995           to_next += 1;
2996           n_left_to_next -= 1;
2997           n_left_from -= 1;
2998
2999           if (PREDICT_FALSE (next0 != next))
3000             {
3001               n_left_to_next += 1;
3002               vlib_put_next_frame (vm, node, next, n_left_to_next);
3003               next = next0;
3004               vlib_get_next_frame (vm, node, next,
3005                                    to_next, n_left_to_next);
3006               to_next[0] = pi0;
3007               to_next += 1;
3008               n_left_to_next -= 1;
3009             }
3010         }
3011
3012       vlib_put_next_frame (vm, node, next, n_left_to_next);
3013     }
3014
3015   return frame->n_vectors;
3016 }
3017
3018 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3019   .function = ip4_lookup_multicast,
3020   .name = "ip4-lookup-multicast",
3021   .vector_size = sizeof (u32),
3022
3023   .n_next_nodes = IP_LOOKUP_N_NEXT,
3024   .next_nodes = IP4_LOOKUP_NEXT_NODES,
3025 };
3026
3027 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3028   .function = ip4_drop,
3029   .name = "ip4-multicast",
3030   .vector_size = sizeof (u32),
3031
3032   .format_trace = format_ip4_forward_next_trace,
3033
3034   .n_next_nodes = 1,
3035   .next_nodes = {
3036     [0] = "error-drop",
3037   },
3038 };
3039
3040 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3041 {
3042   ip4_main_t * im = &ip4_main;
3043   ip4_fib_mtrie_t * mtrie0;
3044   ip4_fib_mtrie_leaf_t leaf0;
3045   u32 adj_index0;
3046     
3047   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3048
3049   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3050   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3051   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3052   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3053   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3054   
3055   /* Handle default route. */
3056   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3057   
3058   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3059   
3060   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3061                                                   a, 
3062                                                   /* no_default_route */ 0);
3063 }
3064  
3065 static clib_error_t *
3066 test_lookup_command_fn (vlib_main_t * vm,
3067                         unformat_input_t * input,
3068                         vlib_cli_command_t * cmd)
3069 {
3070   u32 table_id = 0;
3071   f64 count = 1;
3072   u32 n;
3073   int i;
3074   ip4_address_t ip4_base_address;
3075   u64 errors = 0;
3076
3077   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3078       if (unformat (input, "table %d", &table_id))
3079         ;
3080       else if (unformat (input, "count %f", &count))
3081         ;
3082
3083       else if (unformat (input, "%U",
3084                          unformat_ip4_address, &ip4_base_address))
3085         ;
3086       else
3087         return clib_error_return (0, "unknown input `%U'",
3088                                   format_unformat_error, input);
3089   }
3090
3091   n = count;
3092
3093   for (i = 0; i < n; i++)
3094     {
3095       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3096         errors++;
3097
3098       ip4_base_address.as_u32 = 
3099         clib_host_to_net_u32 (1 + 
3100                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3101     }
3102
3103   if (errors) 
3104     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3105   else
3106     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3107
3108   return 0;
3109 }
3110
3111 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3112     .path = "test lookup",
3113     .short_help = "test lookup",
3114     .function = test_lookup_command_fn,
3115 };
3116
3117 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3118 {
3119   ip4_main_t * im4 = &ip4_main;
3120   ip4_fib_t * fib;
3121   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3122
3123   if (p == 0)
3124     return VNET_API_ERROR_NO_SUCH_FIB;
3125
3126   fib = vec_elt_at_index (im4->fibs, p[0]);
3127
3128   fib->flow_hash_config = flow_hash_config;
3129   return 0;
3130 }
3131  
3132 static clib_error_t *
3133 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3134                              unformat_input_t * input,
3135                              vlib_cli_command_t * cmd)
3136 {
3137   int matched = 0;
3138   u32 table_id = 0;
3139   u32 flow_hash_config = 0;
3140   int rv;
3141
3142   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3143     if (unformat (input, "table %d", &table_id))
3144       matched = 1;
3145 #define _(a,v) \
3146     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3147     foreach_flow_hash_bit
3148 #undef _
3149     else break;
3150   }
3151   
3152   if (matched == 0)
3153     return clib_error_return (0, "unknown input `%U'",
3154                               format_unformat_error, input);
3155   
3156   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3157   switch (rv)
3158     {
3159     case 0:
3160       break;
3161       
3162     case VNET_API_ERROR_NO_SUCH_FIB:
3163       return clib_error_return (0, "no such FIB table %d", table_id);
3164       
3165     default:
3166       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3167       break;
3168     }
3169   
3170   return 0;
3171 }
3172  
3173 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3174   .path = "set ip flow-hash",
3175   .short_help = 
3176   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3177   .function = set_ip_flow_hash_command_fn,
3178 };
3179  
3180 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3181                                  u32 table_index)
3182 {
3183   vnet_main_t * vnm = vnet_get_main();
3184   vnet_interface_main_t * im = &vnm->interface_main;
3185   ip4_main_t * ipm = &ip4_main;
3186   ip_lookup_main_t * lm = &ipm->lookup_main;
3187   vnet_classify_main_t * cm = &vnet_classify_main;
3188
3189   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3190     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3191
3192   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3193     return VNET_API_ERROR_NO_SUCH_ENTRY;
3194
3195   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3196   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3197
3198   return 0;
3199 }
3200
3201 static clib_error_t *
3202 set_ip_classify_command_fn (vlib_main_t * vm,
3203                             unformat_input_t * input,
3204                             vlib_cli_command_t * cmd)
3205 {
3206   u32 table_index = ~0;
3207   int table_index_set = 0;
3208   u32 sw_if_index = ~0;
3209   int rv;
3210   
3211   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3212     if (unformat (input, "table-index %d", &table_index))
3213       table_index_set = 1;
3214     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3215                        vnet_get_main(), &sw_if_index))
3216       ;
3217     else
3218       break;
3219   }
3220       
3221   if (table_index_set == 0)
3222     return clib_error_return (0, "classify table-index must be specified");
3223
3224   if (sw_if_index == ~0)
3225     return clib_error_return (0, "interface / subif must be specified");
3226
3227   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3228
3229   switch (rv)
3230     {
3231     case 0:
3232       break;
3233
3234     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3235       return clib_error_return (0, "No such interface");
3236
3237     case VNET_API_ERROR_NO_SUCH_ENTRY:
3238       return clib_error_return (0, "No such classifier table");
3239     }
3240   return 0;
3241 }
3242
3243 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3244     .path = "set ip classify",
3245     .short_help = 
3246     "set ip classify intfc <int> table-index <index>",
3247     .function = set_ip_classify_command_fn,
3248 };
3249