VPP-69: Create fib or bridge domain with arbitrary ID
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47
48 /* This is really, really simple but stupid fib. */
49 u32
50 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
51                            ip4_address_t * dst,
52                            u32 disable_default_route)
53 {
54   ip_lookup_main_t * lm = &im->lookup_main;
55   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
56   uword * p, * hash, key;
57   i32 i, i_min, dst_address, ai;
58
59   i_min = disable_default_route ? 1 : 0;
60   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
61   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
62     {
63       hash = fib->adj_index_by_dst_address[i];
64       if (! hash)
65         continue;
66
67       key = dst_address & im->fib_masks[i];
68       if ((p = hash_get (hash, key)) != 0)
69         {
70           ai = p[0];
71           goto done;
72         }
73     }
74     
75   /* Nothing matches in table. */
76   ai = lm->miss_adj_index;
77
78  done:
79   return ai;
80 }
81
82 static ip4_fib_t *
83 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
84 {
85   ip4_fib_t * fib;
86   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
87   vec_add2 (im->fibs, fib, 1);
88   fib->table_id = table_id;
89   fib->index = fib - im->fibs;
90   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
91   fib->fwd_classify_table_index = ~0;
92   fib->rev_classify_table_index = ~0;
93   ip4_mtrie_init (&fib->mtrie);
94   return fib;
95 }
96
97 ip4_fib_t *
98 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
99                                    u32 table_index_or_id, u32 flags)
100 {
101   uword * p, fib_index;
102
103   fib_index = table_index_or_id;
104   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
105     {
106       if (table_index_or_id == ~0) {
107         table_index_or_id = 0;
108         while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
109           table_index_or_id++;
110         }
111         return create_fib_with_table_id (im, table_index_or_id);
112       }
113
114       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
115       if (! p)
116         return create_fib_with_table_id (im, table_index_or_id);
117       fib_index = p[0];
118     }
119   return vec_elt_at_index (im->fibs, fib_index);
120 }
121
122 static void
123 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
124                                        ip4_fib_t * fib,
125                                        u32 address_length)
126 {
127   hash_t * h;
128   uword max_index;
129
130   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
131   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
132
133   fib->adj_index_by_dst_address[address_length] =
134     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
135
136   hash_set_flags (fib->adj_index_by_dst_address[address_length],
137                   HASH_FLAG_NO_AUTO_SHRINK);
138
139   h = hash_header (fib->adj_index_by_dst_address[address_length]);
140   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
141
142   /* Initialize new/old hash value vectors. */
143   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
144   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
145 }
146
147 static void
148 ip4_fib_set_adj_index (ip4_main_t * im,
149                        ip4_fib_t * fib,
150                        u32 flags,
151                        u32 dst_address_u32,
152                        u32 dst_address_length,
153                        u32 adj_index)
154 {
155   ip_lookup_main_t * lm = &im->lookup_main;
156   uword * hash;
157
158   if (vec_bytes(fib->old_hash_values))
159     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
160   if (vec_bytes(fib->new_hash_values))
161     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
162   fib->new_hash_values[0] = adj_index;
163
164   /* Make sure adj index is valid. */
165   if (CLIB_DEBUG > 0)
166     (void) ip_get_adjacency (lm, adj_index);
167
168   hash = fib->adj_index_by_dst_address[dst_address_length];
169
170   hash = _hash_set3 (hash, dst_address_u32,
171                      fib->new_hash_values,
172                      fib->old_hash_values);
173
174   fib->adj_index_by_dst_address[dst_address_length] = hash;
175
176   if (vec_len (im->add_del_route_callbacks) > 0)
177     {
178       ip4_add_del_route_callback_t * cb;
179       ip4_address_t d;
180       uword * p;
181
182       d.data_u32 = dst_address_u32;
183       vec_foreach (cb, im->add_del_route_callbacks)
184         if ((flags & cb->required_flags) == cb->required_flags)
185           cb->function (im, cb->function_opaque,
186                         fib, flags,
187                         &d, dst_address_length,
188                         fib->old_hash_values,
189                         fib->new_hash_values);
190
191       p = hash_get (hash, dst_address_u32);
192       clib_memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
193     }
194 }
195
196 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
197 {
198   ip_lookup_main_t * lm = &im->lookup_main;
199   ip4_fib_t * fib;
200   u32 dst_address, dst_address_length, adj_index, old_adj_index;
201   uword * hash, is_del;
202   ip4_add_del_route_callback_t * cb;
203
204   /* Either create new adjacency or use given one depending on arguments. */
205   if (a->n_add_adj > 0)
206     {
207       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
208       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
209     }
210   else
211     adj_index = a->adj_index;
212
213   dst_address = a->dst_address.data_u32;
214   dst_address_length = a->dst_address_length;
215   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
216
217   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
218   dst_address &= im->fib_masks[dst_address_length];
219
220   if (! fib->adj_index_by_dst_address[dst_address_length])
221     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
222
223   hash = fib->adj_index_by_dst_address[dst_address_length];
224
225   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
226
227   if (is_del)
228     {
229       fib->old_hash_values[0] = ~0;
230       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
231       fib->adj_index_by_dst_address[dst_address_length] = hash;
232
233       if (vec_len (im->add_del_route_callbacks) > 0
234           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
235         {
236           fib->new_hash_values[0] = ~0;
237           vec_foreach (cb, im->add_del_route_callbacks)
238             if ((a->flags & cb->required_flags) == cb->required_flags)
239               cb->function (im, cb->function_opaque,
240                             fib, a->flags,
241                             &a->dst_address, dst_address_length,
242                             fib->old_hash_values,
243                             fib->new_hash_values);
244         }
245     }
246   else
247     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
248                            adj_index);
249
250   old_adj_index = fib->old_hash_values[0];
251
252   /* Avoid spurious reference count increments */
253   if (old_adj_index == adj_index
254       && adj_index != ~0
255       && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
256     {
257       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
258       if (adj->share_count > 0)
259         adj->share_count --;
260     }
261
262   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
263                                is_del ? old_adj_index : adj_index,
264                                is_del);
265
266   /* Delete old adjacency index if present and changed. */
267   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
268       && old_adj_index != ~0
269       && old_adj_index != adj_index)
270     ip_del_adjacency (lm, old_adj_index);
271 }
272
273 void
274 ip4_add_del_route_next_hop (ip4_main_t * im,
275                             u32 flags,
276                             ip4_address_t * dst_address,
277                             u32 dst_address_length,
278                             ip4_address_t * next_hop,
279                             u32 next_hop_sw_if_index,
280                             u32 next_hop_weight, u32 adj_index, 
281                             u32 explicit_fib_index)
282 {
283   vnet_main_t * vnm = vnet_get_main();
284   ip_lookup_main_t * lm = &im->lookup_main;
285   u32 fib_index;
286   ip4_fib_t * fib;
287   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
288   u32 dst_adj_index, nh_adj_index;
289   uword * dst_hash, * dst_result;
290   uword * nh_hash, * nh_result;
291   ip_adjacency_t * dst_adj;
292   ip_multipath_adjacency_t * old_mp, * new_mp;
293   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
294   int is_interface_next_hop;
295   clib_error_t * error = 0;
296
297   if (explicit_fib_index == (u32)~0)
298       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
299   else
300       fib_index = explicit_fib_index;
301
302   fib = vec_elt_at_index (im->fibs, fib_index);
303   
304   /* Lookup next hop to be added or deleted. */
305   is_interface_next_hop = next_hop->data_u32 == 0;
306   if (adj_index == (u32)~0)
307     {
308       if (is_interface_next_hop)
309         {
310           nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
311           if (nh_result)
312             nh_adj_index = *nh_result;
313           else
314             {
315               ip_adjacency_t * adj;
316               adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
317                                       &nh_adj_index);
318               ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
319               ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
320               hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
321             }
322         }
323       else
324         {
325           nh_hash = fib->adj_index_by_dst_address[32];
326           nh_result = hash_get (nh_hash, next_hop->data_u32);
327           
328           /* Next hop must be known. */
329           if (! nh_result)
330             {
331               ip_adjacency_t * adj;
332
333               nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
334                                                         next_hop, 0);
335               adj = ip_get_adjacency (lm, nh_adj_index);
336               /* if ARP interface adjacencty is present, we need to
337                  install ARP adjaceny for specific next hop */
338               if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
339                   adj->arp.next_hop.ip4.as_u32 == 0)
340                 {
341                   nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
342                 }
343               else
344                 {
345                   /* Next hop is not known, so create indirect adj */
346                   ip_adjacency_t add_adj;
347                   add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
348                   add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
349                   add_adj.explicit_fib_index = explicit_fib_index;
350                   ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
351                 }
352             }
353           else
354             nh_adj_index = *nh_result;
355         }
356     }
357   else
358     {
359       nh_adj_index = adj_index;
360     }
361   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
362   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
363
364   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
365   dst_result = hash_get (dst_hash, dst_address_u32);
366   if (dst_result)
367     {
368       dst_adj_index = dst_result[0];
369       dst_adj = ip_get_adjacency (lm, dst_adj_index);
370     }
371   else
372     {
373       /* For deletes destination must be known. */
374       if (is_del)
375         {
376           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
377           error = clib_error_return (0, "unknown destination %U/%d",
378                                      format_ip4_address, dst_address,
379                                      dst_address_length);
380           goto done;
381         }
382
383       dst_adj_index = ~0;
384       dst_adj = 0;
385     }
386
387   /* Ignore adds of X/32 with next hop of X. */
388   if (! is_del
389       && dst_address_length == 32
390       && dst_address->data_u32 == next_hop->data_u32 
391       && adj_index != (u32)~0)
392     {
393       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
394       error = clib_error_return (0, "prefix matches next hop %U/%d",
395                                  format_ip4_address, dst_address,
396                                  dst_address_length);
397       goto done;
398     }
399
400   /* Destination is not known and default weight is set so add route
401      to existing non-multipath adjacency */
402   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
403     {
404       /* create new adjacency */
405       ip4_add_del_route_args_t a;
406       a.table_index_or_table_id = fib_index;
407       a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
408                  | IP4_ROUTE_FLAG_FIB_INDEX
409                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
410                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
411                              | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
412       a.dst_address = dst_address[0];
413       a.dst_address_length = dst_address_length;
414       a.adj_index = nh_adj_index;
415       a.add_adj = 0;
416       a.n_add_adj = 0;
417
418       ip4_add_del_route (im, &a);
419
420       goto done;
421     }
422
423   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
424
425   if (! ip_multipath_adjacency_add_del_next_hop
426       (lm, is_del,
427        old_mp_adj_index,
428        nh_adj_index,
429        next_hop_weight,
430        &new_mp_adj_index))
431     {
432       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
433       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
434                                  format_ip4_address, next_hop);
435       goto done;
436     }
437   
438   old_mp = new_mp = 0;
439   if (old_mp_adj_index != ~0)
440     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
441   if (new_mp_adj_index != ~0)
442     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
443
444   if (old_mp != new_mp)
445     {
446       ip4_add_del_route_args_t a;
447       a.table_index_or_table_id = fib_index;
448       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
449                  | IP4_ROUTE_FLAG_FIB_INDEX
450                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
451                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
452       a.dst_address = dst_address[0];
453       a.dst_address_length = dst_address_length;
454       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
455       a.add_adj = 0;
456       a.n_add_adj = 0;
457
458       ip4_add_del_route (im, &a);
459     }
460
461  done:
462   if (error)
463     clib_error_report (error);
464 }
465
466 void *
467 ip4_get_route (ip4_main_t * im,
468                u32 table_index_or_table_id,
469                u32 flags,
470                u8 * address,
471                u32 address_length)
472 {
473   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
474   u32 dst_address = * (u32 *) address;
475   uword * hash, * p;
476
477   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
478   dst_address &= im->fib_masks[address_length];
479
480   hash = fib->adj_index_by_dst_address[address_length];
481   p = hash_get (hash, dst_address);
482   return (void *) p;
483 }
484
485 void
486 ip4_foreach_matching_route (ip4_main_t * im,
487                             u32 table_index_or_table_id,
488                             u32 flags,
489                             ip4_address_t * address,
490                             u32 address_length,
491                             ip4_address_t ** results,
492                             u8 ** result_lengths)
493 {
494   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
495   u32 dst_address = address->data_u32;
496   u32 this_length = address_length;
497   
498   if (*results)
499     _vec_len (*results) = 0;
500   if (*result_lengths)
501     _vec_len (*result_lengths) = 0;
502
503   while (this_length <= 32 && vec_len (results) == 0)
504     {
505       uword k, v;
506       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
507         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
508           {
509             ip4_address_t a;
510             a.data_u32 = k;
511             vec_add1 (*results, a);
512             vec_add1 (*result_lengths, this_length);
513           }
514       }));
515
516       this_length++;
517     }
518 }
519
520 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
521                                   u32 table_index_or_table_id,
522                                   u32 flags)
523 {
524   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
525   ip_lookup_main_t * lm = &im->lookup_main;
526   u32 i, l;
527   ip4_address_t a;
528   ip4_add_del_route_callback_t * cb;
529   static ip4_address_t * to_delete;
530
531   if (lm->n_adjacency_remaps == 0)
532     return;
533
534   for (l = 0; l <= 32; l++)
535     {
536       hash_pair_t * p;
537       uword * hash = fib->adj_index_by_dst_address[l];
538
539       if (hash_elts (hash) == 0)
540         continue;
541
542       if (to_delete)
543         _vec_len (to_delete) = 0;
544
545       hash_foreach_pair (p, hash, ({
546         u32 adj_index = p->value[0];
547         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
548
549         if (m)
550           {
551             /* Record destination address from hash key. */
552             a.data_u32 = p->key;
553
554             /* New adjacency points to nothing: so delete prefix. */
555             if (m == ~0)
556               vec_add1 (to_delete, a);
557             else
558               {
559                 /* Remap to new adjacency. */
560                 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
561
562                 /* Set new adjacency value. */
563                 fib->new_hash_values[0] = p->value[0] = m - 1;
564
565                 vec_foreach (cb, im->add_del_route_callbacks)
566                   if ((flags & cb->required_flags) == cb->required_flags)
567                     cb->function (im, cb->function_opaque,
568                                   fib, flags | IP4_ROUTE_FLAG_ADD,
569                                   &a, l,
570                                   fib->old_hash_values,
571                                   fib->new_hash_values);
572               }
573           }
574       }));
575
576       fib->new_hash_values[0] = ~0;
577       for (i = 0; i < vec_len (to_delete); i++)
578         {
579           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
580           vec_foreach (cb, im->add_del_route_callbacks)
581             if ((flags & cb->required_flags) == cb->required_flags)
582               cb->function (im, cb->function_opaque,
583                             fib, flags | IP4_ROUTE_FLAG_DEL,
584                             &a, l,
585                             fib->old_hash_values,
586                             fib->new_hash_values);
587         }
588     }
589
590   /* Also remap adjacencies in mtrie. */
591   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
592
593   /* Reset mapping table. */
594   vec_zero (lm->adjacency_remap_table);
595
596   /* All remaps have been performed. */
597   lm->n_adjacency_remaps = 0;
598 }
599
600 void ip4_delete_matching_routes (ip4_main_t * im,
601                                  u32 table_index_or_table_id,
602                                  u32 flags,
603                                  ip4_address_t * address,
604                                  u32 address_length)
605 {
606   static ip4_address_t * matching_addresses;
607   static u8 * matching_address_lengths;
608   u32 l, i;
609   ip4_add_del_route_args_t a;
610
611   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
612   a.table_index_or_table_id = table_index_or_table_id;
613   a.adj_index = ~0;
614   a.add_adj = 0;
615   a.n_add_adj = 0;
616
617   for (l = address_length + 1; l <= 32; l++)
618     {
619       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
620                                   address,
621                                   l,
622                                   &matching_addresses,
623                                   &matching_address_lengths);
624       for (i = 0; i < vec_len (matching_addresses); i++)
625         {
626           a.dst_address = matching_addresses[i];
627           a.dst_address_length = matching_address_lengths[i];
628           ip4_add_del_route (im, &a);
629         }
630     }
631
632   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
633 }
634
635 always_inline uword
636 ip4_lookup_inline (vlib_main_t * vm,
637                    vlib_node_runtime_t * node,
638                    vlib_frame_t * frame,
639                    int lookup_for_responses_to_locally_received_packets,
640                    int is_indirect)
641 {
642   ip4_main_t * im = &ip4_main;
643   ip_lookup_main_t * lm = &im->lookup_main;
644   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
645   u32 n_left_from, n_left_to_next, * from, * to_next;
646   ip_lookup_next_t next;
647   u32 cpu_index = os_get_cpu_number();
648
649   from = vlib_frame_vector_args (frame);
650   n_left_from = frame->n_vectors;
651   next = node->cached_next_index;
652
653   while (n_left_from > 0)
654     {
655       vlib_get_next_frame (vm, node, next,
656                            to_next, n_left_to_next);
657
658       while (n_left_from >= 4 && n_left_to_next >= 2)
659         {
660           vlib_buffer_t * p0, * p1;
661           ip4_header_t * ip0, * ip1;
662           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
663           ip_lookup_next_t next0, next1;
664           ip_adjacency_t * adj0, * adj1;
665           ip4_fib_mtrie_t * mtrie0, * mtrie1;
666           ip4_fib_mtrie_leaf_t leaf0, leaf1;
667           ip4_address_t * dst_addr0, *dst_addr1;
668           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
669           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
670           u32 flow_hash_config0, flow_hash_config1;
671           u32 hash_c0, hash_c1;
672           u32 wrong_next;
673
674           /* Prefetch next iteration. */
675           {
676             vlib_buffer_t * p2, * p3;
677
678             p2 = vlib_get_buffer (vm, from[2]);
679             p3 = vlib_get_buffer (vm, from[3]);
680
681             vlib_prefetch_buffer_header (p2, LOAD);
682             vlib_prefetch_buffer_header (p3, LOAD);
683
684             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
685             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
686           }
687
688           pi0 = to_next[0] = from[0];
689           pi1 = to_next[1] = from[1];
690
691           p0 = vlib_get_buffer (vm, pi0);
692           p1 = vlib_get_buffer (vm, pi1);
693
694           ip0 = vlib_buffer_get_current (p0);
695           ip1 = vlib_buffer_get_current (p1);
696
697           if (is_indirect)
698             {
699               ip_adjacency_t * iadj0, * iadj1;
700               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
701               iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
702               dst_addr0 = &iadj0->indirect.next_hop.ip4;
703               dst_addr1 = &iadj1->indirect.next_hop.ip4;
704             }
705           else
706             {
707               dst_addr0 = &ip0->dst_address;
708               dst_addr1 = &ip1->dst_address;
709             }
710
711           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
712           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
713           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
714             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
715           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
716             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
717
718
719           if (! lookup_for_responses_to_locally_received_packets)
720             {
721               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
722               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
723
724               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
725
726               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
727               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
728             }
729
730           tcp0 = (void *) (ip0 + 1);
731           tcp1 = (void *) (ip1 + 1);
732
733           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
734                          || ip0->protocol == IP_PROTOCOL_UDP);
735           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
736                          || ip1->protocol == IP_PROTOCOL_UDP);
737
738           if (! lookup_for_responses_to_locally_received_packets)
739             {
740               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
741               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
742             }
743
744           if (! lookup_for_responses_to_locally_received_packets)
745             {
746               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
747               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
748             }
749
750           if (! lookup_for_responses_to_locally_received_packets)
751             {
752               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
753               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
754             }
755
756           if (lookup_for_responses_to_locally_received_packets)
757             {
758               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
759               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
760             }
761           else
762             {
763               /* Handle default route. */
764               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
765               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
766
767               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
768               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
769             }
770
771           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
772                                                            dst_addr0,
773                                                            /* no_default_route */ 0));
774           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
775                                                            dst_addr1,
776                                                            /* no_default_route */ 0));
777           adj0 = ip_get_adjacency (lm, adj_index0);
778           adj1 = ip_get_adjacency (lm, adj_index1);
779
780           next0 = adj0->lookup_next_index;
781           next1 = adj1->lookup_next_index;
782
783           /* Use flow hash to compute multipath adjacency. */
784           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
785           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
786           if (PREDICT_FALSE (adj0->n_adj > 1))
787             {
788               flow_hash_config0 = 
789                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
790               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
791                 ip4_compute_flow_hash (ip0, flow_hash_config0);
792             }
793           if (PREDICT_FALSE(adj1->n_adj > 1))
794             {
795               flow_hash_config1 = 
796                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
797               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
798                 ip4_compute_flow_hash (ip1, flow_hash_config1);
799             }
800
801           ASSERT (adj0->n_adj > 0);
802           ASSERT (adj1->n_adj > 0);
803           ASSERT (is_pow2 (adj0->n_adj));
804           ASSERT (is_pow2 (adj1->n_adj));
805           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
806           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
807
808           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
809           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
810
811           vlib_increment_combined_counter 
812               (cm, cpu_index, adj_index0, 1,
813                vlib_buffer_length_in_chain (vm, p0) 
814                + sizeof(ethernet_header_t));
815           vlib_increment_combined_counter 
816               (cm, cpu_index, adj_index1, 1,
817                vlib_buffer_length_in_chain (vm, p1)
818                + sizeof(ethernet_header_t));
819
820           from += 2;
821           to_next += 2;
822           n_left_to_next -= 2;
823           n_left_from -= 2;
824
825           wrong_next = (next0 != next) + 2*(next1 != next);
826           if (PREDICT_FALSE (wrong_next != 0))
827             {
828               switch (wrong_next)
829                 {
830                 case 1:
831                   /* A B A */
832                   to_next[-2] = pi1;
833                   to_next -= 1;
834                   n_left_to_next += 1;
835                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
836                   break;
837
838                 case 2:
839                   /* A A B */
840                   to_next -= 1;
841                   n_left_to_next += 1;
842                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
843                   break;
844
845                 case 3:
846                   /* A B C */
847                   to_next -= 2;
848                   n_left_to_next += 2;
849                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
850                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
851                   if (next0 == next1)
852                     {
853                       /* A B B */
854                       vlib_put_next_frame (vm, node, next, n_left_to_next);
855                       next = next1;
856                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
857                     }
858                 }
859             }
860         }
861     
862       while (n_left_from > 0 && n_left_to_next > 0)
863         {
864           vlib_buffer_t * p0;
865           ip4_header_t * ip0;
866           __attribute__((unused)) tcp_header_t * tcp0;
867           ip_lookup_next_t next0;
868           ip_adjacency_t * adj0;
869           ip4_fib_mtrie_t * mtrie0;
870           ip4_fib_mtrie_leaf_t leaf0;
871           ip4_address_t * dst_addr0;
872           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
873           u32 flow_hash_config0, hash_c0;
874
875           pi0 = from[0];
876           to_next[0] = pi0;
877
878           p0 = vlib_get_buffer (vm, pi0);
879
880           ip0 = vlib_buffer_get_current (p0);
881
882           if (is_indirect)
883             {
884               ip_adjacency_t * iadj0;
885               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
886               dst_addr0 = &iadj0->indirect.next_hop.ip4;
887             }
888           else
889             {
890               dst_addr0 = &ip0->dst_address;
891             }
892
893           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
894           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
895             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
896
897           if (! lookup_for_responses_to_locally_received_packets)
898             {
899               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
900
901               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
902
903               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
904             }
905
906           tcp0 = (void *) (ip0 + 1);
907
908           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
909                          || ip0->protocol == IP_PROTOCOL_UDP);
910
911           if (! lookup_for_responses_to_locally_received_packets)
912             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
913
914           if (! lookup_for_responses_to_locally_received_packets)
915             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
916
917           if (! lookup_for_responses_to_locally_received_packets)
918             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
919
920           if (lookup_for_responses_to_locally_received_packets)
921             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
922           else
923             {
924               /* Handle default route. */
925               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
926               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
927             }
928
929           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
930                                                            dst_addr0,
931                                                            /* no_default_route */ 0));
932
933           adj0 = ip_get_adjacency (lm, adj_index0);
934
935           next0 = adj0->lookup_next_index;
936
937           /* Use flow hash to compute multipath adjacency. */
938           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
939           if (PREDICT_FALSE(adj0->n_adj > 1))
940             {
941               flow_hash_config0 = 
942                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
943
944               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
945                 ip4_compute_flow_hash (ip0, flow_hash_config0);
946             }
947
948           ASSERT (adj0->n_adj > 0);
949           ASSERT (is_pow2 (adj0->n_adj));
950           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
951
952           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
953
954           vlib_increment_combined_counter 
955               (cm, cpu_index, adj_index0, 1,
956                vlib_buffer_length_in_chain (vm, p0)
957                + sizeof(ethernet_header_t));
958
959           from += 1;
960           to_next += 1;
961           n_left_to_next -= 1;
962           n_left_from -= 1;
963
964           if (PREDICT_FALSE (next0 != next))
965             {
966               n_left_to_next += 1;
967               vlib_put_next_frame (vm, node, next, n_left_to_next);
968               next = next0;
969               vlib_get_next_frame (vm, node, next,
970                                    to_next, n_left_to_next);
971               to_next[0] = pi0;
972               to_next += 1;
973               n_left_to_next -= 1;
974             }
975         }
976
977       vlib_put_next_frame (vm, node, next, n_left_to_next);
978     }
979
980   return frame->n_vectors;
981 }
982
983 static uword
984 ip4_lookup (vlib_main_t * vm,
985             vlib_node_runtime_t * node,
986             vlib_frame_t * frame)
987 {
988   return ip4_lookup_inline (vm, node, frame,
989                             /* lookup_for_responses_to_locally_received_packets */ 0,
990                             /* is_indirect */ 0);
991
992 }
993
994 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
995                                         ip_adjacency_t * adj,
996                                         u32 sw_if_index,
997                                         u32 if_address_index)
998 {
999   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1000   ip_lookup_next_t n;
1001   vnet_l3_packet_type_t packet_type;
1002   u32 node_index;
1003
1004   if (hw->hw_class_index == ethernet_hw_interface_class.index
1005       || hw->hw_class_index == srp_hw_interface_class.index)
1006     {
1007       /* 
1008        * We have a bit of a problem in this case. ip4-arp uses
1009        * the rewrite_header.next_index to hand pkts to the
1010        * indicated inteface output node. We can end up in
1011        * ip4_rewrite_local, too, which also pays attention to 
1012        * rewrite_header.next index. Net result: a hack in
1013        * ip4_rewrite_local...
1014        */
1015       n = IP_LOOKUP_NEXT_ARP;
1016       node_index = ip4_arp_node.index;
1017       adj->if_address_index = if_address_index;
1018       adj->arp.next_hop.ip4.as_u32 = 0;
1019       ip46_address_reset(&adj->arp.next_hop);
1020       packet_type = VNET_L3_PACKET_TYPE_ARP;
1021     }
1022   else
1023     {
1024       n = IP_LOOKUP_NEXT_REWRITE;
1025       node_index = ip4_rewrite_node.index;
1026       packet_type = VNET_L3_PACKET_TYPE_IP4;
1027     }
1028
1029   adj->lookup_next_index = n;
1030   vnet_rewrite_for_sw_interface
1031     (vnm,
1032      packet_type,
1033      sw_if_index,
1034      node_index,
1035      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1036      &adj->rewrite_header,
1037      sizeof (adj->rewrite_data));
1038 }
1039
1040 static void
1041 ip4_add_interface_routes (u32 sw_if_index,
1042                           ip4_main_t * im, u32 fib_index,
1043                           ip_interface_address_t * a)
1044 {
1045   vnet_main_t * vnm = vnet_get_main();
1046   ip_lookup_main_t * lm = &im->lookup_main;
1047   ip_adjacency_t * adj;
1048   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1049   ip4_add_del_route_args_t x;
1050   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1051   u32 classify_table_index;
1052
1053   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1054   x.table_index_or_table_id = fib_index;
1055   x.flags = (IP4_ROUTE_FLAG_ADD
1056              | IP4_ROUTE_FLAG_FIB_INDEX
1057              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1058   x.dst_address = address[0];
1059   x.dst_address_length = a->address_length;
1060   x.n_add_adj = 0;
1061   x.add_adj = 0;
1062
1063   a->neighbor_probe_adj_index = ~0;
1064   if (a->address_length < 32)
1065     {
1066       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1067                               &x.adj_index);
1068       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1069       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1070       ip4_add_del_route (im, &x);
1071       a->neighbor_probe_adj_index = x.adj_index;
1072     }
1073   
1074   /* Add e.g. 1.1.1.1/32 as local to this host. */
1075   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1076                           &x.adj_index);
1077   
1078   classify_table_index = ~0;
1079   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1080     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1081   if (classify_table_index != (u32) ~0)
1082     {
1083       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1084       adj->classify.table_index = classify_table_index;
1085     }
1086   else
1087     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1088   
1089   adj->if_address_index = a - lm->if_address_pool;
1090   adj->rewrite_header.sw_if_index = sw_if_index;
1091   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1092   /* 
1093    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1094    * fail an RPF-ish check, but still go thru the rewrite code...
1095    */
1096   adj->rewrite_header.data_bytes = 0;
1097
1098   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1099   x.dst_address_length = 32;
1100   ip4_add_del_route (im, &x);
1101 }
1102
1103 static void
1104 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1105 {
1106   ip4_add_del_route_args_t x;
1107
1108   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1109   x.table_index_or_table_id = fib_index;
1110   x.flags = (IP4_ROUTE_FLAG_DEL
1111              | IP4_ROUTE_FLAG_FIB_INDEX
1112              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1113   x.dst_address = address[0];
1114   x.dst_address_length = address_length;
1115   x.adj_index = ~0;
1116   x.n_add_adj = 0;
1117   x.add_adj = 0;
1118
1119   if (address_length < 32)
1120     ip4_add_del_route (im, &x);
1121
1122   x.dst_address_length = 32;
1123   ip4_add_del_route (im, &x);
1124
1125   ip4_delete_matching_routes (im,
1126                               fib_index,
1127                               IP4_ROUTE_FLAG_FIB_INDEX,
1128                               address,
1129                               address_length);
1130 }
1131
/*
 * An IPv4 address attached to a software interface.
 * NOTE(review): no use of this type is visible in this part of the file;
 * field meanings below are read off the names -- confirm against users.
 */
1132 typedef struct {
1133     u32 sw_if_index;          /* software interface index */
1134     ip4_address_t address;    /* the IPv4 address */
1135     u32 length;               /* presumably the prefix length in bits -- TODO confirm */
1136 } ip4_interface_address_t;
1137
1138 static clib_error_t *
1139 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1140                                         u32 sw_if_index,
1141                                         ip4_address_t * new_address,
1142                                         u32 new_length,
1143                                         u32 redistribute,
1144                                         u32 insert_routes,
1145                                         u32 is_del);
1146
1147 static clib_error_t *
1148 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1149                                         u32 sw_if_index,
1150                                         ip4_address_t * address,
1151                                         u32 address_length,
1152                                         u32 redistribute,
1153                                         u32 insert_routes,
1154                                         u32 is_del)
1155 {
1156   vnet_main_t * vnm = vnet_get_main();
1157   ip4_main_t * im = &ip4_main;
1158   ip_lookup_main_t * lm = &im->lookup_main;
1159   clib_error_t * error = 0;
1160   u32 if_address_index, elts_before;
1161   ip4_address_fib_t ip4_af, * addr_fib = 0;
1162
1163   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1164   ip4_addr_fib_init (&ip4_af, address,
1165                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1166   vec_add1 (addr_fib, ip4_af);
1167
1168   /* When adding an address check that it does not conflict with an existing address. */
1169   if (! is_del)
1170     {
1171       ip_interface_address_t * ia;
1172       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1173                                     0 /* honor unnumbered */,
1174       ({
1175         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1176
1177         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1178             || ip4_destination_matches_route (im, x, address, address_length))
1179           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1180                                     format_ip4_address_and_length, address, address_length,
1181                                     format_ip4_address_and_length, x, ia->address_length,
1182                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1183       }));
1184     }
1185
1186   elts_before = pool_elts (lm->if_address_pool);
1187
1188   error = ip_interface_address_add_del
1189     (lm,
1190      sw_if_index,
1191      addr_fib,
1192      address_length,
1193      is_del,
1194      &if_address_index);
1195   if (error)
1196     goto done;
1197   
1198   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1199     {
1200       if (is_del)
1201         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1202                                   address_length);
1203       
1204       else
1205           ip4_add_interface_routes (sw_if_index,
1206                                     im, ip4_af.fib_index,
1207                                     pool_elt_at_index 
1208                                     (lm->if_address_pool, if_address_index));
1209     }
1210
1211   /* If pool did not grow/shrink: add duplicate address. */
1212   if (elts_before != pool_elts (lm->if_address_pool))
1213     {
1214       ip4_add_del_interface_address_callback_t * cb;
1215       vec_foreach (cb, im->add_del_interface_address_callbacks)
1216         cb->function (im, cb->function_opaque, sw_if_index,
1217                       address, address_length,
1218                       if_address_index,
1219                       is_del);
1220     }
1221
1222  done:
1223   vec_free (addr_fib);
1224   return error;
1225 }
1226
1227 clib_error_t *
1228 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1229                                ip4_address_t * address, u32 address_length,
1230                                u32 is_del)
1231 {
1232   return ip4_add_del_interface_address_internal
1233     (vm, sw_if_index, address, address_length,
1234      /* redistribute */ 1,
1235      /* insert_routes */ 1,
1236      is_del);
1237 }
1238
1239 static clib_error_t *
1240 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1241                                 u32 sw_if_index,
1242                                 u32 flags)
1243 {
1244   ip4_main_t * im = &ip4_main;
1245   ip_interface_address_t * ia;
1246   ip4_address_t * a;
1247   u32 is_admin_up, fib_index;
1248   
1249   /* Fill in lookup tables with default table (0). */
1250   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1251   
1252   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1253   
1254   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1255   
1256   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1257
1258   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1259                                 0 /* honor unnumbered */,
1260   ({
1261     a = ip_interface_address_get_address (&im->lookup_main, ia);
1262     if (is_admin_up)
1263       ip4_add_interface_routes (sw_if_index,
1264                                 im, fib_index,
1265                                 ia);
1266     else
1267       ip4_del_interface_routes (im, fib_index,
1268                                 a, ia->address_length);
1269   }));
1270
1271   return 0;
1272 }
1273  
1274 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1275
1276 static clib_error_t *
1277 ip4_sw_interface_add_del (vnet_main_t * vnm,
1278                           u32 sw_if_index,
1279                           u32 is_add)
1280 {
1281   vlib_main_t * vm = vnm->vlib_main;
1282   ip4_main_t * im = &ip4_main;
1283   ip_lookup_main_t * lm = &im->lookup_main;
1284   u32 ci, cast;
1285
1286   for (cast = 0; cast < VNET_N_CAST; cast++)
1287     {
1288       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1289       vnet_config_main_t * vcm = &cm->config_main;
1290
1291       if (! vcm->node_index_by_feature_index)
1292         {
1293           if (cast == VNET_UNICAST)
1294             {
1295               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1296               static char * feature_nodes[] = {
1297                 [IP4_RX_FEATURE_CHECK_ACCESS] = "ip4-inacl",
1298                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX] = "ip4-source-check-via-rx",
1299                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY] = "ip4-source-check-via-any",
1300                 [IP4_RX_FEATURE_IPSEC] = "ipsec-input-ip4",
1301                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1302                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup",
1303               };
1304
1305               vnet_config_init (vm, vcm,
1306                                 start_nodes, ARRAY_LEN (start_nodes),
1307                                 feature_nodes, ARRAY_LEN (feature_nodes));
1308             }
1309           else
1310             {
1311               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1312               static char * feature_nodes[] = {
1313                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1314                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup-multicast",
1315               };
1316
1317               vnet_config_init (vm, vcm,
1318                                 start_nodes, ARRAY_LEN (start_nodes),
1319                                 feature_nodes, ARRAY_LEN (feature_nodes));
1320             }
1321         }
1322
1323       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1324       ci = cm->config_index_by_sw_if_index[sw_if_index];
1325
1326       if (is_add)
1327         ci = vnet_config_add_feature (vm, vcm,
1328                                       ci,
1329                                       IP4_RX_FEATURE_LOOKUP,
1330                                       /* config data */ 0,
1331                                       /* # bytes of config data */ 0);
1332       else
1333         ci = vnet_config_del_feature (vm, vcm,
1334                                       ci,
1335                                       IP4_RX_FEATURE_LOOKUP,
1336                                       /* config data */ 0,
1337                                       /* # bytes of config data */ 0);
1338
1339       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1340     }
1341
1342   return /* no error */ 0;
1343 }
1344
1345 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1346
1347
1348 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1349   .function = ip4_lookup,
1350   .name = "ip4-lookup",
1351   .vector_size = sizeof (u32),
1352
1353   .n_next_nodes = IP_LOOKUP_N_NEXT,
1354   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1355 };
1356
1357 static uword
1358 ip4_indirect (vlib_main_t * vm,
1359                vlib_node_runtime_t * node,
1360                vlib_frame_t * frame)
1361 {
1362   return ip4_lookup_inline (vm, node, frame,
1363                             /* lookup_for_responses_to_locally_received_packets */ 0,
1364                             /* is_indirect */ 1);
1365 }
1366
1367 VLIB_REGISTER_NODE (ip4_indirect_node) = {
1368   .function = ip4_indirect,
1369   .name = "ip4-indirect",
1370   .vector_size = sizeof (u32),
1371
1372   .n_next_nodes = IP_LOOKUP_N_NEXT,
1373   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1374 };
1375
1376
1377 /* Global IP4 main: the ip4_main_t instance that functions in this file reach as &ip4_main. */
1378 ip4_main_t ip4_main;
1379
1380 clib_error_t *
1381 ip4_lookup_init (vlib_main_t * vm)
1382 {
1383   ip4_main_t * im = &ip4_main;
1384   uword i;
1385
1386   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1387     {
1388       u32 m;
1389
1390       if (i < 32)
1391         m = pow2_mask (i) << (32 - i);
1392       else 
1393         m = ~0;
1394       im->fib_masks[i] = clib_host_to_net_u32 (m);
1395     }
1396
1397   /* Create FIB with index 0 and table id of 0. */
1398   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1399
1400   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1401
1402   {
1403     pg_node_t * pn;
1404     pn = pg_get_node (ip4_lookup_node.index);
1405     pn->unformat_edit = unformat_pg_ip4_header;
1406   }
1407
1408   {
1409     ethernet_arp_header_t h;
1410
1411     memset (&h, 0, sizeof (h));
1412
1413     /* Set target ethernet address to all zeros. */
1414     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1415
1416 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1417 #define _8(f,v) h.f = v;
1418     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1419     _16 (l3_type, ETHERNET_TYPE_IP4);
1420     _8 (n_l2_address_bytes, 6);
1421     _8 (n_l3_address_bytes, 4);
1422     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1423 #undef _16
1424 #undef _8
1425
1426     vlib_packet_template_init (vm,
1427                                &im->ip4_arp_request_packet_template,
1428                                /* data */ &h,
1429                                sizeof (h),
1430                                /* alloc chunk size */ 8,
1431                                "ip4 arp");
1432   }
1433
1434   return 0;
1435 }
1436
1437 VLIB_INIT_FUNCTION (ip4_lookup_init);
1438
/*
 * Trace record written by ip4_forward_next_trace() for each traced
 * buffer and rendered by format_ip4_forward_next_trace().
 */
1439 typedef struct {
1440   /* Adjacency taken. */
1441   u32 adj_index;
1442   u32 flow_hash;   /* copy of the buffer's ip.flow_hash */
1443   u32 fib_index;   /* FIB index mapped from the buffer's RX sw_if_index */
1444
1445   /* Packet data, possibly *after* rewrite. */
    /* NOTE(review): three u32 fields precede this array, so with 4-byte u32
       the record is 72 bytes, not 64 -- confirm whether
       64 - 3*sizeof(u32) was intended. */
1446   u8 packet_data[64 - 1*sizeof(u32)];
1447 } ip4_forward_next_trace_t;
1448
1449 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1450 {
1451   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1452   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1453   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1454   vnet_main_t * vnm = vnet_get_main();
1455   ip4_main_t * im = &ip4_main;
1456   ip_adjacency_t * adj;
1457   uword indent = format_get_indent (s);
1458
1459   adj = ip_get_adjacency (&im->lookup_main, t->adj_index);
1460   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1461               t->fib_index, t->adj_index, format_ip_adjacency,
1462               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1463   switch (adj->lookup_next_index)
1464     {
1465     case IP_LOOKUP_NEXT_REWRITE:
1466       s = format (s, "\n%U%U",
1467                   format_white_space, indent,
1468                   format_ip_adjacency_packet_data,
1469                   vnm, &im->lookup_main, t->adj_index,
1470                   t->packet_data, sizeof (t->packet_data));
1471       break;
1472
1473     default:
1474       break;
1475     }
1476
1477   return s;
1478 }
1479
1480 /* Common trace function for all ip4-forward next nodes. */
1481 void
1482 ip4_forward_next_trace (vlib_main_t * vm,
1483                         vlib_node_runtime_t * node,
1484                         vlib_frame_t * frame,
1485                         vlib_rx_or_tx_t which_adj_index)
1486 {
1487   u32 * from, n_left;
1488   ip4_main_t * im = &ip4_main;
1489
1490   n_left = frame->n_vectors;
1491   from = vlib_frame_vector_args (frame);
1492   
1493   while (n_left >= 4)
1494     {
1495       u32 bi0, bi1;
1496       vlib_buffer_t * b0, * b1;
1497       ip4_forward_next_trace_t * t0, * t1;
1498
1499       /* Prefetch next iteration. */
1500       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1501       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1502
1503       bi0 = from[0];
1504       bi1 = from[1];
1505
1506       b0 = vlib_get_buffer (vm, bi0);
1507       b1 = vlib_get_buffer (vm, bi1);
1508
1509       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1510         {
1511           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1512           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1513           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1514           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1515                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1516           clib_memcpy (t0->packet_data,
1517                   vlib_buffer_get_current (b0),
1518                   sizeof (t0->packet_data));
1519         }
1520       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1521         {
1522           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1523           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1524           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1525           t1->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1526                              vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1527           clib_memcpy (t1->packet_data,
1528                   vlib_buffer_get_current (b1),
1529                   sizeof (t1->packet_data));
1530         }
1531       from += 2;
1532       n_left -= 2;
1533     }
1534
1535   while (n_left >= 1)
1536     {
1537       u32 bi0;
1538       vlib_buffer_t * b0;
1539       ip4_forward_next_trace_t * t0;
1540
1541       bi0 = from[0];
1542
1543       b0 = vlib_get_buffer (vm, bi0);
1544
1545       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1546         {
1547           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1548           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1549           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1550           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1551                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1552           clib_memcpy (t0->packet_data,
1553                   vlib_buffer_get_current (b0),
1554                   sizeof (t0->packet_data));
1555         }
1556       from += 1;
1557       n_left -= 1;
1558     }
1559 }
1560
1561 static uword
1562 ip4_drop_or_punt (vlib_main_t * vm,
1563                   vlib_node_runtime_t * node,
1564                   vlib_frame_t * frame,
1565                   ip4_error_t error_code)
1566 {
1567   u32 * buffers = vlib_frame_vector_args (frame);
1568   uword n_packets = frame->n_vectors;
1569
1570   vlib_error_drop_buffers (vm, node,
1571                            buffers,
1572                            /* stride */ 1,
1573                            n_packets,
1574                            /* next */ 0,
1575                            ip4_input_node.index,
1576                            error_code);
1577
1578   if (node->flags & VLIB_NODE_FLAG_TRACE)
1579     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1580
1581   return n_packets;
1582 }
1583
1584 static uword
1585 ip4_drop (vlib_main_t * vm,
1586           vlib_node_runtime_t * node,
1587           vlib_frame_t * frame)
1588 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1589
1590 static uword
1591 ip4_punt (vlib_main_t * vm,
1592           vlib_node_runtime_t * node,
1593           vlib_frame_t * frame)
1594 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1595
1596 static uword
1597 ip4_miss (vlib_main_t * vm,
1598           vlib_node_runtime_t * node,
1599           vlib_frame_t * frame)
1600 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1601
1602 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1603   .function = ip4_drop,
1604   .name = "ip4-drop",
1605   .vector_size = sizeof (u32),
1606
1607   .format_trace = format_ip4_forward_next_trace,
1608
1609   .n_next_nodes = 1,
1610   .next_nodes = {
1611     [0] = "error-drop",
1612   },
1613 };
1614
1615 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1616   .function = ip4_punt,
1617   .name = "ip4-punt",
1618   .vector_size = sizeof (u32),
1619
1620   .format_trace = format_ip4_forward_next_trace,
1621
1622   .n_next_nodes = 1,
1623   .next_nodes = {
1624     [0] = "error-punt",
1625   },
1626 };
1627
1628 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1629   .function = ip4_miss,
1630   .name = "ip4-miss",
1631   .vector_size = sizeof (u32),
1632
1633   .format_trace = format_ip4_forward_next_trace,
1634
1635   .n_next_nodes = 1,
1636   .next_nodes = {
1637     [0] = "error-drop",
1638   },
1639 };
1640
1641 /* Compute TCP/UDP/ICMP4 checksum in software. */
1642 u16
1643 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1644                               ip4_header_t * ip0)
1645 {
1646   ip_csum_t sum0;
1647   u32 ip_header_length, payload_length_host_byte_order;
1648   u32 n_this_buffer, n_bytes_left;
1649   u16 sum16;
1650   void * data_this_buffer;
1651   
1652   /* Initialize checksum with ip header. */
1653   ip_header_length = ip4_header_bytes (ip0);
1654   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1655   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1656
1657   if (BITS (uword) == 32)
1658     {
1659       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1660       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1661     }
1662   else
1663     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1664
1665   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1666   data_this_buffer = (void *) ip0 + ip_header_length;
1667   if (n_this_buffer + ip_header_length > p0->current_length)
1668     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1669   while (1)
1670     {
1671       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1672       n_bytes_left -= n_this_buffer;
1673       if (n_bytes_left == 0)
1674         break;
1675
1676       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1677       p0 = vlib_get_buffer (vm, p0->next_buffer);
1678       data_this_buffer = vlib_buffer_get_current (p0);
1679       n_this_buffer = p0->current_length;
1680     }
1681
1682   sum16 = ~ ip_csum_fold (sum0);
1683
1684   return sum16;
1685 }
1686
1687 static u32
1688 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1689 {
1690   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1691   udp_header_t * udp0;
1692   u16 sum16;
1693
1694   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1695           || ip0->protocol == IP_PROTOCOL_UDP);
1696
1697   udp0 = (void *) (ip0 + 1);
1698   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1699     {
1700       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1701                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1702       return p0->flags;
1703     }
1704
1705   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1706
1707   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1708                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1709
1710   return p0->flags;
1711 }
1712
1713 static uword
1714 ip4_local (vlib_main_t * vm,
1715            vlib_node_runtime_t * node,
1716            vlib_frame_t * frame)
1717 {
1718   ip4_main_t * im = &ip4_main;
1719   ip_lookup_main_t * lm = &im->lookup_main;
1720   ip_local_next_t next_index;
1721   u32 * from, * to_next, n_left_from, n_left_to_next;
1722   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1723
1724   from = vlib_frame_vector_args (frame);
1725   n_left_from = frame->n_vectors;
1726   next_index = node->cached_next_index;
1727   
1728   if (node->flags & VLIB_NODE_FLAG_TRACE)
1729     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1730
1731   while (n_left_from > 0)
1732     {
1733       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1734
1735       while (n_left_from >= 4 && n_left_to_next >= 2)
1736         {
1737           vlib_buffer_t * p0, * p1;
1738           ip4_header_t * ip0, * ip1;
1739           udp_header_t * udp0, * udp1;
1740           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1741           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1742           ip_adjacency_t * adj0, * adj1;
1743           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
1744           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
1745           i32 len_diff0, len_diff1;
1746           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1747           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1748           u8 enqueue_code;
1749       
1750           pi0 = to_next[0] = from[0];
1751           pi1 = to_next[1] = from[1];
1752           from += 2;
1753           n_left_from -= 2;
1754           to_next += 2;
1755           n_left_to_next -= 2;
1756       
1757           p0 = vlib_get_buffer (vm, pi0);
1758           p1 = vlib_get_buffer (vm, pi1);
1759
1760           ip0 = vlib_buffer_get_current (p0);
1761           ip1 = vlib_buffer_get_current (p1);
1762
1763           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1764                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1765           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1766                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1767
1768           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1769           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
1770
1771           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1772
1773           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1774           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1775
1776           proto0 = ip0->protocol;
1777           proto1 = ip1->protocol;
1778           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1779           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1780           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1781           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1782
1783           flags0 = p0->flags;
1784           flags1 = p1->flags;
1785
1786           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1787           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1788
1789           udp0 = ip4_next_header (ip0);
1790           udp1 = ip4_next_header (ip1);
1791
1792           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1793           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1794           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1795
1796           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1797           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1798
1799           /* Verify UDP length. */
1800           ip_len0 = clib_net_to_host_u16 (ip0->length);
1801           ip_len1 = clib_net_to_host_u16 (ip1->length);
1802           udp_len0 = clib_net_to_host_u16 (udp0->length);
1803           udp_len1 = clib_net_to_host_u16 (udp1->length);
1804
1805           len_diff0 = ip_len0 - udp_len0;
1806           len_diff1 = ip_len1 - udp_len1;
1807
1808           len_diff0 = is_udp0 ? len_diff0 : 0;
1809           len_diff1 = is_udp1 ? len_diff1 : 0;
1810
1811           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1812                                 & good_tcp_udp0 & good_tcp_udp1)))
1813             {
1814               if (is_tcp_udp0)
1815                 {
1816                   if (is_tcp_udp0
1817                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1818                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1819                   good_tcp_udp0 =
1820                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1821                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1822                 }
1823               if (is_tcp_udp1)
1824                 {
1825                   if (is_tcp_udp1
1826                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1827                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1828                   good_tcp_udp1 =
1829                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1830                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1831                 }
1832             }
1833
1834           good_tcp_udp0 &= len_diff0 >= 0;
1835           good_tcp_udp1 &= len_diff1 >= 0;
1836
1837           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1838           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1839
1840           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1841
1842           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1843           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1844
1845           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1846           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1847                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1848                     : error0);
1849           error1 = (is_tcp_udp1 && ! good_tcp_udp1
1850                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1851                     : error1);
1852
1853           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1854           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1855
1856           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1857           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1858
1859           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1860           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
1861
1862           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1863                                                            &ip0->src_address,
1864                                                            /* no_default_route */ 1));
1865           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
1866                                                            &ip1->src_address,
1867                                                            /* no_default_route */ 1));
1868
1869           adj0 = ip_get_adjacency (lm, adj_index0);
1870           adj1 = ip_get_adjacency (lm, adj_index1);
1871
1872           /* 
1873            * Must have a route to source otherwise we drop the packet.
1874            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1875            */
1876           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1877                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1878                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
1879                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1880                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1881                     ? IP4_ERROR_SRC_LOOKUP_MISS
1882                     : error0);
1883           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
1884                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1885                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
1886                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1887                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1888                     ? IP4_ERROR_SRC_LOOKUP_MISS
1889                     : error1);
1890
1891           next0 = lm->local_next_by_ip_protocol[proto0];
1892           next1 = lm->local_next_by_ip_protocol[proto1];
1893
1894           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1895           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1896
1897           p0->error = error0 ? error_node->errors[error0] : 0;
1898           p1->error = error1 ? error_node->errors[error1] : 0;
1899
1900           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1901
1902           if (PREDICT_FALSE (enqueue_code != 0))
1903             {
1904               switch (enqueue_code)
1905                 {
1906                 case 1:
1907                   /* A B A */
1908                   to_next[-2] = pi1;
1909                   to_next -= 1;
1910                   n_left_to_next += 1;
1911                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1912                   break;
1913
1914                 case 2:
1915                   /* A A B */
1916                   to_next -= 1;
1917                   n_left_to_next += 1;
1918                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1919                   break;
1920
1921                 case 3:
1922                   /* A B B or A B C */
1923                   to_next -= 2;
1924                   n_left_to_next += 2;
1925                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1926                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1927                   if (next0 == next1)
1928                     {
1929                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1930                       next_index = next1;
1931                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1932                     }
1933                   break;
1934                 }
1935             }
1936         }
1937
1938       while (n_left_from > 0 && n_left_to_next > 0)
1939         {
1940           vlib_buffer_t * p0;
1941           ip4_header_t * ip0;
1942           udp_header_t * udp0;
1943           ip4_fib_mtrie_t * mtrie0;
1944           ip4_fib_mtrie_leaf_t leaf0;
1945           ip_adjacency_t * adj0;
1946           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
1947           i32 len_diff0;
1948           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1949       
1950           pi0 = to_next[0] = from[0];
1951           from += 1;
1952           n_left_from -= 1;
1953           to_next += 1;
1954           n_left_to_next -= 1;
1955       
1956           p0 = vlib_get_buffer (vm, pi0);
1957
1958           ip0 = vlib_buffer_get_current (p0);
1959
1960           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1961                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1962
1963           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1964
1965           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1966
1967           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1968
1969           proto0 = ip0->protocol;
1970           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1971           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1972
1973           flags0 = p0->flags;
1974
1975           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1976
1977           udp0 = ip4_next_header (ip0);
1978
1979           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1980           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1981
1982           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1983
1984           /* Verify UDP length. */
1985           ip_len0 = clib_net_to_host_u16 (ip0->length);
1986           udp_len0 = clib_net_to_host_u16 (udp0->length);
1987
1988           len_diff0 = ip_len0 - udp_len0;
1989
1990           len_diff0 = is_udp0 ? len_diff0 : 0;
1991
1992           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1993             {
1994               if (is_tcp_udp0)
1995                 {
1996                   if (is_tcp_udp0
1997                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1998                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1999                   good_tcp_udp0 =
2000                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2001                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2002                 }
2003             }
2004
2005           good_tcp_udp0 &= len_diff0 >= 0;
2006
2007           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2008
2009           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2010
2011           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2012
2013           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2014           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2015                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2016                     : error0);
2017
2018           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2019
2020           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2021           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2022
2023           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2024                                                            &ip0->src_address,
2025                                                            /* no_default_route */ 1));
2026
2027           adj0 = ip_get_adjacency (lm, adj_index0);
2028
2029           /* Must have a route to source otherwise we drop the packet. */
2030           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2031                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2032                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2033                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2034                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2035                     ? IP4_ERROR_SRC_LOOKUP_MISS
2036                     : error0);
2037
2038           next0 = lm->local_next_by_ip_protocol[proto0];
2039
2040           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2041
2042           p0->error = error0? error_node->errors[error0] : 0;
2043
2044           if (PREDICT_FALSE (next0 != next_index))
2045             {
2046               n_left_to_next += 1;
2047               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2048
2049               next_index = next0;
2050               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2051               to_next[0] = pi0;
2052               to_next += 1;
2053               n_left_to_next -= 1;
2054             }
2055         }
2056   
2057       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2058     }
2059
2060   return frame->n_vectors;
2061 }
2062
2063 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2064   .function = ip4_local,
2065   .name = "ip4-local",
2066   .vector_size = sizeof (u32),
2067
2068   .format_trace = format_ip4_forward_next_trace,
2069
2070   .n_next_nodes = IP_LOCAL_N_NEXT,
2071   .next_nodes = {
2072     [IP_LOCAL_NEXT_DROP] = "error-drop",
2073     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2074     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2075     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2076   },
2077 };
2078
2079 void ip4_register_protocol (u32 protocol, u32 node_index)
2080 {
2081   vlib_main_t * vm = vlib_get_main();
2082   ip4_main_t * im = &ip4_main;
2083   ip_lookup_main_t * lm = &im->lookup_main;
2084
2085   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2086   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2087 }
2088
2089 static clib_error_t *
2090 show_ip_local_command_fn (vlib_main_t * vm,
2091                           unformat_input_t * input,
2092                          vlib_cli_command_t * cmd)
2093 {
2094   ip4_main_t * im = &ip4_main;
2095   ip_lookup_main_t * lm = &im->lookup_main;
2096   int i;
2097
2098   vlib_cli_output (vm, "Protocols handled by ip4_local");
2099   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2100     {
2101       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2102         vlib_cli_output (vm, "%d", i);
2103     }
2104   return 0;
2105 }
2106
2107
2108
2109 VLIB_CLI_COMMAND (show_ip_local, static) = {
2110   .path = "show ip local",
2111   .function = show_ip_local_command_fn,
2112   .short_help = "Show ip local protocol table",
2113 };
2114
2115 static uword
2116 ip4_arp (vlib_main_t * vm,
2117          vlib_node_runtime_t * node,
2118          vlib_frame_t * frame)
2119 {
2120   vnet_main_t * vnm = vnet_get_main();
2121   ip4_main_t * im = &ip4_main;
2122   ip_lookup_main_t * lm = &im->lookup_main;
2123   u32 * from, * to_next_drop;
2124   uword n_left_from, n_left_to_next_drop, next_index;
2125   static f64 time_last_seed_change = -1e100;
2126   static u32 hash_seeds[3];
2127   static uword hash_bitmap[256 / BITS (uword)]; 
2128   f64 time_now;
2129
2130   if (node->flags & VLIB_NODE_FLAG_TRACE)
2131     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2132
2133   time_now = vlib_time_now (vm);
2134   if (time_now - time_last_seed_change > 1e-3)
2135     {
2136       uword i;
2137       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2138                                              sizeof (hash_seeds));
2139       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2140         hash_seeds[i] = r[i];
2141
2142       /* Mark all hash keys as been no-seen before. */
2143       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2144         hash_bitmap[i] = 0;
2145
2146       time_last_seed_change = time_now;
2147     }
2148
2149   from = vlib_frame_vector_args (frame);
2150   n_left_from = frame->n_vectors;
2151   next_index = node->cached_next_index;
2152   if (next_index == IP4_ARP_NEXT_DROP)
2153     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2154
2155   while (n_left_from > 0)
2156     {
2157       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2158                            to_next_drop, n_left_to_next_drop);
2159
2160       while (n_left_from > 0 && n_left_to_next_drop > 0)
2161         {
2162           vlib_buffer_t * p0;
2163           ip4_header_t * ip0;
2164           ethernet_header_t * eh0;
2165           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2166           uword bm0;
2167           ip_adjacency_t * adj0;
2168
2169           pi0 = from[0];
2170
2171           p0 = vlib_get_buffer (vm, pi0);
2172
2173           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2174           adj0 = ip_get_adjacency (lm, adj_index0);
2175           ip0 = vlib_buffer_get_current (p0);
2176
2177           /* If packet destination is not local, send ARP to next hop */
2178           if (adj0->arp.next_hop.ip4.as_u32)
2179             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2180
2181           /* 
2182            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2183            * rewrite to this packet, we need to skip it here.
2184            * Note, to distinguish from src IP addr *.8.6.*, we
2185            * check for a bcast eth dest instead of IPv4 version.
2186            */
2187           eh0 = (ethernet_header_t*)ip0;
2188           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2189             {
2190               u32 vlan_num = 0;
2191               u16 * etype = &eh0->type;
2192               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2193                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2194                 {
2195                   vlan_num += 1;
2196                   etype += 2; //vlan tag also 16 bits, same as etype
2197                 }
2198               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2199                 {
2200                   vlib_buffer_advance (
2201                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2202                   ip0 = vlib_buffer_get_current (p0);
2203                 }
2204             }
2205
2206           a0 = hash_seeds[0];
2207           b0 = hash_seeds[1];
2208           c0 = hash_seeds[2];
2209
2210           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2211           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2212
2213           a0 ^= ip0->dst_address.data_u32;
2214           b0 ^= sw_if_index0;
2215
2216           hash_v3_finalize32 (a0, b0, c0);
2217
2218           c0 &= BITS (hash_bitmap) - 1;
2219           c0 = c0 / BITS (uword);
2220           m0 = (uword) 1 << (c0 % BITS (uword));
2221
2222           bm0 = hash_bitmap[c0];
2223           drop0 = (bm0 & m0) != 0;
2224
2225           /* Mark it as seen. */
2226           hash_bitmap[c0] = bm0 | m0;
2227
2228           from += 1;
2229           n_left_from -= 1;
2230           to_next_drop[0] = pi0;
2231           to_next_drop += 1;
2232           n_left_to_next_drop -= 1;
2233
2234           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2235
2236           if (drop0)
2237             continue;
2238
2239           /* 
2240            * Can happen if the control-plane is programming tables
2241            * with traffic flowing; at least that's today's lame excuse.
2242            */
2243           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2244             {
2245               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2246             }
2247           else
2248           /* Send ARP request. */
2249           {
2250             u32 bi0 = 0;
2251             vlib_buffer_t * b0;
2252             ethernet_arp_header_t * h0;
2253             vnet_hw_interface_t * hw_if0;
2254
2255             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2256
2257             /* Add rewrite/encap string for ARP packet. */
2258             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2259
2260             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2261
2262             /* Src ethernet address in ARP header. */
2263             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2264                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2265
2266             ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0);
2267
2268             /* Copy in destination address we are requesting. */
2269             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2270
2271             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2272             b0 = vlib_get_buffer (vm, bi0);
2273             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2274
2275             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2276
2277             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2278           }
2279         }
2280
2281       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2282     }
2283
2284   return frame->n_vectors;
2285 }
2286
2287 static char * ip4_arp_error_strings[] = {
2288   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2289   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2290   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2291   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2292   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2293 };
2294
2295 VLIB_REGISTER_NODE (ip4_arp_node) = {
2296   .function = ip4_arp,
2297   .name = "ip4-arp",
2298   .vector_size = sizeof (u32),
2299
2300   .format_trace = format_ip4_forward_next_trace,
2301
2302   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2303   .error_strings = ip4_arp_error_strings,
2304
2305   .n_next_nodes = IP4_ARP_N_NEXT,
2306   .next_nodes = {
2307     [IP4_ARP_NEXT_DROP] = "error-drop",
2308   },
2309 };
2310
/*
 * X-macro list of ip4-arp error codes (IP4_ARP_ERROR_ suffixes).  The user
 * defines _(name) before expanding it; arp_notrace_init expands it to add
 * each of these counters to the pcap drop-trace filter.  NON_ARP_ADJ is
 * not in the list.
 */
#define foreach_notrace_ip4_arp_error           \
_(DROP)                                         \
_(REQUEST_SENT)                                 \
_(REPLICATE_DROP)                               \
_(REPLICATE_FAIL)
2316
2317 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2318 {
2319   vlib_node_runtime_t *rt = 
2320     vlib_node_get_runtime (vm, ip4_arp_node.index);
2321
2322   /* don't trace ARP request packets */
2323 #define _(a)                                    \
2324     vnet_pcap_drop_trace_filter_add_del         \
2325         (rt->errors[IP4_ARP_ERROR_##a],         \
2326          1 /* is_add */);
2327     foreach_notrace_ip4_arp_error;
2328 #undef _
2329   return 0;
2330 }
2331
2332 VLIB_INIT_FUNCTION(arp_notrace_init);
2333
2334
2335 /* Send an ARP request to see if given destination is reachable on given interface. */
2336 clib_error_t *
2337 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2338 {
2339   vnet_main_t * vnm = vnet_get_main();
2340   ip4_main_t * im = &ip4_main;
2341   ethernet_arp_header_t * h;
2342   ip4_address_t * src;
2343   ip_interface_address_t * ia;
2344   ip_adjacency_t * adj;
2345   vnet_hw_interface_t * hi;
2346   vnet_sw_interface_t * si;
2347   vlib_buffer_t * b;
2348   u32 bi = 0;
2349
2350   si = vnet_get_sw_interface (vnm, sw_if_index);
2351
2352   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2353     {
2354       return clib_error_return (0, "%U: interface %U down",
2355                                 format_ip4_address, dst, 
2356                                 format_vnet_sw_if_index_name, vnm, 
2357                                 sw_if_index);
2358     }
2359
2360   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2361   if (! src)
2362     {
2363       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2364       return clib_error_return 
2365         (0, "no matching interface address for destination %U (interface %U)",
2366          format_ip4_address, dst,
2367          format_vnet_sw_if_index_name, vnm, sw_if_index);
2368     }
2369
2370   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2371
2372   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2373
2374   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2375
2376   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2377
2378   h->ip4_over_ethernet[0].ip4 = src[0];
2379   h->ip4_over_ethernet[1].ip4 = dst[0];
2380
2381   b = vlib_get_buffer (vm, bi);
2382   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2383
2384   /* Add encapsulation string for software interface (e.g. ethernet header). */
2385   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2386   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2387
2388   {
2389     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2390     u32 * to_next = vlib_frame_vector_args (f);
2391     to_next[0] = bi;
2392     f->n_vectors = 1;
2393     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2394   }
2395
2396   return /* no error */ 0;
2397 }
2398
/* Next-node indices for ip4 rewrite: drop first, then ARP. */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ARP = 1,
} ip4_rewrite_next_t;
2403
2404 always_inline uword
2405 ip4_rewrite_inline (vlib_main_t * vm,
2406                     vlib_node_runtime_t * node,
2407                     vlib_frame_t * frame,
2408                     int rewrite_for_locally_received_packets)
2409 {
2410   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2411   u32 * from = vlib_frame_vector_args (frame);
2412   u32 n_left_from, n_left_to_next, * to_next, next_index;
2413   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2414   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2415
2416   n_left_from = frame->n_vectors;
2417   next_index = node->cached_next_index;
2418   u32 cpu_index = os_get_cpu_number();
2419   
2420   while (n_left_from > 0)
2421     {
2422       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2423
2424       while (n_left_from >= 4 && n_left_to_next >= 2)
2425         {
2426           ip_adjacency_t * adj0, * adj1;
2427           vlib_buffer_t * p0, * p1;
2428           ip4_header_t * ip0, * ip1;
2429           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2430           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2431           u32 next0_override, next1_override;
2432       
2433           if (rewrite_for_locally_received_packets)
2434               next0_override = next1_override = 0;
2435
2436           /* Prefetch next iteration. */
2437           {
2438             vlib_buffer_t * p2, * p3;
2439
2440             p2 = vlib_get_buffer (vm, from[2]);
2441             p3 = vlib_get_buffer (vm, from[3]);
2442
2443             vlib_prefetch_buffer_header (p2, STORE);
2444             vlib_prefetch_buffer_header (p3, STORE);
2445
2446             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2447             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2448           }
2449
2450           pi0 = to_next[0] = from[0];
2451           pi1 = to_next[1] = from[1];
2452
2453           from += 2;
2454           n_left_from -= 2;
2455           to_next += 2;
2456           n_left_to_next -= 2;
2457       
2458           p0 = vlib_get_buffer (vm, pi0);
2459           p1 = vlib_get_buffer (vm, pi1);
2460
2461           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2462           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2463
2464           /* We should never rewrite a pkt using the MISS adjacency */
2465           ASSERT(adj_index0 && adj_index1);
2466
2467           ip0 = vlib_buffer_get_current (p0);
2468           ip1 = vlib_buffer_get_current (p1);
2469
2470           error0 = error1 = IP4_ERROR_NONE;
2471
2472           /* Decrement TTL & update checksum.
2473              Works either endian, so no need for byte swap. */
2474           if (! rewrite_for_locally_received_packets)
2475             {
2476               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2477
2478               /* Input node should have reject packets with ttl 0. */
2479               ASSERT (ip0->ttl > 0);
2480               ASSERT (ip1->ttl > 0);
2481
2482               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2483               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2484
2485               checksum0 += checksum0 >= 0xffff;
2486               checksum1 += checksum1 >= 0xffff;
2487
2488               ip0->checksum = checksum0;
2489               ip1->checksum = checksum1;
2490
2491               ttl0 -= 1;
2492               ttl1 -= 1;
2493
2494               ip0->ttl = ttl0;
2495               ip1->ttl = ttl1;
2496
2497               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2498               error1 = ttl1 <= 0 ? IP4_ERROR_TIME_EXPIRED : error1;
2499
2500               /* Verify checksum. */
2501               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2502               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2503             }
2504
2505           /* Rewrite packet header and updates lengths. */
2506           adj0 = ip_get_adjacency (lm, adj_index0);
2507           adj1 = ip_get_adjacency (lm, adj_index1);
2508       
2509           if (rewrite_for_locally_received_packets)
2510             {
2511               /*
2512                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2513                * we end up here with a local adjacency in hand
2514                * The local adj rewrite data is 0xfefe on purpose.
2515                * Bad engineer, no donut for you.
2516                */
2517               if (PREDICT_FALSE(adj0->lookup_next_index 
2518                                 == IP_LOOKUP_NEXT_LOCAL))
2519                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2520               if (PREDICT_FALSE(adj0->lookup_next_index
2521                                 == IP_LOOKUP_NEXT_ARP))
2522                 next0_override = IP4_REWRITE_NEXT_ARP;
2523               if (PREDICT_FALSE(adj1->lookup_next_index 
2524                                 == IP_LOOKUP_NEXT_LOCAL))
2525                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2526               if (PREDICT_FALSE(adj1->lookup_next_index
2527                                 == IP_LOOKUP_NEXT_ARP))
2528                 next1_override = IP4_REWRITE_NEXT_ARP;
2529             }
2530
2531           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2532           rw_len0 = adj0[0].rewrite_header.data_bytes;
2533           rw_len1 = adj1[0].rewrite_header.data_bytes;
2534           next0 = (error0 == IP4_ERROR_NONE) 
2535             ? adj0[0].rewrite_header.next_index : 0;
2536
2537           if (rewrite_for_locally_received_packets)
2538               next0 = next0 && next0_override ? next0_override : next0;
2539
2540           next1 = (error1 == IP4_ERROR_NONE)
2541             ? adj1[0].rewrite_header.next_index : 0;
2542
2543           if (rewrite_for_locally_received_packets)
2544               next1 = next1 && next1_override ? next1_override : next1;
2545
2546           /* 
2547            * We've already accounted for an ethernet_header_t elsewhere
2548            */
2549           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2550               vlib_increment_combined_counter 
2551                   (&lm->adjacency_counters,
2552                    cpu_index, adj_index0, 
2553                    /* packet increment */ 0,
2554                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2555
2556           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2557               vlib_increment_combined_counter 
2558                   (&lm->adjacency_counters,
2559                    cpu_index, adj_index1, 
2560                    /* packet increment */ 0,
2561                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2562
2563           /* Check MTU of outgoing interface. */
2564           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2565                     ? IP4_ERROR_MTU_EXCEEDED
2566                     : error0);
2567           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2568                     ? IP4_ERROR_MTU_EXCEEDED
2569                     : error1);
2570
2571           p0->current_data -= rw_len0;
2572           p1->current_data -= rw_len1;
2573
2574           p0->current_length += rw_len0;
2575           p1->current_length += rw_len1;
2576
2577           vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
2578           vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index;
2579       
2580           p0->error = error_node->errors[error0];
2581           p1->error = error_node->errors[error1];
2582
2583           /* Guess we are only writing on simple Ethernet header. */
2584           vnet_rewrite_two_headers (adj0[0], adj1[0],
2585                                     ip0, ip1,
2586                                     sizeof (ethernet_header_t));
2587       
2588           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2589                                            to_next, n_left_to_next,
2590                                            pi0, pi1, next0, next1);
2591         }
2592
2593       while (n_left_from > 0 && n_left_to_next > 0)
2594         {
2595           ip_adjacency_t * adj0;
2596           vlib_buffer_t * p0;
2597           ip4_header_t * ip0;
2598           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2599           u32 next0_override;
2600       
2601           if (rewrite_for_locally_received_packets)
2602               next0_override = 0;
2603
2604           pi0 = to_next[0] = from[0];
2605
2606           p0 = vlib_get_buffer (vm, pi0);
2607
2608           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2609
2610           /* We should never rewrite a pkt using the MISS adjacency */
2611           ASSERT(adj_index0);
2612
2613           adj0 = ip_get_adjacency (lm, adj_index0);
2614       
2615           ip0 = vlib_buffer_get_current (p0);
2616
2617           error0 = IP4_ERROR_NONE;
2618           next0 = 0;            /* drop on error */
2619
2620           /* Decrement TTL & update checksum. */
2621           if (! rewrite_for_locally_received_packets)
2622             {
2623               i32 ttl0 = ip0->ttl;
2624
2625               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2626
2627               checksum0 += checksum0 >= 0xffff;
2628
2629               ip0->checksum = checksum0;
2630
2631               ASSERT (ip0->ttl > 0);
2632
2633               ttl0 -= 1;
2634
2635               ip0->ttl = ttl0;
2636
2637               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2638
2639               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2640             }
2641
2642           if (rewrite_for_locally_received_packets)
2643             {
2644               /*
2645                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2646                * we end up here with a local adjacency in hand
2647                * The local adj rewrite data is 0xfefe on purpose.
2648                * Bad engineer, no donut for you.
2649                */
2650               if (PREDICT_FALSE(adj0->lookup_next_index 
2651                                 == IP_LOOKUP_NEXT_LOCAL))
2652                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2653               /* 
2654                * We have to override the next_index in ARP adjacencies,
2655                * because they're set up for ip4-arp, not this node...
2656                */
2657               if (PREDICT_FALSE(adj0->lookup_next_index
2658                                 == IP_LOOKUP_NEXT_ARP))
2659                 next0_override = IP4_REWRITE_NEXT_ARP;
2660             }
2661
2662           /* Guess we are only writing on simple Ethernet header. */
2663           vnet_rewrite_one_header (adj0[0], ip0, 
2664                                    sizeof (ethernet_header_t));
2665           
2666           /* Update packet buffer attributes/set output interface. */
2667           rw_len0 = adj0[0].rewrite_header.data_bytes;
2668           
2669           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2670               vlib_increment_combined_counter 
2671                   (&lm->adjacency_counters,
2672                    cpu_index, adj_index0, 
2673                    /* packet increment */ 0,
2674                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2675           
2676           /* Check MTU of outgoing interface. */
2677           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2678                     > adj0[0].rewrite_header.max_l3_packet_bytes
2679                     ? IP4_ERROR_MTU_EXCEEDED
2680                     : error0);
2681           
2682           p0->error = error_node->errors[error0];
2683           p0->current_data -= rw_len0;
2684           p0->current_length += rw_len0;
2685           vnet_buffer (p0)->sw_if_index[VLIB_TX] = 
2686             adj0[0].rewrite_header.sw_if_index;
2687           
2688           next0 = (error0 == IP4_ERROR_NONE)
2689             ? adj0[0].rewrite_header.next_index : 0;
2690
2691           if (rewrite_for_locally_received_packets)
2692               next0 = next0 && next0_override ? next0_override : next0;
2693
2694           from += 1;
2695           n_left_from -= 1;
2696           to_next += 1;
2697           n_left_to_next -= 1;
2698       
2699           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2700                                            to_next, n_left_to_next,
2701                                            pi0, next0);
2702         }
2703   
2704       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2705     }
2706
2707   /* Need to do trace after rewrites to pick up new packet data. */
2708   if (node->flags & VLIB_NODE_FLAG_TRACE)
2709     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2710
2711   return frame->n_vectors;
2712 }
2713
/*
 * Dispatch function of the "ip4-rewrite-transit" graph node.
 *
 * Thin wrapper around ip4_rewrite_inline with
 * rewrite_for_locally_received_packets = 0, i.e. the variant that
 * decrements the TTL and patches the IP header checksum before the
 * adjacency rewrite is applied.
 *
 * Returns the value of ip4_rewrite_inline (frame->n_vectors).
 */
2714 static uword
2715 ip4_rewrite_transit (vlib_main_t * vm,
2716                      vlib_node_runtime_t * node,
2717                      vlib_frame_t * frame)
2718 {
2719   return ip4_rewrite_inline (vm, node, frame,
2720                              /* rewrite_for_locally_received_packets */ 0);
2721 }
2722
/*
 * Dispatch function of the "ip4-rewrite-local" graph node.
 *
 * Thin wrapper around ip4_rewrite_inline with
 * rewrite_for_locally_received_packets = 1.  In that variant the TTL and
 * checksum are left untouched, packets that resolved to a LOCAL adjacency
 * are flagged IP4_ERROR_SPOOFED_LOCAL_PACKETS, and packets that resolved
 * to an ARP adjacency have their next index overridden with
 * IP4_REWRITE_NEXT_ARP.
 *
 * Returns the value of ip4_rewrite_inline (frame->n_vectors).
 */
2723 static uword
2724 ip4_rewrite_local (vlib_main_t * vm,
2725                    vlib_node_runtime_t * node,
2726                    vlib_frame_t * frame)
2727 {
2728   return ip4_rewrite_inline (vm, node, frame,
2729                              /* rewrite_for_locally_received_packets */ 1);
2730 }
2731
/*
 * Registration of the "ip4-rewrite-transit" graph node.
 * The frame carries u32 buffer indices (vector_size = sizeof (u32)).
 * Two next nodes are declared here: the drop node and ip4-arp.
 */
2732 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2733   .function = ip4_rewrite_transit,
2734   .name = "ip4-rewrite-transit",
2735   .vector_size = sizeof (u32),
2736
2737   .format_trace = format_ip4_forward_next_trace,
2738
2739   .n_next_nodes = 2,
2740   .next_nodes = {
2741     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2742     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2743   },
2744 };
2745
/*
 * Registration of the "ip4-rewrite-local" graph node (file-local symbol).
 * Same next-node table as "ip4-rewrite-transit".
 * NOTE(review): .sibling_of presumably makes vlib keep this node's next
 * arcs identical to those of "ip4-rewrite-transit" -- confirm against the
 * vlib node registration documentation.
 */
2746 VLIB_REGISTER_NODE (ip4_rewrite_local_node,static) = {
2747   .function = ip4_rewrite_local,
2748   .name = "ip4-rewrite-local",
2749   .vector_size = sizeof (u32),
2750
2751   .sibling_of = "ip4-rewrite-transit",
2752
2753   .format_trace = format_ip4_forward_next_trace,
2754
2755   .n_next_nodes = 2,
2756   .next_nodes = {
2757     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2758     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2759   },
2760 };
2761
/*
 * CLI handler for "set interface ip table".
 *
 * Parses an interface (unformat_vnet_sw_interface) followed by a decimal
 * table id, resolves the table id to an ip4 FIB through
 * find_ip4_fib_by_table_index_or_id (..., IP4_ROUTE_FLAG_TABLE_ID) and,
 * when a FIB is returned, stores that FIB's index as the interface's
 * entry in im->fib_index_by_sw_if_index.
 *
 * Returns a clib error when either argument fails to parse, 0 otherwise.
 * Despite the function name, no "delete" path is implemented here.
 */
2762 static clib_error_t *
2763 add_del_interface_table (vlib_main_t * vm,
2764                          unformat_input_t * input,
2765                          vlib_cli_command_t * cmd)
2766 {
2767   vnet_main_t * vnm = vnet_get_main();
2768   clib_error_t * error = 0;
2769   u32 sw_if_index, table_id;
2770
2771   sw_if_index = ~0;
2772
     /* First argument: the interface. */
2773   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2774     {
2775       error = clib_error_return (0, "unknown interface `%U'",
2776                                  format_unformat_error, input);
2777       goto done;
2778     }
2779
     /* Second argument: the FIB table id. */
2780   if (unformat (input, "%d", &table_id))
2781     ;
2782   else
2783     {
2784       error = clib_error_return (0, "expected table id `%U'",
2785                                  format_unformat_error, input);
2786       goto done;
2787     }
2788
2789   {
2790     ip4_main_t * im = &ip4_main;
2791     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
2792
       /* NOTE(review): a NULL fib is silently ignored and the command still
          reports success. */
2793     if (fib) 
2794       {
           /* Make sure the per-interface vector has a slot for sw_if_index
              before writing to it. */
2795         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2796         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
2797     }
2798   }
2799
2800  done:
2801   return error;
2802 }
2803
/*
 * CLI registration for "set interface ip table".
 * The short help spells out the argument order the handler parses:
 * an interface followed by a table id.  The handler has no delete
 * path, so the help text no longer advertises one.
 */
2804 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2805   .path = "set interface ip table",
2806   .function = add_del_interface_table,
2807   .short_help = "set interface ip table <interface> <table-id>",
2808 };
2809
2810
/*
 * Dispatch function of the "ip4-lookup-multicast" graph node.
 *
 * For every buffer in the frame:
 *   - pick a FIB index: the RX interface's entry in
 *     im->fib_index_by_sw_if_index, unless sw_if_index[VLIB_TX] is not ~0,
 *     in which case that value is used as the FIB index;
 *   - look the destination address up with ip4_fib_lookup_buffer;
 *   - take the next node from the returned adjacency's lookup_next_index;
 *   - compute the flow hash with that FIB's flow_hash_config and use it to
 *     select one adjacency out of the block of n_adj adjacencies (n_adj is
 *     asserted to be a non-zero power of two);
 *   - store the selected index in ip.adj_index[VLIB_TX] and add one packet
 *     plus the buffer chain length to that adjacency's combined counter.
 *
 * Buffers are enqueued speculatively to the frame of `next`; when the
 * guess turns out wrong the speculative enqueue is undone and the buffer
 * is handed to its real next node.
 *
 * Returns frame->n_vectors.
 */
2811 static uword
2812 ip4_lookup_multicast (vlib_main_t * vm,
2813                       vlib_node_runtime_t * node,
2814                       vlib_frame_t * frame)
2815 {
2816   ip4_main_t * im = &ip4_main;
2817   ip_lookup_main_t * lm = &im->lookup_main;
2818   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
2819   u32 n_left_from, n_left_to_next, * from, * to_next;
2820   ip_lookup_next_t next;
2821   u32 cpu_index = os_get_cpu_number();
2822
2823   from = vlib_frame_vector_args (frame);
2824   n_left_from = frame->n_vectors;
2825   next = node->cached_next_index;
2826
2827   while (n_left_from > 0)
2828     {
2829       vlib_get_next_frame (vm, node, next,
2830                            to_next, n_left_to_next);
2831
           /* Dual loop: handles two buffers per iteration.  At least four
              must remain because from[2] and from[3] are prefetched. */
2832       while (n_left_from >= 4 && n_left_to_next >= 2)
2833         {
2834           vlib_buffer_t * p0, * p1;
2835           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
2836           ip_lookup_next_t next0, next1;
2837           ip4_header_t * ip0, * ip1;
2838           ip_adjacency_t * adj0, * adj1;
2839           u32 fib_index0, fib_index1;
2840           u32 flow_hash_config0, flow_hash_config1;
2841
2842           /* Prefetch next iteration. */
2843           {
2844             vlib_buffer_t * p2, * p3;
2845
2846             p2 = vlib_get_buffer (vm, from[2]);
2847             p3 = vlib_get_buffer (vm, from[3]);
2848
2849             vlib_prefetch_buffer_header (p2, LOAD);
2850             vlib_prefetch_buffer_header (p3, LOAD);
2851
2852             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2853             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2854           }
2855
               /* Speculatively enqueue both buffers to the current next frame. */
2856           pi0 = to_next[0] = from[0];
2857           pi1 = to_next[1] = from[1];
2858
2859           p0 = vlib_get_buffer (vm, pi0);
2860           p1 = vlib_get_buffer (vm, pi1);
2861
2862           ip0 = vlib_buffer_get_current (p0);
2863           ip1 = vlib_buffer_get_current (p1);
2864
               /* FIB index: per-RX-interface default, overridden by
                  sw_if_index[VLIB_TX] when that field is not ~0. */
2865           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2866           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2867           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2868             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2869           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2870             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2871
2872           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
2873                                               &ip0->dst_address, p0);
2874           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
2875                                               &ip1->dst_address, p1);
2876
2877           adj0 = ip_get_adjacency (lm, adj_index0);
2878           adj1 = ip_get_adjacency (lm, adj_index1);
2879
               /* The next node is taken from the adjacency returned by the
                  lookup, before the flow-hash offset is applied. */
2880           next0 = adj0->lookup_next_index;
2881           next1 = adj1->lookup_next_index;
2882
2883           flow_hash_config0 = 
2884               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
2885
2886           flow_hash_config1 = 
2887               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
2888
2889           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
2890               (ip0, flow_hash_config0);
2891                                                                   
2892           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
2893               (ip1, flow_hash_config1);
2894
               /* n_adj is a power of two, so masking the hash with n_adj - 1
                  yields an offset in [0, n_adj). */
2895           ASSERT (adj0->n_adj > 0);
2896           ASSERT (adj1->n_adj > 0);
2897           ASSERT (is_pow2 (adj0->n_adj));
2898           ASSERT (is_pow2 (adj1->n_adj));
2899           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
2900           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
2901
2902           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2903           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2904
2905           if (1) /* $$$$$$ HACK FIXME */
2906           vlib_increment_combined_counter 
2907               (cm, cpu_index, adj_index0, 1,
2908                vlib_buffer_length_in_chain (vm, p0));
2909           if (1) /* $$$$$$ HACK FIXME */
2910           vlib_increment_combined_counter 
2911               (cm, cpu_index, adj_index1, 1,
2912                vlib_buffer_length_in_chain (vm, p1));
2913
2914           from += 2;
2915           to_next += 2;
2916           n_left_to_next -= 2;
2917           n_left_from -= 2;
2918
               /* Bit 0 set: p0 was mis-speculated.  Bit 1 set: p1 was. */
2919           wrong_next = (next0 != next) + 2*(next1 != next);
2920           if (PREDICT_FALSE (wrong_next != 0))
2921             {
2922               switch (wrong_next)
2923                 {
2924                 case 1:
2925                   /* A B A */
                       /* Only p0 is wrong: slide pi1 into p0's slot, give one
                          slot back and send pi0 to its own next node. */
2926                   to_next[-2] = pi1;
2927                   to_next -= 1;
2928                   n_left_to_next += 1;
2929                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2930                   break;
2931
2932                 case 2:
2933                   /* A A B */
                       /* Only p1 is wrong: give its slot back and send pi1 to
                          its own next node. */
2934                   to_next -= 1;
2935                   n_left_to_next += 1;
2936                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2937                   break;
2938
2939                 case 3:
2940                   /* A B C */
                       /* Both are wrong: give both slots back and send each
                          buffer to its own next node. */
2941                   to_next -= 2;
2942                   n_left_to_next += 2;
2943                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2944                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2945                   if (next0 == next1)
2946                     {
2947                       /* A B B */
                           /* Both agree with each other: switch the speculated
                              next node to that one for the following buffers. */
2948                       vlib_put_next_frame (vm, node, next, n_left_to_next);
2949                       next = next1;
2950                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2951                     }
2952                 }
2953             }
2954         }
2955     
           /* Single loop: same processing, one buffer per iteration. */
2956       while (n_left_from > 0 && n_left_to_next > 0)
2957         {
2958           vlib_buffer_t * p0;
2959           ip4_header_t * ip0;
2960           u32 pi0, adj_index0;
2961           ip_lookup_next_t next0;
2962           ip_adjacency_t * adj0;
2963           u32 fib_index0;
2964           u32 flow_hash_config0;
2965
2966           pi0 = from[0];
2967           to_next[0] = pi0;
2968
2969           p0 = vlib_get_buffer (vm, pi0);
2970
2971           ip0 = vlib_buffer_get_current (p0);
2972
2973           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2974                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2975           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2976               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2977           
2978           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
2979                                               &ip0->dst_address, p0);
2980
2981           adj0 = ip_get_adjacency (lm, adj_index0);
2982
2983           next0 = adj0->lookup_next_index;
2984
2985           flow_hash_config0 = 
2986               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
2987
2988           vnet_buffer (p0)->ip.flow_hash = 
2989             ip4_compute_flow_hash (ip0, flow_hash_config0);
2990
2991           ASSERT (adj0->n_adj > 0);
2992           ASSERT (is_pow2 (adj0->n_adj));
2993           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
2994
2995           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2996
2997           if (1) /* $$$$$$ HACK FIXME */
2998               vlib_increment_combined_counter 
2999                   (cm, cpu_index, adj_index0, 1,
3000                    vlib_buffer_length_in_chain (vm, p0));
3001
3002           from += 1;
3003           to_next += 1;
3004           n_left_to_next -= 1;
3005           n_left_from -= 1;
3006
3007           if (PREDICT_FALSE (next0 != next))
3008             {
                   /* Wrong guess: undo the speculative enqueue, close the
                      current frame, switch to next0 and enqueue there. */
3009               n_left_to_next += 1;
3010               vlib_put_next_frame (vm, node, next, n_left_to_next);
3011               next = next0;
3012               vlib_get_next_frame (vm, node, next,
3013                                    to_next, n_left_to_next);
3014               to_next[0] = pi0;
3015               to_next += 1;
3016               n_left_to_next -= 1;
3017             }
3018         }
3019
3020       vlib_put_next_frame (vm, node, next, n_left_to_next);
3021     }
3022
3023   return frame->n_vectors;
3024 }
3025
/*
 * Registration of the "ip4-lookup-multicast" graph node (file-local
 * symbol).  The frame carries u32 buffer indices; the next-node table is
 * IP4_LOOKUP_NEXT_NODES with IP_LOOKUP_N_NEXT entries, both defined
 * outside this section of the file.
 */
3026 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3027   .function = ip4_lookup_multicast,
3028   .name = "ip4-lookup-multicast",
3029   .vector_size = sizeof (u32),
3030
3031   .n_next_nodes = IP_LOOKUP_N_NEXT,
3032   .next_nodes = IP4_LOOKUP_NEXT_NODES,
3033 };
3034
/*
 * Registration of the "ip4-multicast" graph node (file-local symbol).
 * Its dispatch function is ip4_drop and its only next node is
 * "error-drop".
 */
3035 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3036   .function = ip4_drop,
3037   .name = "ip4-multicast",
3038   .vector_size = sizeof (u32),
3039
3040   .format_trace = format_ip4_forward_next_trace,
3041
3042   .n_next_nodes = 1,
3043   .next_nodes = {
3044     [0] = "error-drop",
3045   },
3046 };
3047
/*
 * Cross-check the mtrie lookup against ip4_fib_lookup_with_table for one
 * address.
 *
 *   a           address to look up
 *   fib_index0  index into im->fibs of the FIB to use
 *
 * Walks the FIB's mtrie one step per address byte (steps 0..3), falls
 * back to the mtrie's default leaf when the walk ends on the empty leaf,
 * and compares the resulting adjacency index with the one returned by
 * ip4_fib_lookup_with_table (default route allowed).
 *
 * Returns non-zero when both lookups agree, 0 otherwise.
 */
3048 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3049 {
3050   ip4_main_t * im = &ip4_main;
3051   ip4_fib_mtrie_t * mtrie0;
3052   ip4_fib_mtrie_leaf_t leaf0;
3053   u32 adj_index0;
3054     
3055   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3056
3057   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3058   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3059   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3060   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3061   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3062   
3063   /* Handle default route. */
3064   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3065   
3066   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3067   
3068   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3069                                                   a, 
3070                                                   /* no_default_route */ 0);
3071 }
3072  
/*
 * CLI handler for "test lookup".
 *
 * Accepted arguments, in any order:
 *   table <n>     value handed to ip4_lookup_validate as its fib_index0
 *                 argument (default 0)
 *   count <f>     number of consecutive addresses to check (default 1)
 *   <ip4-address> first address to check (default 0.0.0.0)
 *
 * Runs ip4_lookup_validate on `count` consecutive addresses starting at
 * the base address and prints how many of them disagreed.
 *
 * Returns a clib error on unrecognised input, 0 otherwise.
 */
3073 static clib_error_t *
3074 test_lookup_command_fn (vlib_main_t * vm,
3075                         unformat_input_t * input,
3076                         vlib_cli_command_t * cmd)
3077 {
3078   u32 table_id = 0;
3079   f64 count = 1;
3080   u32 n;
     /* Unsigned so the loop comparison against n is not signed/unsigned. */
3081   u32 i;
     /* Start from a defined address when none is given on the command line;
        previously the variable was read uninitialised in that case. */
3082   ip4_address_t ip4_base_address = { .as_u32 = 0 };
3083   u64 errors = 0;
3084
3085   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3086       if (unformat (input, "table %d", &table_id))
3087         ;
3088       else if (unformat (input, "count %f", &count))
3089         ;
3090
3091       else if (unformat (input, "%U",
3092                          unformat_ip4_address, &ip4_base_address))
3093         ;
3094       else
3095         return clib_error_return (0, "unknown input `%U'",
3096                                   format_unformat_error, input);
3097   }
3098
3099   n = count;
3100
3101   for (i = 0; i < n; i++)
3102     {
3103       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3104         errors++;
3105
           /* Step to the next address; the increment is done in host byte
              order and converted back to network order. */
3106       ip4_base_address.as_u32 = 
3107         clib_host_to_net_u32 (1 + 
3108                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3109     }
3110
3111   if (errors) 
3112     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3113   else
3114     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3115
3116   return 0;
3117 }
3118
/* CLI registration: "test lookup" is handled by test_lookup_command_fn. */
3119 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3120     .path = "test lookup",
3121     .short_help = "test lookup",
3122     .function = test_lookup_command_fn,
3123 };
3124
/*
 * Set the flow-hash configuration of the ip4 FIB identified by table_id.
 *
 *   table_id          key looked up in im4->fib_index_by_table_id
 *   flow_hash_config  value stored in the FIB's flow_hash_config field
 *
 * Returns 0 on success, VNET_API_ERROR_NO_SUCH_FIB when table_id is not
 * present in the hash.
 */
3125 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3126 {
3127   ip4_main_t * im4 = &ip4_main;
3128   ip4_fib_t * fib;
3129   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3130
3131   if (p == 0)
3132     return VNET_API_ERROR_NO_SUCH_FIB;
3133
     /* p[0] is the index into im4->fibs for this table id. */
3134   fib = vec_elt_at_index (im4->fibs, p[0]);
3135
3136   fib->flow_hash_config = flow_hash_config;
3137   return 0;
3138 }
3139  
/*
 * CLI handler for "set ip flow-hash".
 *
 * Accepts "table <id>" (default table 0) and any of the keywords generated
 * from foreach_flow_hash_bit; each keyword ORs its value into
 * flow_hash_config.  Parsing stops at the first token that matches none of
 * them.  If nothing at all matched, an "unknown input" error is returned.
 *
 * The result is applied with vnet_set_ip4_flow_hash.  A
 * VNET_API_ERROR_NO_SUCH_FIB result is turned into a CLI error; any other
 * non-zero result is only logged with clib_warning and the command still
 * returns 0.
 */
3140 static clib_error_t *
3141 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3142                              unformat_input_t * input,
3143                              vlib_cli_command_t * cmd)
3144 {
3145   int matched = 0;
3146   u32 table_id = 0;
3147   u32 flow_hash_config = 0;
3148   int rv;
3149
3150   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3151     if (unformat (input, "table %d", &table_id))
3152       matched = 1;
3153 #define _(a,v) \
3154     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3155     foreach_flow_hash_bit
3156 #undef _
3157     else break;
3158   }
3159   
3160   if (matched == 0)
3161     return clib_error_return (0, "unknown input `%U'",
3162                               format_unformat_error, input);
3163   
3164   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3165   switch (rv)
3166     {
3167     case 0:
3168       break;
3169       
3170     case VNET_API_ERROR_NO_SUCH_FIB:
3171       return clib_error_return (0, "no such FIB table %d", table_id);
3172       
3173     default:
3174       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3175       break;
3176     }
3177   
3178   return 0;
3179 }
3180  
/*
 * CLI registration for "set ip flow-hash".
 * The short help now starts with the registered path; it previously read
 * "set ip table flow-hash ...", which is not a command.
 */
3181 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3182   .path = "set ip flow-hash",
3183   .short_help = 
3184   "set ip flow-hash table <fib-id> src dst sport dport proto reverse",
3185   .function = set_ip_flow_hash_command_fn,
3186 };
3187  
/*
 * Bind an ip4 classifier table to a software interface.
 *
 *   vm           not used in this function body
 *   sw_if_index  interface; must be a live entry of im->sw_interfaces
 *   table_index  classifier table index, or ~0.  ~0 is stored without
 *                being checked against cm->tables (presumably meaning
 *                "no table" -- confirm with the consumers of
 *                classify_table_index_by_sw_if_index).
 *
 * Returns 0 on success, VNET_API_ERROR_NO_MATCHING_INTERFACE for an
 * unknown interface, VNET_API_ERROR_NO_SUCH_ENTRY for an unknown table.
 */
3188 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3189                                  u32 table_index)
3190 {
3191   vnet_main_t * vnm = vnet_get_main();
3192   vnet_interface_main_t * im = &vnm->interface_main;
3193   ip4_main_t * ipm = &ip4_main;
3194   ip_lookup_main_t * lm = &ipm->lookup_main;
3195   vnet_classify_main_t * cm = &vnet_classify_main;
3196
3197   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3198     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3199
3200   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3201     return VNET_API_ERROR_NO_SUCH_ENTRY;
3202
     /* Make sure the per-interface vector has a slot for sw_if_index
        before writing to it. */
3203   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3204   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3205
3206   return 0;
3207 }
3208
/*
 * CLI handler for "set ip classify".
 *
 * Accepts "table-index <n>" and "intfc <interface>" in any order; parsing
 * stops at the first token that matches neither.  Both arguments are
 * required.  The binding is applied with vnet_set_ip4_classify_intfc and
 * its two error codes are turned into CLI errors.
 *
 * Returns a clib error on a missing argument or a failed binding,
 * 0 otherwise.
 */
3209 static clib_error_t *
3210 set_ip_classify_command_fn (vlib_main_t * vm,
3211                             unformat_input_t * input,
3212                             vlib_cli_command_t * cmd)
3213 {
3214   u32 table_index = ~0;
     /* Separate flag, so "was table-index given" does not depend on the
        parsed value. */
3215   int table_index_set = 0;
3216   u32 sw_if_index = ~0;
3217   int rv;
3218   
3219   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3220     if (unformat (input, "table-index %d", &table_index))
3221       table_index_set = 1;
3222     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3223                        vnet_get_main(), &sw_if_index))
3224       ;
3225     else
3226       break;
3227   }
3228       
3229   if (table_index_set == 0)
3230     return clib_error_return (0, "classify table-index must be specified");
3231
3232   if (sw_if_index == ~0)
3233     return clib_error_return (0, "interface / subif must be specified");
3234
3235   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3236
     /* Any return value not listed below falls through to "return 0". */
3237   switch (rv)
3238     {
3239     case 0:
3240       break;
3241
3242     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3243       return clib_error_return (0, "No such interface");
3244
3245     case VNET_API_ERROR_NO_SUCH_ENTRY:
3246       return clib_error_return (0, "No such classifier table");
3247     }
3248   return 0;
3249 }
3250
/* CLI registration: "set ip classify" is handled by
   set_ip_classify_command_fn. */
3251 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3252     .path = "set ip classify",
3253     .short_help = 
3254     "set ip classify intfc <int> table-index <index>",
3255     .function = set_ip_classify_command_fn,
3256 };
3257