e9e5232f9e08d24b6cb4432764014c6720dbd30a
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47
48 /* This is really, really simple but stupid fib. */
49 u32
50 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
51                            ip4_address_t * dst,
52                            u32 disable_default_route)
53 {
54   ip_lookup_main_t * lm = &im->lookup_main;
55   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
56   uword * p, * hash, key;
57   i32 i, i_min, dst_address, ai;
58
59   i_min = disable_default_route ? 1 : 0;
60   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
61   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
62     {
63       hash = fib->adj_index_by_dst_address[i];
64       if (! hash)
65         continue;
66
67       key = dst_address & im->fib_masks[i];
68       if ((p = hash_get (hash, key)) != 0)
69         {
70           ai = p[0];
71           goto done;
72         }
73     }
74     
75   /* Nothing matches in table. */
76   ai = lm->miss_adj_index;
77
78  done:
79   return ai;
80 }
81
82 static ip4_fib_t *
83 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
84 {
85   ip4_fib_t * fib;
86   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
87   vec_add2 (im->fibs, fib, 1);
88   fib->table_id = table_id;
89   fib->index = fib - im->fibs;
90   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
91   fib->fwd_classify_table_index = ~0;
92   fib->rev_classify_table_index = ~0;
93   ip4_mtrie_init (&fib->mtrie);
94   return fib;
95 }
96
97 ip4_fib_t *
98 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
99                                    u32 table_index_or_id, u32 flags)
100 {
101   uword * p, fib_index;
102
103   fib_index = table_index_or_id;
104   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
105     {
106       if (table_index_or_id == ~0) {
107         table_index_or_id = 0;
108         while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
109           table_index_or_id++;
110         }
111         return create_fib_with_table_id (im, table_index_or_id);
112       }
113
114       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
115       if (! p)
116         return create_fib_with_table_id (im, table_index_or_id);
117       fib_index = p[0];
118     }
119   return vec_elt_at_index (im->fibs, fib_index);
120 }
121
122 static void
123 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
124                                        ip4_fib_t * fib,
125                                        u32 address_length)
126 {
127   hash_t * h;
128   uword max_index;
129
130   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
131   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
132
133   fib->adj_index_by_dst_address[address_length] =
134     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
135
136   hash_set_flags (fib->adj_index_by_dst_address[address_length],
137                   HASH_FLAG_NO_AUTO_SHRINK);
138
139   h = hash_header (fib->adj_index_by_dst_address[address_length]);
140   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
141
142   /* Initialize new/old hash value vectors. */
143   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
144   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
145 }
146
147 static void
148 ip4_fib_set_adj_index (ip4_main_t * im,
149                        ip4_fib_t * fib,
150                        u32 flags,
151                        u32 dst_address_u32,
152                        u32 dst_address_length,
153                        u32 adj_index)
154 {
155   ip_lookup_main_t * lm = &im->lookup_main;
156   uword * hash;
157
158   if (vec_bytes(fib->old_hash_values))
159     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
160   if (vec_bytes(fib->new_hash_values))
161     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
162   fib->new_hash_values[0] = adj_index;
163
164   /* Make sure adj index is valid. */
165   if (CLIB_DEBUG > 0)
166     (void) ip_get_adjacency (lm, adj_index);
167
168   hash = fib->adj_index_by_dst_address[dst_address_length];
169
170   hash = _hash_set3 (hash, dst_address_u32,
171                      fib->new_hash_values,
172                      fib->old_hash_values);
173
174   fib->adj_index_by_dst_address[dst_address_length] = hash;
175
176   if (vec_len (im->add_del_route_callbacks) > 0)
177     {
178       ip4_add_del_route_callback_t * cb;
179       ip4_address_t d;
180       uword * p;
181
182       d.data_u32 = dst_address_u32;
183       vec_foreach (cb, im->add_del_route_callbacks)
184         if ((flags & cb->required_flags) == cb->required_flags)
185           cb->function (im, cb->function_opaque,
186                         fib, flags,
187                         &d, dst_address_length,
188                         fib->old_hash_values,
189                         fib->new_hash_values);
190
191       p = hash_get (hash, dst_address_u32);
192       clib_memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
193     }
194 }
195
/*
 * Add or delete a single route described by A.
 *
 * A->flags selects the operation (IP4_ROUTE_FLAG_DEL deletes, otherwise
 * the route is added/replaced) and how A->table_index_or_table_id is
 * interpreted (see find_ip4_fib_by_table_index_or_id).
 *
 * When A->n_add_adj > 0 a new adjacency block is created from
 * A->add_adj and used for the route; otherwise A->adj_index is used.
 *
 * The route is updated in both the per-prefix-length hash table and the
 * fib's mtrie.  Unless IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY is set, a
 * previous adjacency that differs from the new one is deleted.
 *
 * NOTE: the fib's old_hash_values / new_hash_values scratch vectors are
 * shared state written by the add and delete paths below and read
 * afterwards; statement order matters.
 */
void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
{
  ip_lookup_main_t * lm = &im->lookup_main;
  ip4_fib_t * fib;
  u32 dst_address, dst_address_length, adj_index, old_adj_index;
  uword * hash, is_del;
  ip4_add_del_route_callback_t * cb;

  /* Either create new adjacency or use given one depending on arguments. */
  if (a->n_add_adj > 0)
    {
      ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
      ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
    }
  else
    adj_index = a->adj_index;

  dst_address = a->dst_address.data_u32;
  dst_address_length = a->dst_address_length;
  fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);

  /* Clear host bits so the hash key is the canonical prefix. */
  ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
  dst_address &= im->fib_masks[dst_address_length];

  /* Hash tables are created lazily, one per prefix length.  This also
     sizes the old/new hash value scratch vectors used below. */
  if (! fib->adj_index_by_dst_address[dst_address_length])
    ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);

  hash = fib->adj_index_by_dst_address[dst_address_length];

  is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;

  if (is_del)
    {
      /* Pre-set to ~0 so we can tell afterwards whether _hash_unset
         actually found (and reported) an existing entry. */
      fib->old_hash_values[0] = ~0;
      hash = _hash_unset (hash, dst_address, fib->old_hash_values);
      fib->adj_index_by_dst_address[dst_address_length] = hash;

      if (vec_len (im->add_del_route_callbacks) > 0
          && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
        {
          /* A deleted route has no new value. */
          fib->new_hash_values[0] = ~0;
          vec_foreach (cb, im->add_del_route_callbacks)
            if ((a->flags & cb->required_flags) == cb->required_flags)
              cb->function (im, cb->function_opaque,
                            fib, a->flags,
                            &a->dst_address, dst_address_length,
                            fib->old_hash_values,
                            fib->new_hash_values);
        }
    }
  else
    ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
                           adj_index);

  /* Previous adjacency for this prefix, or ~0 if there was none
     (both paths above leave it in old_hash_values[0]). */
  old_adj_index = fib->old_hash_values[0];

  /* Avoid spurious reference count increments */
  if (old_adj_index == adj_index
      && adj_index != ~0
      && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
    {
      /* Same adjacency installed again for the same prefix: drop one
         share so the count is not inflated. */
      ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
      if (adj->share_count > 0)
        adj->share_count --;
    }

  /* Mirror the change into the mtrie; for deletes the adjacency being
     removed is the one previously stored in the hash. */
  ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
                               is_del ? old_adj_index : adj_index,
                               is_del);

  /* Delete old adjacency index if present and changed. */
  if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
      && old_adj_index != ~0
      && old_adj_index != adj_index)
    ip_del_adjacency (lm, old_adj_index);
}
272
273 void
274 ip4_add_del_route_next_hop (ip4_main_t * im,
275                             u32 flags,
276                             ip4_address_t * dst_address,
277                             u32 dst_address_length,
278                             ip4_address_t * next_hop,
279                             u32 next_hop_sw_if_index,
280                             u32 next_hop_weight, u32 adj_index, 
281                             u32 explicit_fib_index)
282 {
283   vnet_main_t * vnm = vnet_get_main();
284   ip_lookup_main_t * lm = &im->lookup_main;
285   u32 fib_index;
286   ip4_fib_t * fib;
287   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
288   u32 dst_adj_index, nh_adj_index;
289   uword * dst_hash, * dst_result;
290   uword * nh_hash, * nh_result;
291   ip_adjacency_t * dst_adj;
292   ip_multipath_adjacency_t * old_mp, * new_mp;
293   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
294   int is_interface_next_hop;
295   clib_error_t * error = 0;
296
297   if (explicit_fib_index == (u32)~0)
298       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
299   else
300       fib_index = explicit_fib_index;
301
302   fib = vec_elt_at_index (im->fibs, fib_index);
303   
304   /* Lookup next hop to be added or deleted. */
305   is_interface_next_hop = next_hop->data_u32 == 0;
306   if (adj_index == (u32)~0)
307     {
308       if (is_interface_next_hop)
309         {
310           nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
311           if (nh_result)
312             nh_adj_index = *nh_result;
313           else
314             {
315               ip_adjacency_t * adj;
316               adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
317                                       &nh_adj_index);
318               ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
319               ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
320               hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
321             }
322         }
323       else
324         {
325           nh_hash = fib->adj_index_by_dst_address[32];
326           nh_result = hash_get (nh_hash, next_hop->data_u32);
327           
328           /* Next hop must be known. */
329           if (! nh_result)
330             {
331               ip_adjacency_t * adj;
332
333               nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
334                                                         next_hop, 0);
335               adj = ip_get_adjacency (lm, nh_adj_index);
336               /* if ARP interface adjacencty is present, we need to
337                  install ARP adjaceny for specific next hop */
338               if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
339                   adj->arp.next_hop.ip4.as_u32 == 0)
340                 {
341                   nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
342                 }
343               else
344                 {
345                   /* Next hop is not known, so create indirect adj */
346                   ip_adjacency_t add_adj;
347                   add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
348                   add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
349                   add_adj.explicit_fib_index = explicit_fib_index;
350                   ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
351                 }
352             }
353           else
354             nh_adj_index = *nh_result;
355         }
356     }
357   else
358     {
359       nh_adj_index = adj_index;
360     }
361   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
362   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
363
364   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
365   dst_result = hash_get (dst_hash, dst_address_u32);
366   if (dst_result)
367     {
368       dst_adj_index = dst_result[0];
369       dst_adj = ip_get_adjacency (lm, dst_adj_index);
370     }
371   else
372     {
373       /* For deletes destination must be known. */
374       if (is_del)
375         {
376           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
377           error = clib_error_return (0, "unknown destination %U/%d",
378                                      format_ip4_address, dst_address,
379                                      dst_address_length);
380           goto done;
381         }
382
383       dst_adj_index = ~0;
384       dst_adj = 0;
385     }
386
387   /* Ignore adds of X/32 with next hop of X. */
388   if (! is_del
389       && dst_address_length == 32
390       && dst_address->data_u32 == next_hop->data_u32 
391       && adj_index != (u32)~0)
392     {
393       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
394       error = clib_error_return (0, "prefix matches next hop %U/%d",
395                                  format_ip4_address, dst_address,
396                                  dst_address_length);
397       goto done;
398     }
399
400   /* Destination is not known and default weight is set so add route
401      to existing non-multipath adjacency */
402   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
403     {
404       /* create new adjacency */
405       ip4_add_del_route_args_t a;
406       a.table_index_or_table_id = fib_index;
407       a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
408                  | IP4_ROUTE_FLAG_FIB_INDEX
409                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
410                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
411                              | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
412       a.dst_address = dst_address[0];
413       a.dst_address_length = dst_address_length;
414       a.adj_index = nh_adj_index;
415       a.add_adj = 0;
416       a.n_add_adj = 0;
417
418       ip4_add_del_route (im, &a);
419
420       goto done;
421     }
422
423   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
424
425   if (! ip_multipath_adjacency_add_del_next_hop
426       (lm, is_del,
427        old_mp_adj_index,
428        nh_adj_index,
429        next_hop_weight,
430        &new_mp_adj_index))
431     {
432       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
433       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
434                                  format_ip4_address, next_hop);
435       goto done;
436     }
437   
438   old_mp = new_mp = 0;
439   if (old_mp_adj_index != ~0)
440     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
441   if (new_mp_adj_index != ~0)
442     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
443
444   if (old_mp != new_mp)
445     {
446       ip4_add_del_route_args_t a;
447       a.table_index_or_table_id = fib_index;
448       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
449                  | IP4_ROUTE_FLAG_FIB_INDEX
450                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
451                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
452       a.dst_address = dst_address[0];
453       a.dst_address_length = dst_address_length;
454       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
455       a.add_adj = 0;
456       a.n_add_adj = 0;
457
458       ip4_add_del_route (im, &a);
459     }
460
461  done:
462   if (error)
463     clib_error_report (error);
464 }
465
466 void *
467 ip4_get_route (ip4_main_t * im,
468                u32 table_index_or_table_id,
469                u32 flags,
470                u8 * address,
471                u32 address_length)
472 {
473   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
474   u32 dst_address = * (u32 *) address;
475   uword * hash, * p;
476
477   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
478   dst_address &= im->fib_masks[address_length];
479
480   hash = fib->adj_index_by_dst_address[address_length];
481   p = hash_get (hash, dst_address);
482   return (void *) p;
483 }
484
485 void
486 ip4_foreach_matching_route (ip4_main_t * im,
487                             u32 table_index_or_table_id,
488                             u32 flags,
489                             ip4_address_t * address,
490                             u32 address_length,
491                             ip4_address_t ** results,
492                             u8 ** result_lengths)
493 {
494   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
495   u32 dst_address = address->data_u32;
496   u32 this_length = address_length;
497   
498   if (*results)
499     _vec_len (*results) = 0;
500   if (*result_lengths)
501     _vec_len (*result_lengths) = 0;
502
503   while (this_length <= 32 && vec_len (results) == 0)
504     {
505       uword k, v;
506       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
507         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
508           {
509             ip4_address_t a;
510             a.data_u32 = k;
511             vec_add1 (*results, a);
512             vec_add1 (*result_lengths, this_length);
513           }
514       }));
515
516       this_length++;
517     }
518 }
519
520 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
521                                   u32 table_index_or_table_id,
522                                   u32 flags)
523 {
524   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
525   ip_lookup_main_t * lm = &im->lookup_main;
526   u32 i, l;
527   ip4_address_t a;
528   ip4_add_del_route_callback_t * cb;
529   static ip4_address_t * to_delete;
530
531   if (lm->n_adjacency_remaps == 0)
532     return;
533
534   for (l = 0; l <= 32; l++)
535     {
536       hash_pair_t * p;
537       uword * hash = fib->adj_index_by_dst_address[l];
538
539       if (hash_elts (hash) == 0)
540         continue;
541
542       if (to_delete)
543         _vec_len (to_delete) = 0;
544
545       hash_foreach_pair (p, hash, ({
546         u32 adj_index = p->value[0];
547         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
548
549         if (m)
550           {
551             /* Record destination address from hash key. */
552             a.data_u32 = p->key;
553
554             /* New adjacency points to nothing: so delete prefix. */
555             if (m == ~0)
556               vec_add1 (to_delete, a);
557             else
558               {
559                 /* Remap to new adjacency. */
560                 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
561
562                 /* Set new adjacency value. */
563                 fib->new_hash_values[0] = p->value[0] = m - 1;
564
565                 vec_foreach (cb, im->add_del_route_callbacks)
566                   if ((flags & cb->required_flags) == cb->required_flags)
567                     cb->function (im, cb->function_opaque,
568                                   fib, flags | IP4_ROUTE_FLAG_ADD,
569                                   &a, l,
570                                   fib->old_hash_values,
571                                   fib->new_hash_values);
572               }
573           }
574       }));
575
576       fib->new_hash_values[0] = ~0;
577       for (i = 0; i < vec_len (to_delete); i++)
578         {
579           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
580           vec_foreach (cb, im->add_del_route_callbacks)
581             if ((flags & cb->required_flags) == cb->required_flags)
582               cb->function (im, cb->function_opaque,
583                             fib, flags | IP4_ROUTE_FLAG_DEL,
584                             &a, l,
585                             fib->old_hash_values,
586                             fib->new_hash_values);
587         }
588     }
589
590   /* Also remap adjacencies in mtrie. */
591   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
592
593   /* Reset mapping table. */
594   vec_zero (lm->adjacency_remap_table);
595
596   /* All remaps have been performed. */
597   lm->n_adjacency_remaps = 0;
598 }
599
600 void ip4_delete_matching_routes (ip4_main_t * im,
601                                  u32 table_index_or_table_id,
602                                  u32 flags,
603                                  ip4_address_t * address,
604                                  u32 address_length)
605 {
606   static ip4_address_t * matching_addresses;
607   static u8 * matching_address_lengths;
608   u32 l, i;
609   ip4_add_del_route_args_t a;
610
611   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
612   a.table_index_or_table_id = table_index_or_table_id;
613   a.adj_index = ~0;
614   a.add_adj = 0;
615   a.n_add_adj = 0;
616
617   for (l = address_length + 1; l <= 32; l++)
618     {
619       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
620                                   address,
621                                   l,
622                                   &matching_addresses,
623                                   &matching_address_lengths);
624       for (i = 0; i < vec_len (matching_addresses); i++)
625         {
626           a.dst_address = matching_addresses[i];
627           a.dst_address_length = matching_address_lengths[i];
628           ip4_add_del_route (im, &a);
629         }
630     }
631
632   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
633 }
634
635 always_inline uword
636 ip4_lookup_inline (vlib_main_t * vm,
637                    vlib_node_runtime_t * node,
638                    vlib_frame_t * frame,
639                    int lookup_for_responses_to_locally_received_packets,
640                    int is_indirect)
641 {
642   ip4_main_t * im = &ip4_main;
643   ip_lookup_main_t * lm = &im->lookup_main;
644   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
645   u32 n_left_from, n_left_to_next, * from, * to_next;
646   ip_lookup_next_t next;
647   u32 cpu_index = os_get_cpu_number();
648
649   from = vlib_frame_vector_args (frame);
650   n_left_from = frame->n_vectors;
651   next = node->cached_next_index;
652
653   while (n_left_from > 0)
654     {
655       vlib_get_next_frame (vm, node, next,
656                            to_next, n_left_to_next);
657
658       while (n_left_from >= 4 && n_left_to_next >= 2)
659         {
660           vlib_buffer_t * p0, * p1;
661           ip4_header_t * ip0, * ip1;
662           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
663           ip_lookup_next_t next0, next1;
664           ip_adjacency_t * adj0, * adj1;
665           ip4_fib_mtrie_t * mtrie0, * mtrie1;
666           ip4_fib_mtrie_leaf_t leaf0, leaf1;
667           ip4_address_t * dst_addr0, *dst_addr1;
668           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
669           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
670           u32 flow_hash_config0, flow_hash_config1;
671           u32 hash_c0, hash_c1;
672           u32 wrong_next;
673
674           /* Prefetch next iteration. */
675           {
676             vlib_buffer_t * p2, * p3;
677
678             p2 = vlib_get_buffer (vm, from[2]);
679             p3 = vlib_get_buffer (vm, from[3]);
680
681             vlib_prefetch_buffer_header (p2, LOAD);
682             vlib_prefetch_buffer_header (p3, LOAD);
683
684             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
685             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
686           }
687
688           pi0 = to_next[0] = from[0];
689           pi1 = to_next[1] = from[1];
690
691           p0 = vlib_get_buffer (vm, pi0);
692           p1 = vlib_get_buffer (vm, pi1);
693
694           ip0 = vlib_buffer_get_current (p0);
695           ip1 = vlib_buffer_get_current (p1);
696
697           if (is_indirect)
698             {
699               ip_adjacency_t * iadj0, * iadj1;
700               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
701               iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
702               dst_addr0 = &iadj0->indirect.next_hop.ip4;
703               dst_addr1 = &iadj1->indirect.next_hop.ip4;
704             }
705           else
706             {
707               dst_addr0 = &ip0->dst_address;
708               dst_addr1 = &ip1->dst_address;
709             }
710
711           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
712           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
713           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
714             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
715           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
716             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
717
718
719           if (! lookup_for_responses_to_locally_received_packets)
720             {
721               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
722               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
723
724               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
725
726               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
727               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
728             }
729
730           tcp0 = (void *) (ip0 + 1);
731           tcp1 = (void *) (ip1 + 1);
732
733           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
734                          || ip0->protocol == IP_PROTOCOL_UDP);
735           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
736                          || ip1->protocol == IP_PROTOCOL_UDP);
737
738           if (! lookup_for_responses_to_locally_received_packets)
739             {
740               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
741               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
742             }
743
744           if (! lookup_for_responses_to_locally_received_packets)
745             {
746               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
747               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
748             }
749
750           if (! lookup_for_responses_to_locally_received_packets)
751             {
752               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
753               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
754             }
755
756           if (lookup_for_responses_to_locally_received_packets)
757             {
758               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
759               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
760             }
761           else
762             {
763               /* Handle default route. */
764               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
765               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
766
767               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
768               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
769             }
770
771           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
772                                                            dst_addr0,
773                                                            /* no_default_route */ 0));
774           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
775                                                            dst_addr1,
776                                                            /* no_default_route */ 0));
777           adj0 = ip_get_adjacency (lm, adj_index0);
778           adj1 = ip_get_adjacency (lm, adj_index1);
779
780           next0 = adj0->lookup_next_index;
781           next1 = adj1->lookup_next_index;
782
783           /* Use flow hash to compute multipath adjacency. */
784           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
785           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
786           if (PREDICT_FALSE (adj0->n_adj > 1))
787             {
788               flow_hash_config0 = 
789                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
790               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
791                 ip4_compute_flow_hash (ip0, flow_hash_config0);
792             }
793           if (PREDICT_FALSE(adj1->n_adj > 1))
794             {
795               flow_hash_config1 = 
796                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
797               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
798                 ip4_compute_flow_hash (ip1, flow_hash_config1);
799             }
800
801           ASSERT (adj0->n_adj > 0);
802           ASSERT (adj1->n_adj > 0);
803           ASSERT (is_pow2 (adj0->n_adj));
804           ASSERT (is_pow2 (adj1->n_adj));
805           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
806           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
807
808           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
809           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
810
811           vlib_increment_combined_counter 
812               (cm, cpu_index, adj_index0, 1,
813                vlib_buffer_length_in_chain (vm, p0) 
814                + sizeof(ethernet_header_t));
815           vlib_increment_combined_counter 
816               (cm, cpu_index, adj_index1, 1,
817                vlib_buffer_length_in_chain (vm, p1)
818                + sizeof(ethernet_header_t));
819
820           from += 2;
821           to_next += 2;
822           n_left_to_next -= 2;
823           n_left_from -= 2;
824
825           wrong_next = (next0 != next) + 2*(next1 != next);
826           if (PREDICT_FALSE (wrong_next != 0))
827             {
828               switch (wrong_next)
829                 {
830                 case 1:
831                   /* A B A */
832                   to_next[-2] = pi1;
833                   to_next -= 1;
834                   n_left_to_next += 1;
835                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
836                   break;
837
838                 case 2:
839                   /* A A B */
840                   to_next -= 1;
841                   n_left_to_next += 1;
842                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
843                   break;
844
845                 case 3:
846                   /* A B C */
847                   to_next -= 2;
848                   n_left_to_next += 2;
849                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
850                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
851                   if (next0 == next1)
852                     {
853                       /* A B B */
854                       vlib_put_next_frame (vm, node, next, n_left_to_next);
855                       next = next1;
856                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
857                     }
858                 }
859             }
860         }
861     
862       while (n_left_from > 0 && n_left_to_next > 0)
863         {
864           vlib_buffer_t * p0;
865           ip4_header_t * ip0;
866           __attribute__((unused)) tcp_header_t * tcp0;
867           ip_lookup_next_t next0;
868           ip_adjacency_t * adj0;
869           ip4_fib_mtrie_t * mtrie0;
870           ip4_fib_mtrie_leaf_t leaf0;
871           ip4_address_t * dst_addr0;
872           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
873           u32 flow_hash_config0, hash_c0;
874
875           pi0 = from[0];
876           to_next[0] = pi0;
877
878           p0 = vlib_get_buffer (vm, pi0);
879
880           ip0 = vlib_buffer_get_current (p0);
881
882           if (is_indirect)
883             {
884               ip_adjacency_t * iadj0;
885               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
886               dst_addr0 = &iadj0->indirect.next_hop.ip4;
887             }
888           else
889             {
890               dst_addr0 = &ip0->dst_address;
891             }
892
893           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
894           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
895             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
896
897           if (! lookup_for_responses_to_locally_received_packets)
898             {
899               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
900
901               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
902
903               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
904             }
905
906           tcp0 = (void *) (ip0 + 1);
907
908           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
909                          || ip0->protocol == IP_PROTOCOL_UDP);
910
911           if (! lookup_for_responses_to_locally_received_packets)
912             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
913
914           if (! lookup_for_responses_to_locally_received_packets)
915             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
916
917           if (! lookup_for_responses_to_locally_received_packets)
918             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
919
920           if (lookup_for_responses_to_locally_received_packets)
921             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
922           else
923             {
924               /* Handle default route. */
925               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
926               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
927             }
928
929           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
930                                                            dst_addr0,
931                                                            /* no_default_route */ 0));
932
933           adj0 = ip_get_adjacency (lm, adj_index0);
934
935           next0 = adj0->lookup_next_index;
936
937           /* Use flow hash to compute multipath adjacency. */
938           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
939           if (PREDICT_FALSE(adj0->n_adj > 1))
940             {
941               flow_hash_config0 = 
942                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
943
944               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
945                 ip4_compute_flow_hash (ip0, flow_hash_config0);
946             }
947
948           ASSERT (adj0->n_adj > 0);
949           ASSERT (is_pow2 (adj0->n_adj));
950           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
951
952           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
953
954           vlib_increment_combined_counter 
955               (cm, cpu_index, adj_index0, 1,
956                vlib_buffer_length_in_chain (vm, p0)
957                + sizeof(ethernet_header_t));
958
959           from += 1;
960           to_next += 1;
961           n_left_to_next -= 1;
962           n_left_from -= 1;
963
964           if (PREDICT_FALSE (next0 != next))
965             {
966               n_left_to_next += 1;
967               vlib_put_next_frame (vm, node, next, n_left_to_next);
968               next = next0;
969               vlib_get_next_frame (vm, node, next,
970                                    to_next, n_left_to_next);
971               to_next[0] = pi0;
972               to_next += 1;
973               n_left_to_next -= 1;
974             }
975         }
976
977       vlib_put_next_frame (vm, node, next, n_left_to_next);
978     }
979
980   return frame->n_vectors;
981 }
982
983 static uword
984 ip4_lookup (vlib_main_t * vm,
985             vlib_node_runtime_t * node,
986             vlib_frame_t * frame)
987 {
988   return ip4_lookup_inline (vm, node, frame,
989                             /* lookup_for_responses_to_locally_received_packets */ 0,
990                             /* is_indirect */ 0);
991
992 }
993
994 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
995                                         ip_adjacency_t * adj,
996                                         u32 sw_if_index,
997                                         u32 if_address_index)
998 {
999   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1000   ip_lookup_next_t n;
1001   vnet_l3_packet_type_t packet_type;
1002   u32 node_index;
1003
1004   if (hw->hw_class_index == ethernet_hw_interface_class.index
1005       || hw->hw_class_index == srp_hw_interface_class.index)
1006     {
1007       /* 
1008        * We have a bit of a problem in this case. ip4-arp uses
1009        * the rewrite_header.next_index to hand pkts to the
1010        * indicated inteface output node. We can end up in
1011        * ip4_rewrite_local, too, which also pays attention to 
1012        * rewrite_header.next index. Net result: a hack in
1013        * ip4_rewrite_local...
1014        */
1015       n = IP_LOOKUP_NEXT_ARP;
1016       node_index = ip4_arp_node.index;
1017       adj->if_address_index = if_address_index;
1018       adj->arp.next_hop.ip4.as_u32 = 0;
1019       ip46_address_reset(&adj->arp.next_hop);
1020       packet_type = VNET_L3_PACKET_TYPE_ARP;
1021     }
1022   else
1023     {
1024       n = IP_LOOKUP_NEXT_REWRITE;
1025       node_index = ip4_rewrite_node.index;
1026       packet_type = VNET_L3_PACKET_TYPE_IP4;
1027     }
1028
1029   adj->lookup_next_index = n;
1030   vnet_rewrite_for_sw_interface
1031     (vnm,
1032      packet_type,
1033      sw_if_index,
1034      node_index,
1035      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1036      &adj->rewrite_header,
1037      sizeof (adj->rewrite_data));
1038 }
1039
1040 static void
1041 ip4_add_interface_routes (u32 sw_if_index,
1042                           ip4_main_t * im, u32 fib_index,
1043                           ip_interface_address_t * a)
1044 {
1045   vnet_main_t * vnm = vnet_get_main();
1046   ip_lookup_main_t * lm = &im->lookup_main;
1047   ip_adjacency_t * adj;
1048   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1049   ip4_add_del_route_args_t x;
1050   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1051   u32 classify_table_index;
1052
1053   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1054   x.table_index_or_table_id = fib_index;
1055   x.flags = (IP4_ROUTE_FLAG_ADD
1056              | IP4_ROUTE_FLAG_FIB_INDEX
1057              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1058   x.dst_address = address[0];
1059   x.dst_address_length = a->address_length;
1060   x.n_add_adj = 0;
1061   x.add_adj = 0;
1062
1063   a->neighbor_probe_adj_index = ~0;
1064   if (a->address_length < 32)
1065     {
1066       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1067                               &x.adj_index);
1068       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1069       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1070       ip4_add_del_route (im, &x);
1071       a->neighbor_probe_adj_index = x.adj_index;
1072     }
1073   
1074   /* Add e.g. 1.1.1.1/32 as local to this host. */
1075   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1076                           &x.adj_index);
1077   
1078   classify_table_index = ~0;
1079   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1080     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1081   if (classify_table_index != (u32) ~0)
1082     {
1083       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1084       adj->classify.table_index = classify_table_index;
1085     }
1086   else
1087     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1088   
1089   adj->if_address_index = a - lm->if_address_pool;
1090   adj->rewrite_header.sw_if_index = sw_if_index;
1091   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1092   /* 
1093    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1094    * fail an RPF-ish check, but still go thru the rewrite code...
1095    */
1096   adj->rewrite_header.data_bytes = 0;
1097
1098   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1099   x.dst_address_length = 32;
1100   ip4_add_del_route (im, &x);
1101 }
1102
1103 static void
1104 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1105 {
1106   ip4_add_del_route_args_t x;
1107
1108   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1109   x.table_index_or_table_id = fib_index;
1110   x.flags = (IP4_ROUTE_FLAG_DEL
1111              | IP4_ROUTE_FLAG_FIB_INDEX
1112              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1113   x.dst_address = address[0];
1114   x.dst_address_length = address_length;
1115   x.adj_index = ~0;
1116   x.n_add_adj = 0;
1117   x.add_adj = 0;
1118
1119   if (address_length < 32)
1120     ip4_add_del_route (im, &x);
1121
1122   x.dst_address_length = 32;
1123   ip4_add_del_route (im, &x);
1124
1125   ip4_delete_matching_routes (im,
1126                               fib_index,
1127                               IP4_ROUTE_FLAG_FIB_INDEX,
1128                               address,
1129                               address_length);
1130 }
1131
1132 typedef struct {
1133     u32 sw_if_index;
1134     ip4_address_t address;
1135     u32 length;
1136 } ip4_interface_address_t;
1137
1138 static clib_error_t *
1139 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1140                                         u32 sw_if_index,
1141                                         ip4_address_t * new_address,
1142                                         u32 new_length,
1143                                         u32 redistribute,
1144                                         u32 insert_routes,
1145                                         u32 is_del);
1146
1147 static clib_error_t *
1148 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1149                                         u32 sw_if_index,
1150                                         ip4_address_t * address,
1151                                         u32 address_length,
1152                                         u32 redistribute,
1153                                         u32 insert_routes,
1154                                         u32 is_del)
1155 {
1156   vnet_main_t * vnm = vnet_get_main();
1157   ip4_main_t * im = &ip4_main;
1158   ip_lookup_main_t * lm = &im->lookup_main;
1159   clib_error_t * error = 0;
1160   u32 if_address_index, elts_before;
1161   ip4_address_fib_t ip4_af, * addr_fib = 0;
1162
1163   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1164   ip4_addr_fib_init (&ip4_af, address,
1165                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1166   vec_add1 (addr_fib, ip4_af);
1167
1168   /* When adding an address check that it does not conflict with an existing address. */
1169   if (! is_del)
1170     {
1171       ip_interface_address_t * ia;
1172       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1173                                     0 /* honor unnumbered */,
1174       ({
1175         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1176
1177         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1178             || ip4_destination_matches_route (im, x, address, address_length))
1179           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1180                                     format_ip4_address_and_length, address, address_length,
1181                                     format_ip4_address_and_length, x, ia->address_length,
1182                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1183       }));
1184     }
1185
1186   elts_before = pool_elts (lm->if_address_pool);
1187
1188   error = ip_interface_address_add_del
1189     (lm,
1190      sw_if_index,
1191      addr_fib,
1192      address_length,
1193      is_del,
1194      &if_address_index);
1195   if (error)
1196     goto done;
1197   
1198   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1199     {
1200       if (is_del)
1201         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1202                                   address_length);
1203       
1204       else
1205           ip4_add_interface_routes (sw_if_index,
1206                                     im, ip4_af.fib_index,
1207                                     pool_elt_at_index 
1208                                     (lm->if_address_pool, if_address_index));
1209     }
1210
1211   /* If pool did not grow/shrink: add duplicate address. */
1212   if (elts_before != pool_elts (lm->if_address_pool))
1213     {
1214       ip4_add_del_interface_address_callback_t * cb;
1215       vec_foreach (cb, im->add_del_interface_address_callbacks)
1216         cb->function (im, cb->function_opaque, sw_if_index,
1217                       address, address_length,
1218                       if_address_index,
1219                       is_del);
1220     }
1221
1222  done:
1223   vec_free (addr_fib);
1224   return error;
1225 }
1226
1227 clib_error_t *
1228 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1229                                ip4_address_t * address, u32 address_length,
1230                                u32 is_del)
1231 {
1232   return ip4_add_del_interface_address_internal
1233     (vm, sw_if_index, address, address_length,
1234      /* redistribute */ 1,
1235      /* insert_routes */ 1,
1236      is_del);
1237 }
1238
1239 static clib_error_t *
1240 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1241                                 u32 sw_if_index,
1242                                 u32 flags)
1243 {
1244   ip4_main_t * im = &ip4_main;
1245   ip_interface_address_t * ia;
1246   ip4_address_t * a;
1247   u32 is_admin_up, fib_index;
1248   
1249   /* Fill in lookup tables with default table (0). */
1250   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1251   
1252   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1253   
1254   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1255   
1256   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1257
1258   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1259                                 0 /* honor unnumbered */,
1260   ({
1261     a = ip_interface_address_get_address (&im->lookup_main, ia);
1262     if (is_admin_up)
1263       ip4_add_interface_routes (sw_if_index,
1264                                 im, fib_index,
1265                                 ia);
1266     else
1267       ip4_del_interface_routes (im, fib_index,
1268                                 a, ia->address_length);
1269   }));
1270
1271   return 0;
1272 }
1273  
1274 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1275
1276 static clib_error_t *
1277 ip4_sw_interface_add_del (vnet_main_t * vnm,
1278                           u32 sw_if_index,
1279                           u32 is_add)
1280 {
1281   vlib_main_t * vm = vnm->vlib_main;
1282   ip4_main_t * im = &ip4_main;
1283   ip_lookup_main_t * lm = &im->lookup_main;
1284   u32 ci, cast;
1285
1286   for (cast = 0; cast < VNET_N_CAST; cast++)
1287     {
1288       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1289       vnet_config_main_t * vcm = &cm->config_main;
1290
1291       if (! vcm->node_index_by_feature_index)
1292         {
1293           if (cast == VNET_UNICAST)
1294             {
1295               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1296               static char * feature_nodes[] = {
1297                 [IP4_RX_FEATURE_CHECK_ACCESS] = "ip4-inacl",
1298                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX] = "ip4-source-check-via-rx",
1299                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY] = "ip4-source-check-via-any",
1300                 [IP4_RX_FEATURE_IPSEC] = "ipsec-input-ip4",
1301                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1302                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup",
1303               };
1304
1305               vnet_config_init (vm, vcm,
1306                                 start_nodes, ARRAY_LEN (start_nodes),
1307                                 feature_nodes, ARRAY_LEN (feature_nodes));
1308             }
1309           else
1310             {
1311               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1312               static char * feature_nodes[] = {
1313                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1314                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup-multicast",
1315               };
1316
1317               vnet_config_init (vm, vcm,
1318                                 start_nodes, ARRAY_LEN (start_nodes),
1319                                 feature_nodes, ARRAY_LEN (feature_nodes));
1320             }
1321         }
1322
1323       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1324       ci = cm->config_index_by_sw_if_index[sw_if_index];
1325
1326       if (is_add)
1327         ci = vnet_config_add_feature (vm, vcm,
1328                                       ci,
1329                                       IP4_RX_FEATURE_LOOKUP,
1330                                       /* config data */ 0,
1331                                       /* # bytes of config data */ 0);
1332       else
1333         ci = vnet_config_del_feature (vm, vcm,
1334                                       ci,
1335                                       IP4_RX_FEATURE_LOOKUP,
1336                                       /* config data */ 0,
1337                                       /* # bytes of config data */ 0);
1338
1339       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1340     }
1341
1342   return /* no error */ 0;
1343 }
1344
1345 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1346
1347
1348 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1349   .function = ip4_lookup,
1350   .name = "ip4-lookup",
1351   .vector_size = sizeof (u32),
1352
1353   .n_next_nodes = IP_LOOKUP_N_NEXT,
1354   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1355 };
1356
1357 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
1358
1359 static uword
1360 ip4_indirect (vlib_main_t * vm,
1361                vlib_node_runtime_t * node,
1362                vlib_frame_t * frame)
1363 {
1364   return ip4_lookup_inline (vm, node, frame,
1365                             /* lookup_for_responses_to_locally_received_packets */ 0,
1366                             /* is_indirect */ 1);
1367 }
1368
1369 VLIB_REGISTER_NODE (ip4_indirect_node) = {
1370   .function = ip4_indirect,
1371   .name = "ip4-indirect",
1372   .vector_size = sizeof (u32),
1373
1374   .n_next_nodes = IP_LOOKUP_N_NEXT,
1375   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1376 };
1377
1378 VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect)
1379
1380
1381 /* Global IP4 main. */
1382 ip4_main_t ip4_main;
1383
1384 clib_error_t *
1385 ip4_lookup_init (vlib_main_t * vm)
1386 {
1387   ip4_main_t * im = &ip4_main;
1388   uword i;
1389
1390   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1391     {
1392       u32 m;
1393
1394       if (i < 32)
1395         m = pow2_mask (i) << (32 - i);
1396       else 
1397         m = ~0;
1398       im->fib_masks[i] = clib_host_to_net_u32 (m);
1399     }
1400
1401   /* Create FIB with index 0 and table id of 0. */
1402   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1403
1404   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1405
1406   {
1407     pg_node_t * pn;
1408     pn = pg_get_node (ip4_lookup_node.index);
1409     pn->unformat_edit = unformat_pg_ip4_header;
1410   }
1411
1412   {
1413     ethernet_arp_header_t h;
1414
1415     memset (&h, 0, sizeof (h));
1416
1417     /* Set target ethernet address to all zeros. */
1418     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1419
1420 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1421 #define _8(f,v) h.f = v;
1422     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1423     _16 (l3_type, ETHERNET_TYPE_IP4);
1424     _8 (n_l2_address_bytes, 6);
1425     _8 (n_l3_address_bytes, 4);
1426     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1427 #undef _16
1428 #undef _8
1429
1430     vlib_packet_template_init (vm,
1431                                &im->ip4_arp_request_packet_template,
1432                                /* data */ &h,
1433                                sizeof (h),
1434                                /* alloc chunk size */ 8,
1435                                "ip4 arp");
1436   }
1437
1438   return 0;
1439 }
1440
1441 VLIB_INIT_FUNCTION (ip4_lookup_init);
1442
1443 typedef struct {
1444   /* Adjacency taken. */
1445   u32 adj_index;
1446   u32 flow_hash;
1447   u32 fib_index;
1448
1449   /* Packet data, possibly *after* rewrite. */
1450   u8 packet_data[64 - 1*sizeof(u32)];
1451 } ip4_forward_next_trace_t;
1452
1453 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1454 {
1455   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1456   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1457   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1458   vnet_main_t * vnm = vnet_get_main();
1459   ip4_main_t * im = &ip4_main;
1460   ip_adjacency_t * adj;
1461   uword indent = format_get_indent (s);
1462
1463   adj = ip_get_adjacency (&im->lookup_main, t->adj_index);
1464   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1465               t->fib_index, t->adj_index, format_ip_adjacency,
1466               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1467   switch (adj->lookup_next_index)
1468     {
1469     case IP_LOOKUP_NEXT_REWRITE:
1470       s = format (s, "\n%U%U",
1471                   format_white_space, indent,
1472                   format_ip_adjacency_packet_data,
1473                   vnm, &im->lookup_main, t->adj_index,
1474                   t->packet_data, sizeof (t->packet_data));
1475       break;
1476
1477     default:
1478       break;
1479     }
1480
1481   return s;
1482 }
1483
1484 /* Common trace function for all ip4-forward next nodes. */
1485 void
1486 ip4_forward_next_trace (vlib_main_t * vm,
1487                         vlib_node_runtime_t * node,
1488                         vlib_frame_t * frame,
1489                         vlib_rx_or_tx_t which_adj_index)
1490 {
1491   u32 * from, n_left;
1492   ip4_main_t * im = &ip4_main;
1493
1494   n_left = frame->n_vectors;
1495   from = vlib_frame_vector_args (frame);
1496   
1497   while (n_left >= 4)
1498     {
1499       u32 bi0, bi1;
1500       vlib_buffer_t * b0, * b1;
1501       ip4_forward_next_trace_t * t0, * t1;
1502
1503       /* Prefetch next iteration. */
1504       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1505       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1506
1507       bi0 = from[0];
1508       bi1 = from[1];
1509
1510       b0 = vlib_get_buffer (vm, bi0);
1511       b1 = vlib_get_buffer (vm, bi1);
1512
1513       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1514         {
1515           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1516           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1517           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1518           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1519                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1520           clib_memcpy (t0->packet_data,
1521                   vlib_buffer_get_current (b0),
1522                   sizeof (t0->packet_data));
1523         }
1524       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1525         {
1526           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1527           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1528           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1529           t1->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1530                              vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1531           clib_memcpy (t1->packet_data,
1532                   vlib_buffer_get_current (b1),
1533                   sizeof (t1->packet_data));
1534         }
1535       from += 2;
1536       n_left -= 2;
1537     }
1538
1539   while (n_left >= 1)
1540     {
1541       u32 bi0;
1542       vlib_buffer_t * b0;
1543       ip4_forward_next_trace_t * t0;
1544
1545       bi0 = from[0];
1546
1547       b0 = vlib_get_buffer (vm, bi0);
1548
1549       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1550         {
1551           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1552           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1553           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1554           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1555                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1556           clib_memcpy (t0->packet_data,
1557                   vlib_buffer_get_current (b0),
1558                   sizeof (t0->packet_data));
1559         }
1560       from += 1;
1561       n_left -= 1;
1562     }
1563 }
1564
1565 static uword
1566 ip4_drop_or_punt (vlib_main_t * vm,
1567                   vlib_node_runtime_t * node,
1568                   vlib_frame_t * frame,
1569                   ip4_error_t error_code)
1570 {
1571   u32 * buffers = vlib_frame_vector_args (frame);
1572   uword n_packets = frame->n_vectors;
1573
1574   vlib_error_drop_buffers (vm, node,
1575                            buffers,
1576                            /* stride */ 1,
1577                            n_packets,
1578                            /* next */ 0,
1579                            ip4_input_node.index,
1580                            error_code);
1581
1582   if (node->flags & VLIB_NODE_FLAG_TRACE)
1583     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1584
1585   return n_packets;
1586 }
1587
1588 static uword
1589 ip4_drop (vlib_main_t * vm,
1590           vlib_node_runtime_t * node,
1591           vlib_frame_t * frame)
1592 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1593
1594 static uword
1595 ip4_punt (vlib_main_t * vm,
1596           vlib_node_runtime_t * node,
1597           vlib_frame_t * frame)
1598 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1599
1600 static uword
1601 ip4_miss (vlib_main_t * vm,
1602           vlib_node_runtime_t * node,
1603           vlib_frame_t * frame)
1604 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1605
1606 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1607   .function = ip4_drop,
1608   .name = "ip4-drop",
1609   .vector_size = sizeof (u32),
1610
1611   .format_trace = format_ip4_forward_next_trace,
1612
1613   .n_next_nodes = 1,
1614   .next_nodes = {
1615     [0] = "error-drop",
1616   },
1617 };
1618
1619 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1620
1621 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1622   .function = ip4_punt,
1623   .name = "ip4-punt",
1624   .vector_size = sizeof (u32),
1625
1626   .format_trace = format_ip4_forward_next_trace,
1627
1628   .n_next_nodes = 1,
1629   .next_nodes = {
1630     [0] = "error-punt",
1631   },
1632 };
1633
1634 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1635
1636 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1637   .function = ip4_miss,
1638   .name = "ip4-miss",
1639   .vector_size = sizeof (u32),
1640
1641   .format_trace = format_ip4_forward_next_trace,
1642
1643   .n_next_nodes = 1,
1644   .next_nodes = {
1645     [0] = "error-drop",
1646   },
1647 };
1648
1649 VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss)
1650
1651 /* Compute TCP/UDP/ICMP4 checksum in software. */
1652 u16
1653 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1654                               ip4_header_t * ip0)
1655 {
1656   ip_csum_t sum0;
1657   u32 ip_header_length, payload_length_host_byte_order;
1658   u32 n_this_buffer, n_bytes_left;
1659   u16 sum16;
1660   void * data_this_buffer;
1661   
1662   /* Initialize checksum with ip header. */
1663   ip_header_length = ip4_header_bytes (ip0);
1664   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1665   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1666
1667   if (BITS (uword) == 32)
1668     {
1669       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1670       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1671     }
1672   else
1673     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1674
1675   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1676   data_this_buffer = (void *) ip0 + ip_header_length;
1677   if (n_this_buffer + ip_header_length > p0->current_length)
1678     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1679   while (1)
1680     {
1681       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1682       n_bytes_left -= n_this_buffer;
1683       if (n_bytes_left == 0)
1684         break;
1685
1686       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1687       p0 = vlib_get_buffer (vm, p0->next_buffer);
1688       data_this_buffer = vlib_buffer_get_current (p0);
1689       n_this_buffer = p0->current_length;
1690     }
1691
1692   sum16 = ~ ip_csum_fold (sum0);
1693
1694   return sum16;
1695 }
1696
1697 static u32
1698 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1699 {
1700   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1701   udp_header_t * udp0;
1702   u16 sum16;
1703
1704   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1705           || ip0->protocol == IP_PROTOCOL_UDP);
1706
1707   udp0 = (void *) (ip0 + 1);
1708   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1709     {
1710       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1711                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1712       return p0->flags;
1713     }
1714
1715   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1716
1717   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1718                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1719
1720   return p0->flags;
1721 }
1722
1723 static uword
1724 ip4_local (vlib_main_t * vm,
1725            vlib_node_runtime_t * node,
1726            vlib_frame_t * frame)
1727 {
1728   ip4_main_t * im = &ip4_main;
1729   ip_lookup_main_t * lm = &im->lookup_main;
1730   ip_local_next_t next_index;
1731   u32 * from, * to_next, n_left_from, n_left_to_next;
1732   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1733
1734   from = vlib_frame_vector_args (frame);
1735   n_left_from = frame->n_vectors;
1736   next_index = node->cached_next_index;
1737   
1738   if (node->flags & VLIB_NODE_FLAG_TRACE)
1739     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1740
1741   while (n_left_from > 0)
1742     {
1743       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1744
1745       while (n_left_from >= 4 && n_left_to_next >= 2)
1746         {
1747           vlib_buffer_t * p0, * p1;
1748           ip4_header_t * ip0, * ip1;
1749           udp_header_t * udp0, * udp1;
1750           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1751           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1752           ip_adjacency_t * adj0, * adj1;
1753           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
1754           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
1755           i32 len_diff0, len_diff1;
1756           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1757           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1758           u8 enqueue_code;
1759       
1760           pi0 = to_next[0] = from[0];
1761           pi1 = to_next[1] = from[1];
1762           from += 2;
1763           n_left_from -= 2;
1764           to_next += 2;
1765           n_left_to_next -= 2;
1766       
1767           p0 = vlib_get_buffer (vm, pi0);
1768           p1 = vlib_get_buffer (vm, pi1);
1769
1770           ip0 = vlib_buffer_get_current (p0);
1771           ip1 = vlib_buffer_get_current (p1);
1772
1773           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1774                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1775           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1776                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1777
1778           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1779           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
1780
1781           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1782
1783           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1784           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1785
1786           /* Treat IP frag packets as "experimental" protocol for now
1787              until support of IP frag reassembly is implemented */
1788           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1789           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1790           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1791           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1792           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1793           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1794
1795           flags0 = p0->flags;
1796           flags1 = p1->flags;
1797
1798           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1799           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1800
1801           udp0 = ip4_next_header (ip0);
1802           udp1 = ip4_next_header (ip1);
1803
1804           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1805           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1806           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1807
1808           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1809           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1810
1811           /* Verify UDP length. */
1812           ip_len0 = clib_net_to_host_u16 (ip0->length);
1813           ip_len1 = clib_net_to_host_u16 (ip1->length);
1814           udp_len0 = clib_net_to_host_u16 (udp0->length);
1815           udp_len1 = clib_net_to_host_u16 (udp1->length);
1816
1817           len_diff0 = ip_len0 - udp_len0;
1818           len_diff1 = ip_len1 - udp_len1;
1819
1820           len_diff0 = is_udp0 ? len_diff0 : 0;
1821           len_diff1 = is_udp1 ? len_diff1 : 0;
1822
1823           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1824                                 & good_tcp_udp0 & good_tcp_udp1)))
1825             {
1826               if (is_tcp_udp0)
1827                 {
1828                   if (is_tcp_udp0
1829                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1830                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1831                   good_tcp_udp0 =
1832                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1833                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1834                 }
1835               if (is_tcp_udp1)
1836                 {
1837                   if (is_tcp_udp1
1838                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1839                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1840                   good_tcp_udp1 =
1841                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1842                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1843                 }
1844             }
1845
1846           good_tcp_udp0 &= len_diff0 >= 0;
1847           good_tcp_udp1 &= len_diff1 >= 0;
1848
1849           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1850           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1851
1852           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1853
1854           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1855           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1856
1857           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1858           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1859                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1860                     : error0);
1861           error1 = (is_tcp_udp1 && ! good_tcp_udp1
1862                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1863                     : error1);
1864
1865           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1866           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1867
1868           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1869           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1870
1871           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1872           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
1873
1874           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1875                                                            &ip0->src_address,
1876                                                            /* no_default_route */ 1));
1877           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
1878                                                            &ip1->src_address,
1879                                                            /* no_default_route */ 1));
1880
1881           adj0 = ip_get_adjacency (lm, adj_index0);
1882           adj1 = ip_get_adjacency (lm, adj_index1);
1883
1884           /* 
1885            * Must have a route to source otherwise we drop the packet.
1886            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1887            */
1888           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1889                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1890                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
1891                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1892                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1893                     ? IP4_ERROR_SRC_LOOKUP_MISS
1894                     : error0);
1895           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
1896                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1897                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
1898                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1899                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1900                     ? IP4_ERROR_SRC_LOOKUP_MISS
1901                     : error1);
1902
1903           next0 = lm->local_next_by_ip_protocol[proto0];
1904           next1 = lm->local_next_by_ip_protocol[proto1];
1905
1906           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1907           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1908
1909           p0->error = error0 ? error_node->errors[error0] : 0;
1910           p1->error = error1 ? error_node->errors[error1] : 0;
1911
1912           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1913
1914           if (PREDICT_FALSE (enqueue_code != 0))
1915             {
1916               switch (enqueue_code)
1917                 {
1918                 case 1:
1919                   /* A B A */
1920                   to_next[-2] = pi1;
1921                   to_next -= 1;
1922                   n_left_to_next += 1;
1923                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1924                   break;
1925
1926                 case 2:
1927                   /* A A B */
1928                   to_next -= 1;
1929                   n_left_to_next += 1;
1930                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1931                   break;
1932
1933                 case 3:
1934                   /* A B B or A B C */
1935                   to_next -= 2;
1936                   n_left_to_next += 2;
1937                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1938                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1939                   if (next0 == next1)
1940                     {
1941                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1942                       next_index = next1;
1943                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1944                     }
1945                   break;
1946                 }
1947             }
1948         }
1949
1950       while (n_left_from > 0 && n_left_to_next > 0)
1951         {
1952           vlib_buffer_t * p0;
1953           ip4_header_t * ip0;
1954           udp_header_t * udp0;
1955           ip4_fib_mtrie_t * mtrie0;
1956           ip4_fib_mtrie_leaf_t leaf0;
1957           ip_adjacency_t * adj0;
1958           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
1959           i32 len_diff0;
1960           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1961       
1962           pi0 = to_next[0] = from[0];
1963           from += 1;
1964           n_left_from -= 1;
1965           to_next += 1;
1966           n_left_to_next -= 1;
1967       
1968           p0 = vlib_get_buffer (vm, pi0);
1969
1970           ip0 = vlib_buffer_get_current (p0);
1971
1972           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1973                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1974
1975           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1976
1977           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1978
1979           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1980
1981           /* Treat IP frag packets as "experimental" protocol for now
1982              until support of IP frag reassembly is implemented */
1983           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1984           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1985           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1986
1987           flags0 = p0->flags;
1988
1989           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1990
1991           udp0 = ip4_next_header (ip0);
1992
1993           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1994           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1995
1996           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1997
1998           /* Verify UDP length. */
1999           ip_len0 = clib_net_to_host_u16 (ip0->length);
2000           udp_len0 = clib_net_to_host_u16 (udp0->length);
2001
2002           len_diff0 = ip_len0 - udp_len0;
2003
2004           len_diff0 = is_udp0 ? len_diff0 : 0;
2005
2006           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
2007             {
2008               if (is_tcp_udp0)
2009                 {
2010                   if (is_tcp_udp0
2011                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2012                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2013                   good_tcp_udp0 =
2014                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2015                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2016                 }
2017             }
2018
2019           good_tcp_udp0 &= len_diff0 >= 0;
2020
2021           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2022
2023           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2024
2025           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2026
2027           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2028           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2029                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2030                     : error0);
2031
2032           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2033
2034           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2035           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2036
2037           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2038                                                            &ip0->src_address,
2039                                                            /* no_default_route */ 1));
2040
2041           adj0 = ip_get_adjacency (lm, adj_index0);
2042
2043           /* Must have a route to source otherwise we drop the packet. */
2044           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2045                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2046                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2047                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2048                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2049                     ? IP4_ERROR_SRC_LOOKUP_MISS
2050                     : error0);
2051
2052           next0 = lm->local_next_by_ip_protocol[proto0];
2053
2054           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2055
2056           p0->error = error0? error_node->errors[error0] : 0;
2057
2058           if (PREDICT_FALSE (next0 != next_index))
2059             {
2060               n_left_to_next += 1;
2061               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2062
2063               next_index = next0;
2064               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2065               to_next[0] = pi0;
2066               to_next += 1;
2067               n_left_to_next -= 1;
2068             }
2069         }
2070   
2071       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2072     }
2073
2074   return frame->n_vectors;
2075 }
2076
2077 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2078   .function = ip4_local,
2079   .name = "ip4-local",
2080   .vector_size = sizeof (u32),
2081
2082   .format_trace = format_ip4_forward_next_trace,
2083
2084   .n_next_nodes = IP_LOCAL_N_NEXT,
2085   .next_nodes = {
2086     [IP_LOCAL_NEXT_DROP] = "error-drop",
2087     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2088     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2089     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2090   },
2091 };
2092
2093 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
2094
2095 void ip4_register_protocol (u32 protocol, u32 node_index)
2096 {
2097   vlib_main_t * vm = vlib_get_main();
2098   ip4_main_t * im = &ip4_main;
2099   ip_lookup_main_t * lm = &im->lookup_main;
2100
2101   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2102   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2103 }
2104
2105 static clib_error_t *
2106 show_ip_local_command_fn (vlib_main_t * vm,
2107                           unformat_input_t * input,
2108                          vlib_cli_command_t * cmd)
2109 {
2110   ip4_main_t * im = &ip4_main;
2111   ip_lookup_main_t * lm = &im->lookup_main;
2112   int i;
2113
2114   vlib_cli_output (vm, "Protocols handled by ip4_local");
2115   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2116     {
2117       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2118         vlib_cli_output (vm, "%d", i);
2119     }
2120   return 0;
2121 }
2122
2123
2124
2125 VLIB_CLI_COMMAND (show_ip_local, static) = {
2126   .path = "show ip local",
2127   .function = show_ip_local_command_fn,
2128   .short_help = "Show ip local protocol table",
2129 };
2130
2131 static uword
2132 ip4_arp (vlib_main_t * vm,
2133          vlib_node_runtime_t * node,
2134          vlib_frame_t * frame)
2135 {
2136   vnet_main_t * vnm = vnet_get_main();
2137   ip4_main_t * im = &ip4_main;
2138   ip_lookup_main_t * lm = &im->lookup_main;
2139   u32 * from, * to_next_drop;
2140   uword n_left_from, n_left_to_next_drop, next_index;
2141   static f64 time_last_seed_change = -1e100;
2142   static u32 hash_seeds[3];
2143   static uword hash_bitmap[256 / BITS (uword)]; 
2144   f64 time_now;
2145
2146   if (node->flags & VLIB_NODE_FLAG_TRACE)
2147     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2148
2149   time_now = vlib_time_now (vm);
2150   if (time_now - time_last_seed_change > 1e-3)
2151     {
2152       uword i;
2153       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2154                                              sizeof (hash_seeds));
2155       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2156         hash_seeds[i] = r[i];
2157
2158       /* Mark all hash keys as been no-seen before. */
2159       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2160         hash_bitmap[i] = 0;
2161
2162       time_last_seed_change = time_now;
2163     }
2164
2165   from = vlib_frame_vector_args (frame);
2166   n_left_from = frame->n_vectors;
2167   next_index = node->cached_next_index;
2168   if (next_index == IP4_ARP_NEXT_DROP)
2169     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2170
2171   while (n_left_from > 0)
2172     {
2173       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2174                            to_next_drop, n_left_to_next_drop);
2175
2176       while (n_left_from > 0 && n_left_to_next_drop > 0)
2177         {
2178           vlib_buffer_t * p0;
2179           ip4_header_t * ip0;
2180           ethernet_header_t * eh0;
2181           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2182           uword bm0;
2183           ip_adjacency_t * adj0;
2184
2185           pi0 = from[0];
2186
2187           p0 = vlib_get_buffer (vm, pi0);
2188
2189           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2190           adj0 = ip_get_adjacency (lm, adj_index0);
2191           ip0 = vlib_buffer_get_current (p0);
2192
2193           /* If packet destination is not local, send ARP to next hop */
2194           if (adj0->arp.next_hop.ip4.as_u32)
2195             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2196
2197           /* 
2198            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2199            * rewrite to this packet, we need to skip it here.
2200            * Note, to distinguish from src IP addr *.8.6.*, we
2201            * check for a bcast eth dest instead of IPv4 version.
2202            */
2203           eh0 = (ethernet_header_t*)ip0;
2204           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2205             {
2206               u32 vlan_num = 0;
2207               u16 * etype = &eh0->type;
2208               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2209                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2210                 {
2211                   vlan_num += 1;
2212                   etype += 2; //vlan tag also 16 bits, same as etype
2213                 }
2214               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2215                 {
2216                   vlib_buffer_advance (
2217                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2218                   ip0 = vlib_buffer_get_current (p0);
2219                 }
2220             }
2221
2222           a0 = hash_seeds[0];
2223           b0 = hash_seeds[1];
2224           c0 = hash_seeds[2];
2225
2226           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2227           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2228
2229           a0 ^= ip0->dst_address.data_u32;
2230           b0 ^= sw_if_index0;
2231
2232           hash_v3_finalize32 (a0, b0, c0);
2233
2234           c0 &= BITS (hash_bitmap) - 1;
2235           c0 = c0 / BITS (uword);
2236           m0 = (uword) 1 << (c0 % BITS (uword));
2237
2238           bm0 = hash_bitmap[c0];
2239           drop0 = (bm0 & m0) != 0;
2240
2241           /* Mark it as seen. */
2242           hash_bitmap[c0] = bm0 | m0;
2243
2244           from += 1;
2245           n_left_from -= 1;
2246           to_next_drop[0] = pi0;
2247           to_next_drop += 1;
2248           n_left_to_next_drop -= 1;
2249
2250           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2251
2252           if (drop0)
2253             continue;
2254
2255           /* 
2256            * Can happen if the control-plane is programming tables
2257            * with traffic flowing; at least that's today's lame excuse.
2258            */
2259           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2260             {
2261               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2262             }
2263           else
2264           /* Send ARP request. */
2265           {
2266             u32 bi0 = 0;
2267             vlib_buffer_t * b0;
2268             ethernet_arp_header_t * h0;
2269             vnet_hw_interface_t * hw_if0;
2270
2271             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2272
2273             /* Add rewrite/encap string for ARP packet. */
2274             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2275
2276             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2277
2278             /* Src ethernet address in ARP header. */
2279             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2280                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2281
2282             ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0);
2283
2284             /* Copy in destination address we are requesting. */
2285             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2286
2287             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2288             b0 = vlib_get_buffer (vm, bi0);
2289             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2290
2291             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2292
2293             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2294           }
2295         }
2296
2297       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2298     }
2299
2300   return frame->n_vectors;
2301 }
2302
2303 static char * ip4_arp_error_strings[] = {
2304   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2305   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2306   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2307   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2308   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2309 };
2310
2311 VLIB_REGISTER_NODE (ip4_arp_node) = {
2312   .function = ip4_arp,
2313   .name = "ip4-arp",
2314   .vector_size = sizeof (u32),
2315
2316   .format_trace = format_ip4_forward_next_trace,
2317
2318   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2319   .error_strings = ip4_arp_error_strings,
2320
2321   .n_next_nodes = IP4_ARP_N_NEXT,
2322   .next_nodes = {
2323     [IP4_ARP_NEXT_DROP] = "error-drop",
2324   },
2325 };
2326
2327 #define foreach_notrace_ip4_arp_error           \
2328 _(DROP)                                         \
2329 _(REQUEST_SENT)                                 \
2330 _(REPLICATE_DROP)                               \
2331 _(REPLICATE_FAIL)
2332
2333 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2334 {
2335   vlib_node_runtime_t *rt = 
2336     vlib_node_get_runtime (vm, ip4_arp_node.index);
2337
2338   /* don't trace ARP request packets */
2339 #define _(a)                                    \
2340     vnet_pcap_drop_trace_filter_add_del         \
2341         (rt->errors[IP4_ARP_ERROR_##a],         \
2342          1 /* is_add */);
2343     foreach_notrace_ip4_arp_error;
2344 #undef _
2345   return 0;
2346 }
2347
2348 VLIB_INIT_FUNCTION(arp_notrace_init);
2349
2350
2351 /* Send an ARP request to see if given destination is reachable on given interface. */
2352 clib_error_t *
2353 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2354 {
2355   vnet_main_t * vnm = vnet_get_main();
2356   ip4_main_t * im = &ip4_main;
2357   ethernet_arp_header_t * h;
2358   ip4_address_t * src;
2359   ip_interface_address_t * ia;
2360   ip_adjacency_t * adj;
2361   vnet_hw_interface_t * hi;
2362   vnet_sw_interface_t * si;
2363   vlib_buffer_t * b;
2364   u32 bi = 0;
2365
2366   si = vnet_get_sw_interface (vnm, sw_if_index);
2367
2368   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2369     {
2370       return clib_error_return (0, "%U: interface %U down",
2371                                 format_ip4_address, dst, 
2372                                 format_vnet_sw_if_index_name, vnm, 
2373                                 sw_if_index);
2374     }
2375
2376   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2377   if (! src)
2378     {
2379       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2380       return clib_error_return 
2381         (0, "no matching interface address for destination %U (interface %U)",
2382          format_ip4_address, dst,
2383          format_vnet_sw_if_index_name, vnm, sw_if_index);
2384     }
2385
2386   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2387
2388   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2389
2390   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2391
2392   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2393
2394   h->ip4_over_ethernet[0].ip4 = src[0];
2395   h->ip4_over_ethernet[1].ip4 = dst[0];
2396
2397   b = vlib_get_buffer (vm, bi);
2398   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2399
2400   /* Add encapsulation string for software interface (e.g. ethernet header). */
2401   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2402   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2403
2404   {
2405     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2406     u32 * to_next = vlib_frame_vector_args (f);
2407     to_next[0] = bi;
2408     f->n_vectors = 1;
2409     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2410   }
2411
2412   return /* no error */ 0;
2413 }
2414
/* Next-node indices used by the ip4 rewrite path. */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ARP = 1,
} ip4_rewrite_next_t;
2419
2420 always_inline uword
2421 ip4_rewrite_inline (vlib_main_t * vm,
2422                     vlib_node_runtime_t * node,
2423                     vlib_frame_t * frame,
2424                     int rewrite_for_locally_received_packets)
2425 {
2426   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2427   u32 * from = vlib_frame_vector_args (frame);
2428   u32 n_left_from, n_left_to_next, * to_next, next_index;
2429   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2430   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2431
2432   n_left_from = frame->n_vectors;
2433   next_index = node->cached_next_index;
2434   u32 cpu_index = os_get_cpu_number();
2435   
2436   while (n_left_from > 0)
2437     {
2438       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2439
2440       while (n_left_from >= 4 && n_left_to_next >= 2)
2441         {
2442           ip_adjacency_t * adj0, * adj1;
2443           vlib_buffer_t * p0, * p1;
2444           ip4_header_t * ip0, * ip1;
2445           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2446           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2447           u32 next0_override, next1_override;
2448       
2449           if (rewrite_for_locally_received_packets)
2450               next0_override = next1_override = 0;
2451
2452           /* Prefetch next iteration. */
2453           {
2454             vlib_buffer_t * p2, * p3;
2455
2456             p2 = vlib_get_buffer (vm, from[2]);
2457             p3 = vlib_get_buffer (vm, from[3]);
2458
2459             vlib_prefetch_buffer_header (p2, STORE);
2460             vlib_prefetch_buffer_header (p3, STORE);
2461
2462             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2463             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2464           }
2465
2466           pi0 = to_next[0] = from[0];
2467           pi1 = to_next[1] = from[1];
2468
2469           from += 2;
2470           n_left_from -= 2;
2471           to_next += 2;
2472           n_left_to_next -= 2;
2473       
2474           p0 = vlib_get_buffer (vm, pi0);
2475           p1 = vlib_get_buffer (vm, pi1);
2476
2477           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2478           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2479
2480           /* We should never rewrite a pkt using the MISS adjacency */
2481           ASSERT(adj_index0 && adj_index1);
2482
2483           ip0 = vlib_buffer_get_current (p0);
2484           ip1 = vlib_buffer_get_current (p1);
2485
2486           error0 = error1 = IP4_ERROR_NONE;
2487
2488           /* Decrement TTL & update checksum.
2489              Works either endian, so no need for byte swap. */
2490           if (! rewrite_for_locally_received_packets)
2491             {
2492               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2493
2494               /* Input node should have reject packets with ttl 0. */
2495               ASSERT (ip0->ttl > 0);
2496               ASSERT (ip1->ttl > 0);
2497
2498               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2499               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2500
2501               checksum0 += checksum0 >= 0xffff;
2502               checksum1 += checksum1 >= 0xffff;
2503
2504               ip0->checksum = checksum0;
2505               ip1->checksum = checksum1;
2506
2507               ttl0 -= 1;
2508               ttl1 -= 1;
2509
2510               ip0->ttl = ttl0;
2511               ip1->ttl = ttl1;
2512
2513               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2514               error1 = ttl1 <= 0 ? IP4_ERROR_TIME_EXPIRED : error1;
2515
2516               /* Verify checksum. */
2517               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2518               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2519             }
2520
2521           /* Rewrite packet header and updates lengths. */
2522           adj0 = ip_get_adjacency (lm, adj_index0);
2523           adj1 = ip_get_adjacency (lm, adj_index1);
2524       
2525           if (rewrite_for_locally_received_packets)
2526             {
2527               /*
2528                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2529                * we end up here with a local adjacency in hand
2530                * The local adj rewrite data is 0xfefe on purpose.
2531                * Bad engineer, no donut for you.
2532                */
2533               if (PREDICT_FALSE(adj0->lookup_next_index 
2534                                 == IP_LOOKUP_NEXT_LOCAL))
2535                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2536               if (PREDICT_FALSE(adj0->lookup_next_index
2537                                 == IP_LOOKUP_NEXT_ARP))
2538                 next0_override = IP4_REWRITE_NEXT_ARP;
2539               if (PREDICT_FALSE(adj1->lookup_next_index 
2540                                 == IP_LOOKUP_NEXT_LOCAL))
2541                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2542               if (PREDICT_FALSE(adj1->lookup_next_index
2543                                 == IP_LOOKUP_NEXT_ARP))
2544                 next1_override = IP4_REWRITE_NEXT_ARP;
2545             }
2546
2547           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2548           rw_len0 = adj0[0].rewrite_header.data_bytes;
2549           rw_len1 = adj1[0].rewrite_header.data_bytes;
2550           next0 = (error0 == IP4_ERROR_NONE) 
2551             ? adj0[0].rewrite_header.next_index : 0;
2552
2553           if (rewrite_for_locally_received_packets)
2554               next0 = next0 && next0_override ? next0_override : next0;
2555
2556           next1 = (error1 == IP4_ERROR_NONE)
2557             ? adj1[0].rewrite_header.next_index : 0;
2558
2559           if (rewrite_for_locally_received_packets)
2560               next1 = next1 && next1_override ? next1_override : next1;
2561
2562           /* 
2563            * We've already accounted for an ethernet_header_t elsewhere
2564            */
2565           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2566               vlib_increment_combined_counter 
2567                   (&lm->adjacency_counters,
2568                    cpu_index, adj_index0, 
2569                    /* packet increment */ 0,
2570                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2571
2572           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2573               vlib_increment_combined_counter 
2574                   (&lm->adjacency_counters,
2575                    cpu_index, adj_index1, 
2576                    /* packet increment */ 0,
2577                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2578
2579           /* Check MTU of outgoing interface. */
2580           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2581                     ? IP4_ERROR_MTU_EXCEEDED
2582                     : error0);
2583           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2584                     ? IP4_ERROR_MTU_EXCEEDED
2585                     : error1);
2586
2587           p0->current_data -= rw_len0;
2588           p1->current_data -= rw_len1;
2589
2590           p0->current_length += rw_len0;
2591           p1->current_length += rw_len1;
2592
2593           vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
2594           vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index;
2595       
2596           p0->error = error_node->errors[error0];
2597           p1->error = error_node->errors[error1];
2598
2599           /* Guess we are only writing on simple Ethernet header. */
2600           vnet_rewrite_two_headers (adj0[0], adj1[0],
2601                                     ip0, ip1,
2602                                     sizeof (ethernet_header_t));
2603       
2604           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2605                                            to_next, n_left_to_next,
2606                                            pi0, pi1, next0, next1);
2607         }
2608
2609       while (n_left_from > 0 && n_left_to_next > 0)
2610         {
2611           ip_adjacency_t * adj0;
2612           vlib_buffer_t * p0;
2613           ip4_header_t * ip0;
2614           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2615           u32 next0_override;
2616       
2617           if (rewrite_for_locally_received_packets)
2618               next0_override = 0;
2619
2620           pi0 = to_next[0] = from[0];
2621
2622           p0 = vlib_get_buffer (vm, pi0);
2623
2624           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2625
2626           /* We should never rewrite a pkt using the MISS adjacency */
2627           ASSERT(adj_index0);
2628
2629           adj0 = ip_get_adjacency (lm, adj_index0);
2630       
2631           ip0 = vlib_buffer_get_current (p0);
2632
2633           error0 = IP4_ERROR_NONE;
2634           next0 = 0;            /* drop on error */
2635
2636           /* Decrement TTL & update checksum. */
2637           if (! rewrite_for_locally_received_packets)
2638             {
2639               i32 ttl0 = ip0->ttl;
2640
2641               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2642
2643               checksum0 += checksum0 >= 0xffff;
2644
2645               ip0->checksum = checksum0;
2646
2647               ASSERT (ip0->ttl > 0);
2648
2649               ttl0 -= 1;
2650
2651               ip0->ttl = ttl0;
2652
2653               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2654
2655               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2656             }
2657
2658           if (rewrite_for_locally_received_packets)
2659             {
2660               /*
2661                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2662                * we end up here with a local adjacency in hand
2663                * The local adj rewrite data is 0xfefe on purpose.
2664                * Bad engineer, no donut for you.
2665                */
2666               if (PREDICT_FALSE(adj0->lookup_next_index 
2667                                 == IP_LOOKUP_NEXT_LOCAL))
2668                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2669               /* 
2670                * We have to override the next_index in ARP adjacencies,
2671                * because they're set up for ip4-arp, not this node...
2672                */
2673               if (PREDICT_FALSE(adj0->lookup_next_index
2674                                 == IP_LOOKUP_NEXT_ARP))
2675                 next0_override = IP4_REWRITE_NEXT_ARP;
2676             }
2677
2678           /* Guess we are only writing on simple Ethernet header. */
2679           vnet_rewrite_one_header (adj0[0], ip0, 
2680                                    sizeof (ethernet_header_t));
2681           
2682           /* Update packet buffer attributes/set output interface. */
2683           rw_len0 = adj0[0].rewrite_header.data_bytes;
2684           
2685           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2686               vlib_increment_combined_counter 
2687                   (&lm->adjacency_counters,
2688                    cpu_index, adj_index0, 
2689                    /* packet increment */ 0,
2690                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2691           
2692           /* Check MTU of outgoing interface. */
2693           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2694                     > adj0[0].rewrite_header.max_l3_packet_bytes
2695                     ? IP4_ERROR_MTU_EXCEEDED
2696                     : error0);
2697           
2698           p0->error = error_node->errors[error0];
2699           p0->current_data -= rw_len0;
2700           p0->current_length += rw_len0;
2701           vnet_buffer (p0)->sw_if_index[VLIB_TX] = 
2702             adj0[0].rewrite_header.sw_if_index;
2703           
2704           next0 = (error0 == IP4_ERROR_NONE)
2705             ? adj0[0].rewrite_header.next_index : 0;
2706
2707           if (rewrite_for_locally_received_packets)
2708               next0 = next0 && next0_override ? next0_override : next0;
2709
2710           from += 1;
2711           n_left_from -= 1;
2712           to_next += 1;
2713           n_left_to_next -= 1;
2714       
2715           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2716                                            to_next, n_left_to_next,
2717                                            pi0, next0);
2718         }
2719   
2720       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2721     }
2722
2723   /* Need to do trace after rewrites to pick up new packet data. */
2724   if (node->flags & VLIB_NODE_FLAG_TRACE)
2725     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2726
2727   return frame->n_vectors;
2728 }
2729
2730 static uword
2731 ip4_rewrite_transit (vlib_main_t * vm,
2732                      vlib_node_runtime_t * node,
2733                      vlib_frame_t * frame)
2734 {
2735   return ip4_rewrite_inline (vm, node, frame,
2736                              /* rewrite_for_locally_received_packets */ 0);
2737 }
2738
2739 static uword
2740 ip4_rewrite_local (vlib_main_t * vm,
2741                    vlib_node_runtime_t * node,
2742                    vlib_frame_t * frame)
2743 {
2744   return ip4_rewrite_inline (vm, node, frame,
2745                              /* rewrite_for_locally_received_packets */ 1);
2746 }
2747
2748 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2749   .function = ip4_rewrite_transit,
2750   .name = "ip4-rewrite-transit",
2751   .vector_size = sizeof (u32),
2752
2753   .format_trace = format_ip4_forward_next_trace,
2754
2755   .n_next_nodes = 2,
2756   .next_nodes = {
2757     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2758     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2759   },
2760 };
2761
2762 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
2763
2764 VLIB_REGISTER_NODE (ip4_rewrite_local_node,static) = {
2765   .function = ip4_rewrite_local,
2766   .name = "ip4-rewrite-local",
2767   .vector_size = sizeof (u32),
2768
2769   .sibling_of = "ip4-rewrite-transit",
2770
2771   .format_trace = format_ip4_forward_next_trace,
2772
2773   .n_next_nodes = 2,
2774   .next_nodes = {
2775     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2776     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2777   },
2778 };
2779
2780 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
2781
2782 static clib_error_t *
2783 add_del_interface_table (vlib_main_t * vm,
2784                          unformat_input_t * input,
2785                          vlib_cli_command_t * cmd)
2786 {
2787   vnet_main_t * vnm = vnet_get_main();
2788   clib_error_t * error = 0;
2789   u32 sw_if_index, table_id;
2790
2791   sw_if_index = ~0;
2792
2793   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2794     {
2795       error = clib_error_return (0, "unknown interface `%U'",
2796                                  format_unformat_error, input);
2797       goto done;
2798     }
2799
2800   if (unformat (input, "%d", &table_id))
2801     ;
2802   else
2803     {
2804       error = clib_error_return (0, "expected table id `%U'",
2805                                  format_unformat_error, input);
2806       goto done;
2807     }
2808
2809   {
2810     ip4_main_t * im = &ip4_main;
2811     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
2812
2813     if (fib) 
2814       {
2815         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2816         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
2817     }
2818   }
2819
2820  done:
2821   return error;
2822 }
2823
2824 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2825   .path = "set interface ip table",
2826   .function = add_del_interface_table,
2827   .short_help = "Add/delete FIB table id for interface",
2828 };
2829
2830
2831 static uword
2832 ip4_lookup_multicast (vlib_main_t * vm,
2833                       vlib_node_runtime_t * node,
2834                       vlib_frame_t * frame)
2835 {
2836   ip4_main_t * im = &ip4_main;
2837   ip_lookup_main_t * lm = &im->lookup_main;
2838   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
2839   u32 n_left_from, n_left_to_next, * from, * to_next;
2840   ip_lookup_next_t next;
2841   u32 cpu_index = os_get_cpu_number();
2842
2843   from = vlib_frame_vector_args (frame);
2844   n_left_from = frame->n_vectors;
2845   next = node->cached_next_index;
2846
2847   while (n_left_from > 0)
2848     {
2849       vlib_get_next_frame (vm, node, next,
2850                            to_next, n_left_to_next);
2851
2852       while (n_left_from >= 4 && n_left_to_next >= 2)
2853         {
2854           vlib_buffer_t * p0, * p1;
2855           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
2856           ip_lookup_next_t next0, next1;
2857           ip4_header_t * ip0, * ip1;
2858           ip_adjacency_t * adj0, * adj1;
2859           u32 fib_index0, fib_index1;
2860           u32 flow_hash_config0, flow_hash_config1;
2861
2862           /* Prefetch next iteration. */
2863           {
2864             vlib_buffer_t * p2, * p3;
2865
2866             p2 = vlib_get_buffer (vm, from[2]);
2867             p3 = vlib_get_buffer (vm, from[3]);
2868
2869             vlib_prefetch_buffer_header (p2, LOAD);
2870             vlib_prefetch_buffer_header (p3, LOAD);
2871
2872             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2873             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2874           }
2875
2876           pi0 = to_next[0] = from[0];
2877           pi1 = to_next[1] = from[1];
2878
2879           p0 = vlib_get_buffer (vm, pi0);
2880           p1 = vlib_get_buffer (vm, pi1);
2881
2882           ip0 = vlib_buffer_get_current (p0);
2883           ip1 = vlib_buffer_get_current (p1);
2884
2885           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2886           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2887           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2888             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2889           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2890             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2891
2892           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
2893                                               &ip0->dst_address, p0);
2894           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
2895                                               &ip1->dst_address, p1);
2896
2897           adj0 = ip_get_adjacency (lm, adj_index0);
2898           adj1 = ip_get_adjacency (lm, adj_index1);
2899
2900           next0 = adj0->lookup_next_index;
2901           next1 = adj1->lookup_next_index;
2902
2903           flow_hash_config0 = 
2904               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
2905
2906           flow_hash_config1 = 
2907               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
2908
2909           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
2910               (ip0, flow_hash_config0);
2911                                                                   
2912           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
2913               (ip1, flow_hash_config1);
2914
2915           ASSERT (adj0->n_adj > 0);
2916           ASSERT (adj1->n_adj > 0);
2917           ASSERT (is_pow2 (adj0->n_adj));
2918           ASSERT (is_pow2 (adj1->n_adj));
2919           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
2920           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
2921
2922           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2923           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2924
2925           if (1) /* $$$$$$ HACK FIXME */
2926           vlib_increment_combined_counter 
2927               (cm, cpu_index, adj_index0, 1,
2928                vlib_buffer_length_in_chain (vm, p0));
2929           if (1) /* $$$$$$ HACK FIXME */
2930           vlib_increment_combined_counter 
2931               (cm, cpu_index, adj_index1, 1,
2932                vlib_buffer_length_in_chain (vm, p1));
2933
2934           from += 2;
2935           to_next += 2;
2936           n_left_to_next -= 2;
2937           n_left_from -= 2;
2938
2939           wrong_next = (next0 != next) + 2*(next1 != next);
2940           if (PREDICT_FALSE (wrong_next != 0))
2941             {
2942               switch (wrong_next)
2943                 {
2944                 case 1:
2945                   /* A B A */
2946                   to_next[-2] = pi1;
2947                   to_next -= 1;
2948                   n_left_to_next += 1;
2949                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2950                   break;
2951
2952                 case 2:
2953                   /* A A B */
2954                   to_next -= 1;
2955                   n_left_to_next += 1;
2956                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2957                   break;
2958
2959                 case 3:
2960                   /* A B C */
2961                   to_next -= 2;
2962                   n_left_to_next += 2;
2963                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2964                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2965                   if (next0 == next1)
2966                     {
2967                       /* A B B */
2968                       vlib_put_next_frame (vm, node, next, n_left_to_next);
2969                       next = next1;
2970                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2971                     }
2972                 }
2973             }
2974         }
2975     
2976       while (n_left_from > 0 && n_left_to_next > 0)
2977         {
2978           vlib_buffer_t * p0;
2979           ip4_header_t * ip0;
2980           u32 pi0, adj_index0;
2981           ip_lookup_next_t next0;
2982           ip_adjacency_t * adj0;
2983           u32 fib_index0;
2984           u32 flow_hash_config0;
2985
2986           pi0 = from[0];
2987           to_next[0] = pi0;
2988
2989           p0 = vlib_get_buffer (vm, pi0);
2990
2991           ip0 = vlib_buffer_get_current (p0);
2992
2993           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2994                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2995           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2996               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2997           
2998           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
2999                                               &ip0->dst_address, p0);
3000
3001           adj0 = ip_get_adjacency (lm, adj_index0);
3002
3003           next0 = adj0->lookup_next_index;
3004
3005           flow_hash_config0 = 
3006               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3007
3008           vnet_buffer (p0)->ip.flow_hash = 
3009             ip4_compute_flow_hash (ip0, flow_hash_config0);
3010
3011           ASSERT (adj0->n_adj > 0);
3012           ASSERT (is_pow2 (adj0->n_adj));
3013           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3014
3015           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3016
3017           if (1) /* $$$$$$ HACK FIXME */
3018               vlib_increment_combined_counter 
3019                   (cm, cpu_index, adj_index0, 1,
3020                    vlib_buffer_length_in_chain (vm, p0));
3021
3022           from += 1;
3023           to_next += 1;
3024           n_left_to_next -= 1;
3025           n_left_from -= 1;
3026
3027           if (PREDICT_FALSE (next0 != next))
3028             {
3029               n_left_to_next += 1;
3030               vlib_put_next_frame (vm, node, next, n_left_to_next);
3031               next = next0;
3032               vlib_get_next_frame (vm, node, next,
3033                                    to_next, n_left_to_next);
3034               to_next[0] = pi0;
3035               to_next += 1;
3036               n_left_to_next -= 1;
3037             }
3038         }
3039
3040       vlib_put_next_frame (vm, node, next, n_left_to_next);
3041     }
3042
3043   return frame->n_vectors;
3044 }
3045
3046 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3047   .function = ip4_lookup_multicast,
3048   .name = "ip4-lookup-multicast",
3049   .vector_size = sizeof (u32),
3050
3051   .n_next_nodes = IP_LOOKUP_N_NEXT,
3052   .next_nodes = IP4_LOOKUP_NEXT_NODES,
3053 };
3054
3055 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
3056
3057 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3058   .function = ip4_drop,
3059   .name = "ip4-multicast",
3060   .vector_size = sizeof (u32),
3061
3062   .format_trace = format_ip4_forward_next_trace,
3063
3064   .n_next_nodes = 1,
3065   .next_nodes = {
3066     [0] = "error-drop",
3067   },
3068 };
3069
3070 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3071 {
3072   ip4_main_t * im = &ip4_main;
3073   ip4_fib_mtrie_t * mtrie0;
3074   ip4_fib_mtrie_leaf_t leaf0;
3075   u32 adj_index0;
3076     
3077   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3078
3079   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3080   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3081   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3082   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3083   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3084   
3085   /* Handle default route. */
3086   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3087   
3088   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3089   
3090   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3091                                                   a, 
3092                                                   /* no_default_route */ 0);
3093 }
3094  
3095 static clib_error_t *
3096 test_lookup_command_fn (vlib_main_t * vm,
3097                         unformat_input_t * input,
3098                         vlib_cli_command_t * cmd)
3099 {
3100   u32 table_id = 0;
3101   f64 count = 1;
3102   u32 n;
3103   int i;
3104   ip4_address_t ip4_base_address;
3105   u64 errors = 0;
3106
3107   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3108       if (unformat (input, "table %d", &table_id))
3109         ;
3110       else if (unformat (input, "count %f", &count))
3111         ;
3112
3113       else if (unformat (input, "%U",
3114                          unformat_ip4_address, &ip4_base_address))
3115         ;
3116       else
3117         return clib_error_return (0, "unknown input `%U'",
3118                                   format_unformat_error, input);
3119   }
3120
3121   n = count;
3122
3123   for (i = 0; i < n; i++)
3124     {
3125       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3126         errors++;
3127
3128       ip4_base_address.as_u32 = 
3129         clib_host_to_net_u32 (1 + 
3130                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3131     }
3132
3133   if (errors) 
3134     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3135   else
3136     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3137
3138   return 0;
3139 }
3140
3141 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3142     .path = "test lookup",
3143     .short_help = "test lookup",
3144     .function = test_lookup_command_fn,
3145 };
3146
3147 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3148 {
3149   ip4_main_t * im4 = &ip4_main;
3150   ip4_fib_t * fib;
3151   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3152
3153   if (p == 0)
3154     return VNET_API_ERROR_NO_SUCH_FIB;
3155
3156   fib = vec_elt_at_index (im4->fibs, p[0]);
3157
3158   fib->flow_hash_config = flow_hash_config;
3159   return 0;
3160 }
3161  
3162 static clib_error_t *
3163 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3164                              unformat_input_t * input,
3165                              vlib_cli_command_t * cmd)
3166 {
3167   int matched = 0;
3168   u32 table_id = 0;
3169   u32 flow_hash_config = 0;
3170   int rv;
3171
3172   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3173     if (unformat (input, "table %d", &table_id))
3174       matched = 1;
3175 #define _(a,v) \
3176     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3177     foreach_flow_hash_bit
3178 #undef _
3179     else break;
3180   }
3181   
3182   if (matched == 0)
3183     return clib_error_return (0, "unknown input `%U'",
3184                               format_unformat_error, input);
3185   
3186   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3187   switch (rv)
3188     {
3189     case 0:
3190       break;
3191       
3192     case VNET_API_ERROR_NO_SUCH_FIB:
3193       return clib_error_return (0, "no such FIB table %d", table_id);
3194       
3195     default:
3196       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3197       break;
3198     }
3199   
3200   return 0;
3201 }
3202  
3203 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3204   .path = "set ip flow-hash",
3205   .short_help = 
3206   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3207   .function = set_ip_flow_hash_command_fn,
3208 };
3209  
3210 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3211                                  u32 table_index)
3212 {
3213   vnet_main_t * vnm = vnet_get_main();
3214   vnet_interface_main_t * im = &vnm->interface_main;
3215   ip4_main_t * ipm = &ip4_main;
3216   ip_lookup_main_t * lm = &ipm->lookup_main;
3217   vnet_classify_main_t * cm = &vnet_classify_main;
3218
3219   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3220     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3221
3222   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3223     return VNET_API_ERROR_NO_SUCH_ENTRY;
3224
3225   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3226   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3227
3228   return 0;
3229 }
3230
3231 static clib_error_t *
3232 set_ip_classify_command_fn (vlib_main_t * vm,
3233                             unformat_input_t * input,
3234                             vlib_cli_command_t * cmd)
3235 {
3236   u32 table_index = ~0;
3237   int table_index_set = 0;
3238   u32 sw_if_index = ~0;
3239   int rv;
3240   
3241   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3242     if (unformat (input, "table-index %d", &table_index))
3243       table_index_set = 1;
3244     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3245                        vnet_get_main(), &sw_if_index))
3246       ;
3247     else
3248       break;
3249   }
3250       
3251   if (table_index_set == 0)
3252     return clib_error_return (0, "classify table-index must be specified");
3253
3254   if (sw_if_index == ~0)
3255     return clib_error_return (0, "interface / subif must be specified");
3256
3257   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3258
3259   switch (rv)
3260     {
3261     case 0:
3262       break;
3263
3264     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3265       return clib_error_return (0, "No such interface");
3266
3267     case VNET_API_ERROR_NO_SUCH_ENTRY:
3268       return clib_error_return (0, "No such classifier table");
3269     }
3270   return 0;
3271 }
3272
3273 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3274     .path = "set ip classify",
3275     .short_help = 
3276     "set ip classify intfc <int> table-index <index>",
3277     .function = set_ip_classify_command_fn,
3278 };
3279