63bc0ed8fbf6ff6075fafe42c999caa834916d19
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47
48 /* This is really, really simple but stupid fib. */
49 u32
50 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
51                            ip4_address_t * dst,
52                            u32 disable_default_route)
53 {
54   ip_lookup_main_t * lm = &im->lookup_main;
55   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
56   uword * p, * hash, key;
57   i32 i, i_min, dst_address, ai;
58
59   i_min = disable_default_route ? 1 : 0;
60   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
61   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
62     {
63       hash = fib->adj_index_by_dst_address[i];
64       if (! hash)
65         continue;
66
67       key = dst_address & im->fib_masks[i];
68       if ((p = hash_get (hash, key)) != 0)
69         {
70           ai = p[0];
71           goto done;
72         }
73     }
74     
75   /* Nothing matches in table. */
76   ai = lm->miss_adj_index;
77
78  done:
79   return ai;
80 }
81
82 static ip4_fib_t *
83 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
84 {
85   ip4_fib_t * fib;
86   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
87   vec_add2 (im->fibs, fib, 1);
88   fib->table_id = table_id;
89   fib->index = fib - im->fibs;
90   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
91   fib->fwd_classify_table_index = ~0;
92   fib->rev_classify_table_index = ~0;
93   ip4_mtrie_init (&fib->mtrie);
94   return fib;
95 }
96
97 ip4_fib_t *
98 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
99                                    u32 table_index_or_id, u32 flags)
100 {
101   uword * p, fib_index;
102
103   fib_index = table_index_or_id;
104   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
105     {
106       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
107       if (! p)
108         return create_fib_with_table_id (im, table_index_or_id);
109       fib_index = p[0];
110     }
111   return vec_elt_at_index (im->fibs, fib_index);
112 }
113
114 static void
115 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
116                                        ip4_fib_t * fib,
117                                        u32 address_length)
118 {
119   hash_t * h;
120   uword max_index;
121
122   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
123   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
124
125   fib->adj_index_by_dst_address[address_length] =
126     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
127
128   hash_set_flags (fib->adj_index_by_dst_address[address_length],
129                   HASH_FLAG_NO_AUTO_SHRINK);
130
131   h = hash_header (fib->adj_index_by_dst_address[address_length]);
132   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
133
134   /* Initialize new/old hash value vectors. */
135   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
136   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
137 }
138
139 static void
140 ip4_fib_set_adj_index (ip4_main_t * im,
141                        ip4_fib_t * fib,
142                        u32 flags,
143                        u32 dst_address_u32,
144                        u32 dst_address_length,
145                        u32 adj_index)
146 {
147   ip_lookup_main_t * lm = &im->lookup_main;
148   uword * hash;
149
150   if (vec_bytes(fib->old_hash_values))
151     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
152   if (vec_bytes(fib->new_hash_values))
153     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
154   fib->new_hash_values[0] = adj_index;
155
156   /* Make sure adj index is valid. */
157   if (CLIB_DEBUG > 0)
158     (void) ip_get_adjacency (lm, adj_index);
159
160   hash = fib->adj_index_by_dst_address[dst_address_length];
161
162   hash = _hash_set3 (hash, dst_address_u32,
163                      fib->new_hash_values,
164                      fib->old_hash_values);
165
166   fib->adj_index_by_dst_address[dst_address_length] = hash;
167
168   if (vec_len (im->add_del_route_callbacks) > 0)
169     {
170       ip4_add_del_route_callback_t * cb;
171       ip4_address_t d;
172       uword * p;
173
174       d.data_u32 = dst_address_u32;
175       vec_foreach (cb, im->add_del_route_callbacks)
176         if ((flags & cb->required_flags) == cb->required_flags)
177           cb->function (im, cb->function_opaque,
178                         fib, flags,
179                         &d, dst_address_length,
180                         fib->old_hash_values,
181                         fib->new_hash_values);
182
183       p = hash_get (hash, dst_address_u32);
184       clib_memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
185     }
186 }
187
188 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
189 {
190   ip_lookup_main_t * lm = &im->lookup_main;
191   ip4_fib_t * fib;
192   u32 dst_address, dst_address_length, adj_index, old_adj_index;
193   uword * hash, is_del;
194   ip4_add_del_route_callback_t * cb;
195
196   /* Either create new adjacency or use given one depending on arguments. */
197   if (a->n_add_adj > 0)
198     {
199       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
200       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
201     }
202   else
203     adj_index = a->adj_index;
204
205   dst_address = a->dst_address.data_u32;
206   dst_address_length = a->dst_address_length;
207   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
208
209   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
210   dst_address &= im->fib_masks[dst_address_length];
211
212   if (! fib->adj_index_by_dst_address[dst_address_length])
213     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
214
215   hash = fib->adj_index_by_dst_address[dst_address_length];
216
217   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
218
219   if (is_del)
220     {
221       fib->old_hash_values[0] = ~0;
222       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
223       fib->adj_index_by_dst_address[dst_address_length] = hash;
224
225       if (vec_len (im->add_del_route_callbacks) > 0
226           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
227         {
228           fib->new_hash_values[0] = ~0;
229           vec_foreach (cb, im->add_del_route_callbacks)
230             if ((a->flags & cb->required_flags) == cb->required_flags)
231               cb->function (im, cb->function_opaque,
232                             fib, a->flags,
233                             &a->dst_address, dst_address_length,
234                             fib->old_hash_values,
235                             fib->new_hash_values);
236         }
237     }
238   else
239     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
240                            adj_index);
241
242   old_adj_index = fib->old_hash_values[0];
243
244   /* Avoid spurious reference count increments */
245   if (old_adj_index == adj_index && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
246     {
247       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
248       if (adj->share_count > 0)
249         adj->share_count --;
250     }
251
252   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
253                                is_del ? old_adj_index : adj_index,
254                                is_del);
255
256   /* Delete old adjacency index if present and changed. */
257   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
258       && old_adj_index != ~0
259       && old_adj_index != adj_index)
260     ip_del_adjacency (lm, old_adj_index);
261 }
262
263 void
264 ip4_add_del_route_next_hop (ip4_main_t * im,
265                             u32 flags,
266                             ip4_address_t * dst_address,
267                             u32 dst_address_length,
268                             ip4_address_t * next_hop,
269                             u32 next_hop_sw_if_index,
270                             u32 next_hop_weight, u32 adj_index, 
271                             u32 explicit_fib_index)
272 {
273   vnet_main_t * vnm = vnet_get_main();
274   ip_lookup_main_t * lm = &im->lookup_main;
275   u32 fib_index;
276   ip4_fib_t * fib;
277   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
278   u32 dst_adj_index, nh_adj_index;
279   uword * dst_hash, * dst_result;
280   uword * nh_hash, * nh_result;
281   ip_adjacency_t * dst_adj;
282   ip_multipath_adjacency_t * old_mp, * new_mp;
283   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
284   int is_interface_next_hop;
285   clib_error_t * error = 0;
286
287   if (explicit_fib_index == (u32)~0)
288       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
289   else
290       fib_index = explicit_fib_index;
291
292   fib = vec_elt_at_index (im->fibs, fib_index);
293   
294   /* Lookup next hop to be added or deleted. */
295   is_interface_next_hop = next_hop->data_u32 == 0;
296   if (adj_index == (u32)~0)
297     {
298       if (is_interface_next_hop)
299         {
300           nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
301           if (nh_result)
302             nh_adj_index = *nh_result;
303           else
304             {
305               ip_adjacency_t * adj;
306               adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
307                                       &nh_adj_index);
308               ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
309               ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
310               hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
311             }
312         }
313       else
314         {
315           nh_hash = fib->adj_index_by_dst_address[32];
316           nh_result = hash_get (nh_hash, next_hop->data_u32);
317           
318           /* Next hop must be known. */
319           if (! nh_result)
320             {
321               ip_adjacency_t * adj;
322
323               nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
324                                                         next_hop, 0);
325               adj = ip_get_adjacency (lm, nh_adj_index);
326               /* if ARP interface adjacencty is present, we need to
327                  install ARP adjaceny for specific next hop */
328               if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
329                   adj->arp.next_hop.ip4.as_u32 == 0)
330                 {
331                   nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
332                 }
333               else
334                 {
335                   /* Next hop is not known, so create indirect adj */
336                   ip_adjacency_t add_adj;
337                   add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
338                   add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
339                   add_adj.explicit_fib_index = explicit_fib_index;
340                   ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
341                 }
342             }
343           else
344             nh_adj_index = *nh_result;
345         }
346     }
347   else
348     {
349       nh_adj_index = adj_index;
350     }
351   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
352   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
353
354   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
355   dst_result = hash_get (dst_hash, dst_address_u32);
356   if (dst_result)
357     {
358       dst_adj_index = dst_result[0];
359       dst_adj = ip_get_adjacency (lm, dst_adj_index);
360     }
361   else
362     {
363       /* For deletes destination must be known. */
364       if (is_del)
365         {
366           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
367           error = clib_error_return (0, "unknown destination %U/%d",
368                                      format_ip4_address, dst_address,
369                                      dst_address_length);
370           goto done;
371         }
372
373       dst_adj_index = ~0;
374       dst_adj = 0;
375     }
376
377   /* Ignore adds of X/32 with next hop of X. */
378   if (! is_del
379       && dst_address_length == 32
380       && dst_address->data_u32 == next_hop->data_u32 
381       && adj_index != (u32)~0)
382     {
383       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
384       error = clib_error_return (0, "prefix matches next hop %U/%d",
385                                  format_ip4_address, dst_address,
386                                  dst_address_length);
387       goto done;
388     }
389
390   /* Destination is not known and default weight is set so add route
391      to existing non-multipath adjacency */
392   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
393     {
394       /* create new adjacency */
395       ip4_add_del_route_args_t a;
396       a.table_index_or_table_id = fib_index;
397       a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
398                  | IP4_ROUTE_FLAG_FIB_INDEX
399                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
400                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
401                              | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
402       a.dst_address = dst_address[0];
403       a.dst_address_length = dst_address_length;
404       a.adj_index = nh_adj_index;
405       a.add_adj = 0;
406       a.n_add_adj = 0;
407
408       ip4_add_del_route (im, &a);
409
410       goto done;
411     }
412
413   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
414
415   if (! ip_multipath_adjacency_add_del_next_hop
416       (lm, is_del,
417        old_mp_adj_index,
418        nh_adj_index,
419        next_hop_weight,
420        &new_mp_adj_index))
421     {
422       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
423       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
424                                  format_ip4_address, next_hop);
425       goto done;
426     }
427   
428   old_mp = new_mp = 0;
429   if (old_mp_adj_index != ~0)
430     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
431   if (new_mp_adj_index != ~0)
432     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
433
434   if (old_mp != new_mp)
435     {
436       ip4_add_del_route_args_t a;
437       a.table_index_or_table_id = fib_index;
438       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
439                  | IP4_ROUTE_FLAG_FIB_INDEX
440                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
441                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
442       a.dst_address = dst_address[0];
443       a.dst_address_length = dst_address_length;
444       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
445       a.add_adj = 0;
446       a.n_add_adj = 0;
447
448       ip4_add_del_route (im, &a);
449     }
450
451  done:
452   if (error)
453     clib_error_report (error);
454 }
455
456 void *
457 ip4_get_route (ip4_main_t * im,
458                u32 table_index_or_table_id,
459                u32 flags,
460                u8 * address,
461                u32 address_length)
462 {
463   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
464   u32 dst_address = * (u32 *) address;
465   uword * hash, * p;
466
467   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
468   dst_address &= im->fib_masks[address_length];
469
470   hash = fib->adj_index_by_dst_address[address_length];
471   p = hash_get (hash, dst_address);
472   return (void *) p;
473 }
474
475 void
476 ip4_foreach_matching_route (ip4_main_t * im,
477                             u32 table_index_or_table_id,
478                             u32 flags,
479                             ip4_address_t * address,
480                             u32 address_length,
481                             ip4_address_t ** results,
482                             u8 ** result_lengths)
483 {
484   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
485   u32 dst_address = address->data_u32;
486   u32 this_length = address_length;
487   
488   if (*results)
489     _vec_len (*results) = 0;
490   if (*result_lengths)
491     _vec_len (*result_lengths) = 0;
492
493   while (this_length <= 32 && vec_len (results) == 0)
494     {
495       uword k, v;
496       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
497         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
498           {
499             ip4_address_t a;
500             a.data_u32 = k;
501             vec_add1 (*results, a);
502             vec_add1 (*result_lengths, this_length);
503           }
504       }));
505
506       this_length++;
507     }
508 }
509
510 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
511                                   u32 table_index_or_table_id,
512                                   u32 flags)
513 {
514   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
515   ip_lookup_main_t * lm = &im->lookup_main;
516   u32 i, l;
517   ip4_address_t a;
518   ip4_add_del_route_callback_t * cb;
519   static ip4_address_t * to_delete;
520
521   if (lm->n_adjacency_remaps == 0)
522     return;
523
524   for (l = 0; l <= 32; l++)
525     {
526       hash_pair_t * p;
527       uword * hash = fib->adj_index_by_dst_address[l];
528
529       if (hash_elts (hash) == 0)
530         continue;
531
532       if (to_delete)
533         _vec_len (to_delete) = 0;
534
535       hash_foreach_pair (p, hash, ({
536         u32 adj_index = p->value[0];
537         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
538
539         if (m)
540           {
541             /* Record destination address from hash key. */
542             a.data_u32 = p->key;
543
544             /* New adjacency points to nothing: so delete prefix. */
545             if (m == ~0)
546               vec_add1 (to_delete, a);
547             else
548               {
549                 /* Remap to new adjacency. */
550                 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
551
552                 /* Set new adjacency value. */
553                 fib->new_hash_values[0] = p->value[0] = m - 1;
554
555                 vec_foreach (cb, im->add_del_route_callbacks)
556                   if ((flags & cb->required_flags) == cb->required_flags)
557                     cb->function (im, cb->function_opaque,
558                                   fib, flags | IP4_ROUTE_FLAG_ADD,
559                                   &a, l,
560                                   fib->old_hash_values,
561                                   fib->new_hash_values);
562               }
563           }
564       }));
565
566       fib->new_hash_values[0] = ~0;
567       for (i = 0; i < vec_len (to_delete); i++)
568         {
569           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
570           vec_foreach (cb, im->add_del_route_callbacks)
571             if ((flags & cb->required_flags) == cb->required_flags)
572               cb->function (im, cb->function_opaque,
573                             fib, flags | IP4_ROUTE_FLAG_DEL,
574                             &a, l,
575                             fib->old_hash_values,
576                             fib->new_hash_values);
577         }
578     }
579
580   /* Also remap adjacencies in mtrie. */
581   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
582
583   /* Reset mapping table. */
584   vec_zero (lm->adjacency_remap_table);
585
586   /* All remaps have been performed. */
587   lm->n_adjacency_remaps = 0;
588 }
589
590 void ip4_delete_matching_routes (ip4_main_t * im,
591                                  u32 table_index_or_table_id,
592                                  u32 flags,
593                                  ip4_address_t * address,
594                                  u32 address_length)
595 {
596   static ip4_address_t * matching_addresses;
597   static u8 * matching_address_lengths;
598   u32 l, i;
599   ip4_add_del_route_args_t a;
600
601   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
602   a.table_index_or_table_id = table_index_or_table_id;
603   a.adj_index = ~0;
604   a.add_adj = 0;
605   a.n_add_adj = 0;
606
607   for (l = address_length + 1; l <= 32; l++)
608     {
609       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
610                                   address,
611                                   l,
612                                   &matching_addresses,
613                                   &matching_address_lengths);
614       for (i = 0; i < vec_len (matching_addresses); i++)
615         {
616           a.dst_address = matching_addresses[i];
617           a.dst_address_length = matching_address_lengths[i];
618           ip4_add_del_route (im, &a);
619         }
620     }
621
622   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
623 }
624
625 always_inline uword
626 ip4_lookup_inline (vlib_main_t * vm,
627                    vlib_node_runtime_t * node,
628                    vlib_frame_t * frame,
629                    int lookup_for_responses_to_locally_received_packets,
630                    int is_indirect)
631 {
632   ip4_main_t * im = &ip4_main;
633   ip_lookup_main_t * lm = &im->lookup_main;
634   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
635   u32 n_left_from, n_left_to_next, * from, * to_next;
636   ip_lookup_next_t next;
637   u32 cpu_index = os_get_cpu_number();
638
639   from = vlib_frame_vector_args (frame);
640   n_left_from = frame->n_vectors;
641   next = node->cached_next_index;
642
643   while (n_left_from > 0)
644     {
645       vlib_get_next_frame (vm, node, next,
646                            to_next, n_left_to_next);
647
648       while (n_left_from >= 4 && n_left_to_next >= 2)
649         {
650           vlib_buffer_t * p0, * p1;
651           ip4_header_t * ip0, * ip1;
652           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
653           ip_lookup_next_t next0, next1;
654           ip_adjacency_t * adj0, * adj1;
655           ip4_fib_mtrie_t * mtrie0, * mtrie1;
656           ip4_fib_mtrie_leaf_t leaf0, leaf1;
657           ip4_address_t * dst_addr0, *dst_addr1;
658           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
659           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
660           u32 flow_hash_config0, flow_hash_config1;
661           u32 hash_c0, hash_c1;
662           u32 wrong_next;
663
664           /* Prefetch next iteration. */
665           {
666             vlib_buffer_t * p2, * p3;
667
668             p2 = vlib_get_buffer (vm, from[2]);
669             p3 = vlib_get_buffer (vm, from[3]);
670
671             vlib_prefetch_buffer_header (p2, LOAD);
672             vlib_prefetch_buffer_header (p3, LOAD);
673
674             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
675             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
676           }
677
678           pi0 = to_next[0] = from[0];
679           pi1 = to_next[1] = from[1];
680
681           p0 = vlib_get_buffer (vm, pi0);
682           p1 = vlib_get_buffer (vm, pi1);
683
684           ip0 = vlib_buffer_get_current (p0);
685           ip1 = vlib_buffer_get_current (p1);
686
687           if (is_indirect)
688             {
689               ip_adjacency_t * iadj0, * iadj1;
690               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
691               iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
692               dst_addr0 = &iadj0->indirect.next_hop.ip4;
693               dst_addr1 = &iadj1->indirect.next_hop.ip4;
694             }
695           else
696             {
697               dst_addr0 = &ip0->dst_address;
698               dst_addr1 = &ip1->dst_address;
699             }
700
701           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
702           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
703           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
704             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
705           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
706             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
707
708
709           if (! lookup_for_responses_to_locally_received_packets)
710             {
711               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
712               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
713
714               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
715
716               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
717               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
718             }
719
720           tcp0 = (void *) (ip0 + 1);
721           tcp1 = (void *) (ip1 + 1);
722
723           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
724                          || ip0->protocol == IP_PROTOCOL_UDP);
725           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
726                          || ip1->protocol == IP_PROTOCOL_UDP);
727
728           if (! lookup_for_responses_to_locally_received_packets)
729             {
730               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
731               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
732             }
733
734           if (! lookup_for_responses_to_locally_received_packets)
735             {
736               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
737               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
738             }
739
740           if (! lookup_for_responses_to_locally_received_packets)
741             {
742               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
743               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
744             }
745
746           if (lookup_for_responses_to_locally_received_packets)
747             {
748               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
749               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
750             }
751           else
752             {
753               /* Handle default route. */
754               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
755               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
756
757               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
758               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
759             }
760
761           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
762                                                            dst_addr0,
763                                                            /* no_default_route */ 0));
764           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
765                                                            dst_addr1,
766                                                            /* no_default_route */ 0));
767           adj0 = ip_get_adjacency (lm, adj_index0);
768           adj1 = ip_get_adjacency (lm, adj_index1);
769
770           next0 = adj0->lookup_next_index;
771           next1 = adj1->lookup_next_index;
772
773           /* Use flow hash to compute multipath adjacency. */
774           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
775           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
776           if (PREDICT_FALSE (adj0->n_adj > 1))
777             {
778               flow_hash_config0 = 
779                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
780               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
781                 ip4_compute_flow_hash (ip0, flow_hash_config0);
782             }
783           if (PREDICT_FALSE(adj1->n_adj > 1))
784             {
785               flow_hash_config1 = 
786                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
787               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
788                 ip4_compute_flow_hash (ip1, flow_hash_config1);
789             }
790
791           ASSERT (adj0->n_adj > 0);
792           ASSERT (adj1->n_adj > 0);
793           ASSERT (is_pow2 (adj0->n_adj));
794           ASSERT (is_pow2 (adj1->n_adj));
795           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
796           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
797
798           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
799           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
800
801           vlib_increment_combined_counter 
802               (cm, cpu_index, adj_index0, 1,
803                vlib_buffer_length_in_chain (vm, p0) 
804                + sizeof(ethernet_header_t));
805           vlib_increment_combined_counter 
806               (cm, cpu_index, adj_index1, 1,
807                vlib_buffer_length_in_chain (vm, p1)
808                + sizeof(ethernet_header_t));
809
810           from += 2;
811           to_next += 2;
812           n_left_to_next -= 2;
813           n_left_from -= 2;
814
815           wrong_next = (next0 != next) + 2*(next1 != next);
816           if (PREDICT_FALSE (wrong_next != 0))
817             {
818               switch (wrong_next)
819                 {
820                 case 1:
821                   /* A B A */
822                   to_next[-2] = pi1;
823                   to_next -= 1;
824                   n_left_to_next += 1;
825                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
826                   break;
827
828                 case 2:
829                   /* A A B */
830                   to_next -= 1;
831                   n_left_to_next += 1;
832                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
833                   break;
834
835                 case 3:
836                   /* A B C */
837                   to_next -= 2;
838                   n_left_to_next += 2;
839                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
840                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
841                   if (next0 == next1)
842                     {
843                       /* A B B */
844                       vlib_put_next_frame (vm, node, next, n_left_to_next);
845                       next = next1;
846                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
847                     }
848                 }
849             }
850         }
851     
852       while (n_left_from > 0 && n_left_to_next > 0)
853         {
854           vlib_buffer_t * p0;
855           ip4_header_t * ip0;
856           __attribute__((unused)) tcp_header_t * tcp0;
857           ip_lookup_next_t next0;
858           ip_adjacency_t * adj0;
859           ip4_fib_mtrie_t * mtrie0;
860           ip4_fib_mtrie_leaf_t leaf0;
861           ip4_address_t * dst_addr0;
862           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
863           u32 flow_hash_config0, hash_c0;
864
865           pi0 = from[0];
866           to_next[0] = pi0;
867
868           p0 = vlib_get_buffer (vm, pi0);
869
870           ip0 = vlib_buffer_get_current (p0);
871
872           if (is_indirect)
873             {
874               ip_adjacency_t * iadj0;
875               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
876               dst_addr0 = &iadj0->indirect.next_hop.ip4;
877             }
878           else
879             {
880               dst_addr0 = &ip0->dst_address;
881             }
882
883           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
884           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
885             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
886
887           if (! lookup_for_responses_to_locally_received_packets)
888             {
889               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
890
891               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
892
893               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
894             }
895
896           tcp0 = (void *) (ip0 + 1);
897
898           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
899                          || ip0->protocol == IP_PROTOCOL_UDP);
900
901           if (! lookup_for_responses_to_locally_received_packets)
902             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
903
904           if (! lookup_for_responses_to_locally_received_packets)
905             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
906
907           if (! lookup_for_responses_to_locally_received_packets)
908             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
909
910           if (lookup_for_responses_to_locally_received_packets)
911             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
912           else
913             {
914               /* Handle default route. */
915               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
916               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
917             }
918
919           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
920                                                            dst_addr0,
921                                                            /* no_default_route */ 0));
922
923           adj0 = ip_get_adjacency (lm, adj_index0);
924
925           next0 = adj0->lookup_next_index;
926
927           /* Use flow hash to compute multipath adjacency. */
928           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
929           if (PREDICT_FALSE(adj0->n_adj > 1))
930             {
931               flow_hash_config0 = 
932                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
933
934               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
935                 ip4_compute_flow_hash (ip0, flow_hash_config0);
936             }
937
938           ASSERT (adj0->n_adj > 0);
939           ASSERT (is_pow2 (adj0->n_adj));
940           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
941
942           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
943
944           vlib_increment_combined_counter 
945               (cm, cpu_index, adj_index0, 1,
946                vlib_buffer_length_in_chain (vm, p0)
947                + sizeof(ethernet_header_t));
948
949           from += 1;
950           to_next += 1;
951           n_left_to_next -= 1;
952           n_left_from -= 1;
953
954           if (PREDICT_FALSE (next0 != next))
955             {
956               n_left_to_next += 1;
957               vlib_put_next_frame (vm, node, next, n_left_to_next);
958               next = next0;
959               vlib_get_next_frame (vm, node, next,
960                                    to_next, n_left_to_next);
961               to_next[0] = pi0;
962               to_next += 1;
963               n_left_to_next -= 1;
964             }
965         }
966
967       vlib_put_next_frame (vm, node, next, n_left_to_next);
968     }
969
970   return frame->n_vectors;
971 }
972
973 static uword
974 ip4_lookup (vlib_main_t * vm,
975             vlib_node_runtime_t * node,
976             vlib_frame_t * frame)
977 {
978   return ip4_lookup_inline (vm, node, frame,
979                             /* lookup_for_responses_to_locally_received_packets */ 0,
980                             /* is_indirect */ 0);
981
982 }
983
984 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
985                                         ip_adjacency_t * adj,
986                                         u32 sw_if_index,
987                                         u32 if_address_index)
988 {
989   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
990   ip_lookup_next_t n;
991   vnet_l3_packet_type_t packet_type;
992   u32 node_index;
993
994   if (hw->hw_class_index == ethernet_hw_interface_class.index
995       || hw->hw_class_index == srp_hw_interface_class.index)
996     {
997       /* 
998        * We have a bit of a problem in this case. ip4-arp uses
999        * the rewrite_header.next_index to hand pkts to the
1000        * indicated inteface output node. We can end up in
1001        * ip4_rewrite_local, too, which also pays attention to 
1002        * rewrite_header.next index. Net result: a hack in
1003        * ip4_rewrite_local...
1004        */
1005       n = IP_LOOKUP_NEXT_ARP;
1006       node_index = ip4_arp_node.index;
1007       adj->if_address_index = if_address_index;
1008       adj->arp.next_hop.ip4.as_u32 = 0;
1009       ip46_address_reset(&adj->arp.next_hop);
1010       packet_type = VNET_L3_PACKET_TYPE_ARP;
1011     }
1012   else
1013     {
1014       n = IP_LOOKUP_NEXT_REWRITE;
1015       node_index = ip4_rewrite_node.index;
1016       packet_type = VNET_L3_PACKET_TYPE_IP4;
1017     }
1018
1019   adj->lookup_next_index = n;
1020   vnet_rewrite_for_sw_interface
1021     (vnm,
1022      packet_type,
1023      sw_if_index,
1024      node_index,
1025      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1026      &adj->rewrite_header,
1027      sizeof (adj->rewrite_data));
1028 }
1029
1030 static void
1031 ip4_add_interface_routes (u32 sw_if_index,
1032                           ip4_main_t * im, u32 fib_index,
1033                           ip_interface_address_t * a)
1034 {
1035   vnet_main_t * vnm = vnet_get_main();
1036   ip_lookup_main_t * lm = &im->lookup_main;
1037   ip_adjacency_t * adj;
1038   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1039   ip4_add_del_route_args_t x;
1040   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1041   u32 classify_table_index;
1042
1043   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1044   x.table_index_or_table_id = fib_index;
1045   x.flags = (IP4_ROUTE_FLAG_ADD
1046              | IP4_ROUTE_FLAG_FIB_INDEX
1047              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1048   x.dst_address = address[0];
1049   x.dst_address_length = a->address_length;
1050   x.n_add_adj = 0;
1051   x.add_adj = 0;
1052
1053   a->neighbor_probe_adj_index = ~0;
1054   if (a->address_length < 32)
1055     {
1056       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1057                               &x.adj_index);
1058       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1059       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1060       ip4_add_del_route (im, &x);
1061       a->neighbor_probe_adj_index = x.adj_index;
1062     }
1063   
1064   /* Add e.g. 1.1.1.1/32 as local to this host. */
1065   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1066                           &x.adj_index);
1067   
1068   classify_table_index = ~0;
1069   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1070     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1071   if (classify_table_index != (u32) ~0)
1072     {
1073       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1074       adj->classify.table_index = classify_table_index;
1075     }
1076   else
1077     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1078   
1079   adj->if_address_index = a - lm->if_address_pool;
1080   adj->rewrite_header.sw_if_index = sw_if_index;
1081   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1082   /* 
1083    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1084    * fail an RPF-ish check, but still go thru the rewrite code...
1085    */
1086   adj->rewrite_header.data_bytes = 0;
1087
1088   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1089   x.dst_address_length = 32;
1090   ip4_add_del_route (im, &x);
1091 }
1092
1093 static void
1094 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1095 {
1096   ip4_add_del_route_args_t x;
1097
1098   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1099   x.table_index_or_table_id = fib_index;
1100   x.flags = (IP4_ROUTE_FLAG_DEL
1101              | IP4_ROUTE_FLAG_FIB_INDEX
1102              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1103   x.dst_address = address[0];
1104   x.dst_address_length = address_length;
1105   x.adj_index = ~0;
1106   x.n_add_adj = 0;
1107   x.add_adj = 0;
1108
1109   if (address_length < 32)
1110     ip4_add_del_route (im, &x);
1111
1112   x.dst_address_length = 32;
1113   ip4_add_del_route (im, &x);
1114
1115   ip4_delete_matching_routes (im,
1116                               fib_index,
1117                               IP4_ROUTE_FLAG_FIB_INDEX,
1118                               address,
1119                               address_length);
1120 }
1121
/* An IPv4 address together with a software interface index and a length.
   NOTE(review): `length` is presumably the prefix length in bits; no code
   in the visible part of this file uses this type, so confirm against its
   users. */
typedef struct {
    u32 sw_if_index;
    ip4_address_t address;
    u32 length;
} ip4_interface_address_t;
1127
1128 static clib_error_t *
1129 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1130                                         u32 sw_if_index,
1131                                         ip4_address_t * new_address,
1132                                         u32 new_length,
1133                                         u32 redistribute,
1134                                         u32 insert_routes,
1135                                         u32 is_del);
1136
1137 static clib_error_t *
1138 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1139                                         u32 sw_if_index,
1140                                         ip4_address_t * address,
1141                                         u32 address_length,
1142                                         u32 redistribute,
1143                                         u32 insert_routes,
1144                                         u32 is_del)
1145 {
1146   vnet_main_t * vnm = vnet_get_main();
1147   ip4_main_t * im = &ip4_main;
1148   ip_lookup_main_t * lm = &im->lookup_main;
1149   clib_error_t * error = 0;
1150   u32 if_address_index, elts_before;
1151   ip4_address_fib_t ip4_af, * addr_fib = 0;
1152
1153   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1154   ip4_addr_fib_init (&ip4_af, address,
1155                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1156   vec_add1 (addr_fib, ip4_af);
1157
1158   /* When adding an address check that it does not conflict with an existing address. */
1159   if (! is_del)
1160     {
1161       ip_interface_address_t * ia;
1162       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1163                                     0 /* honor unnumbered */,
1164       ({
1165         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1166
1167         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1168             || ip4_destination_matches_route (im, x, address, address_length))
1169           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1170                                     format_ip4_address_and_length, address, address_length,
1171                                     format_ip4_address_and_length, x, ia->address_length,
1172                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1173       }));
1174     }
1175
1176   elts_before = pool_elts (lm->if_address_pool);
1177
1178   error = ip_interface_address_add_del
1179     (lm,
1180      sw_if_index,
1181      addr_fib,
1182      address_length,
1183      is_del,
1184      &if_address_index);
1185   if (error)
1186     goto done;
1187   
1188   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1189     {
1190       if (is_del)
1191         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1192                                   address_length);
1193       
1194       else
1195           ip4_add_interface_routes (sw_if_index,
1196                                     im, ip4_af.fib_index,
1197                                     pool_elt_at_index 
1198                                     (lm->if_address_pool, if_address_index));
1199     }
1200
1201   /* If pool did not grow/shrink: add duplicate address. */
1202   if (elts_before != pool_elts (lm->if_address_pool))
1203     {
1204       ip4_add_del_interface_address_callback_t * cb;
1205       vec_foreach (cb, im->add_del_interface_address_callbacks)
1206         cb->function (im, cb->function_opaque, sw_if_index,
1207                       address, address_length,
1208                       if_address_index,
1209                       is_del);
1210     }
1211
1212  done:
1213   vec_free (addr_fib);
1214   return error;
1215 }
1216
1217 clib_error_t *
1218 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1219                                ip4_address_t * address, u32 address_length,
1220                                u32 is_del)
1221 {
1222   return ip4_add_del_interface_address_internal
1223     (vm, sw_if_index, address, address_length,
1224      /* redistribute */ 1,
1225      /* insert_routes */ 1,
1226      is_del);
1227 }
1228
1229 static clib_error_t *
1230 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1231                                 u32 sw_if_index,
1232                                 u32 flags)
1233 {
1234   ip4_main_t * im = &ip4_main;
1235   ip_interface_address_t * ia;
1236   ip4_address_t * a;
1237   u32 is_admin_up, fib_index;
1238   
1239   /* Fill in lookup tables with default table (0). */
1240   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1241   
1242   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1243   
1244   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1245   
1246   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1247
1248   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1249                                 0 /* honor unnumbered */,
1250   ({
1251     a = ip_interface_address_get_address (&im->lookup_main, ia);
1252     if (is_admin_up)
1253       ip4_add_interface_routes (sw_if_index,
1254                                 im, fib_index,
1255                                 ia);
1256     else
1257       ip4_del_interface_routes (im, fib_index,
1258                                 a, ia->address_length);
1259   }));
1260
1261   return 0;
1262 }
1263  
1264 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1265
1266 static clib_error_t *
1267 ip4_sw_interface_add_del (vnet_main_t * vnm,
1268                           u32 sw_if_index,
1269                           u32 is_add)
1270 {
1271   vlib_main_t * vm = vnm->vlib_main;
1272   ip4_main_t * im = &ip4_main;
1273   ip_lookup_main_t * lm = &im->lookup_main;
1274   u32 ci, cast;
1275
1276   for (cast = 0; cast < VNET_N_CAST; cast++)
1277     {
1278       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1279       vnet_config_main_t * vcm = &cm->config_main;
1280
1281       if (! vcm->node_index_by_feature_index)
1282         {
1283           if (cast == VNET_UNICAST)
1284             {
1285               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1286               static char * feature_nodes[] = {
1287                 [IP4_RX_FEATURE_CHECK_ACCESS] = "ip4-inacl",
1288                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX] = "ip4-source-check-via-rx",
1289                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY] = "ip4-source-check-via-any",
1290                 [IP4_RX_FEATURE_IPSEC] = "ipsec-input-ip4",
1291                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1292                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup",
1293               };
1294
1295               vnet_config_init (vm, vcm,
1296                                 start_nodes, ARRAY_LEN (start_nodes),
1297                                 feature_nodes, ARRAY_LEN (feature_nodes));
1298             }
1299           else
1300             {
1301               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1302               static char * feature_nodes[] = {
1303                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1304                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup-multicast",
1305               };
1306
1307               vnet_config_init (vm, vcm,
1308                                 start_nodes, ARRAY_LEN (start_nodes),
1309                                 feature_nodes, ARRAY_LEN (feature_nodes));
1310             }
1311         }
1312
1313       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1314       ci = cm->config_index_by_sw_if_index[sw_if_index];
1315
1316       if (is_add)
1317         ci = vnet_config_add_feature (vm, vcm,
1318                                       ci,
1319                                       IP4_RX_FEATURE_LOOKUP,
1320                                       /* config data */ 0,
1321                                       /* # bytes of config data */ 0);
1322       else
1323         ci = vnet_config_del_feature (vm, vcm,
1324                                       ci,
1325                                       IP4_RX_FEATURE_LOOKUP,
1326                                       /* config data */ 0,
1327                                       /* # bytes of config data */ 0);
1328
1329       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1330     }
1331
1332   return /* no error */ 0;
1333 }
1334
1335 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1336
1337
1338 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1339   .function = ip4_lookup,
1340   .name = "ip4-lookup",
1341   .vector_size = sizeof (u32),
1342
1343   .n_next_nodes = IP_LOOKUP_N_NEXT,
1344   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1345 };
1346
1347 static uword
1348 ip4_indirect (vlib_main_t * vm,
1349                vlib_node_runtime_t * node,
1350                vlib_frame_t * frame)
1351 {
1352   return ip4_lookup_inline (vm, node, frame,
1353                             /* lookup_for_responses_to_locally_received_packets */ 0,
1354                             /* is_indirect */ 1);
1355 }
1356
1357 VLIB_REGISTER_NODE (ip4_indirect_node) = {
1358   .function = ip4_indirect,
1359   .name = "ip4-indirect",
1360   .vector_size = sizeof (u32),
1361
1362   .n_next_nodes = IP_LOOKUP_N_NEXT,
1363   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1364 };
1365
1366
/* Global IP4 main: the single ip4_main_t instance that the functions in
   this file reach through &ip4_main (FIBs, lookup main, callbacks, ...). */
ip4_main_t ip4_main;
1369
1370 clib_error_t *
1371 ip4_lookup_init (vlib_main_t * vm)
1372 {
1373   ip4_main_t * im = &ip4_main;
1374   uword i;
1375
1376   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1377     {
1378       u32 m;
1379
1380       if (i < 32)
1381         m = pow2_mask (i) << (32 - i);
1382       else 
1383         m = ~0;
1384       im->fib_masks[i] = clib_host_to_net_u32 (m);
1385     }
1386
1387   /* Create FIB with index 0 and table id of 0. */
1388   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1389
1390   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1391
1392   {
1393     pg_node_t * pn;
1394     pn = pg_get_node (ip4_lookup_node.index);
1395     pn->unformat_edit = unformat_pg_ip4_header;
1396   }
1397
1398   {
1399     ethernet_arp_header_t h;
1400
1401     memset (&h, 0, sizeof (h));
1402
1403     /* Set target ethernet address to all zeros. */
1404     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1405
1406 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1407 #define _8(f,v) h.f = v;
1408     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1409     _16 (l3_type, ETHERNET_TYPE_IP4);
1410     _8 (n_l2_address_bytes, 6);
1411     _8 (n_l3_address_bytes, 4);
1412     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1413 #undef _16
1414 #undef _8
1415
1416     vlib_packet_template_init (vm,
1417                                &im->ip4_arp_request_packet_template,
1418                                /* data */ &h,
1419                                sizeof (h),
1420                                /* alloc chunk size */ 8,
1421                                "ip4 arp");
1422   }
1423
1424   return 0;
1425 }
1426
1427 VLIB_INIT_FUNCTION (ip4_lookup_init);
1428
/* Trace record written by ip4_forward_next_trace and printed by
   format_ip4_forward_next_trace. */
typedef struct {
  /* Adjacency taken. */
  u32 adj_index;
  /* Flow hash copied from the buffer's ip.flow_hash. */
  u32 flow_hash;
  /* FIB index looked up from the buffer's RX software interface. */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. */
  /* NOTE(review): sized as 64 minus ONE u32 although three u32 fields
     precede it, so the record is 72 bytes rather than 64 - confirm whether
     that is intended. */
  u8 packet_data[64 - 1*sizeof(u32)];
} ip4_forward_next_trace_t;
1438
1439 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1440 {
1441   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1442   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1443   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1444   vnet_main_t * vnm = vnet_get_main();
1445   ip4_main_t * im = &ip4_main;
1446   ip_adjacency_t * adj;
1447   uword indent = format_get_indent (s);
1448
1449   adj = ip_get_adjacency (&im->lookup_main, t->adj_index);
1450   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1451               t->fib_index, t->adj_index, format_ip_adjacency,
1452               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1453   switch (adj->lookup_next_index)
1454     {
1455     case IP_LOOKUP_NEXT_REWRITE:
1456       s = format (s, "\n%U%U",
1457                   format_white_space, indent,
1458                   format_ip_adjacency_packet_data,
1459                   vnm, &im->lookup_main, t->adj_index,
1460                   t->packet_data, sizeof (t->packet_data));
1461       break;
1462
1463     default:
1464       break;
1465     }
1466
1467   return s;
1468 }
1469
1470 /* Common trace function for all ip4-forward next nodes. */
1471 void
1472 ip4_forward_next_trace (vlib_main_t * vm,
1473                         vlib_node_runtime_t * node,
1474                         vlib_frame_t * frame,
1475                         vlib_rx_or_tx_t which_adj_index)
1476 {
1477   u32 * from, n_left;
1478   ip4_main_t * im = &ip4_main;
1479
1480   n_left = frame->n_vectors;
1481   from = vlib_frame_vector_args (frame);
1482   
1483   while (n_left >= 4)
1484     {
1485       u32 bi0, bi1;
1486       vlib_buffer_t * b0, * b1;
1487       ip4_forward_next_trace_t * t0, * t1;
1488
1489       /* Prefetch next iteration. */
1490       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1491       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1492
1493       bi0 = from[0];
1494       bi1 = from[1];
1495
1496       b0 = vlib_get_buffer (vm, bi0);
1497       b1 = vlib_get_buffer (vm, bi1);
1498
1499       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1500         {
1501           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1502           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1503           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1504           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1505                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1506           clib_memcpy (t0->packet_data,
1507                   vlib_buffer_get_current (b0),
1508                   sizeof (t0->packet_data));
1509         }
1510       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1511         {
1512           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1513           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1514           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1515           t1->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1516                              vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1517           clib_memcpy (t1->packet_data,
1518                   vlib_buffer_get_current (b1),
1519                   sizeof (t1->packet_data));
1520         }
1521       from += 2;
1522       n_left -= 2;
1523     }
1524
1525   while (n_left >= 1)
1526     {
1527       u32 bi0;
1528       vlib_buffer_t * b0;
1529       ip4_forward_next_trace_t * t0;
1530
1531       bi0 = from[0];
1532
1533       b0 = vlib_get_buffer (vm, bi0);
1534
1535       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1536         {
1537           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1538           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1539           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1540           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1541                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1542           clib_memcpy (t0->packet_data,
1543                   vlib_buffer_get_current (b0),
1544                   sizeof (t0->packet_data));
1545         }
1546       from += 1;
1547       n_left -= 1;
1548     }
1549 }
1550
1551 static uword
1552 ip4_drop_or_punt (vlib_main_t * vm,
1553                   vlib_node_runtime_t * node,
1554                   vlib_frame_t * frame,
1555                   ip4_error_t error_code)
1556 {
1557   u32 * buffers = vlib_frame_vector_args (frame);
1558   uword n_packets = frame->n_vectors;
1559
1560   vlib_error_drop_buffers (vm, node,
1561                            buffers,
1562                            /* stride */ 1,
1563                            n_packets,
1564                            /* next */ 0,
1565                            ip4_input_node.index,
1566                            error_code);
1567
1568   if (node->flags & VLIB_NODE_FLAG_TRACE)
1569     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1570
1571   return n_packets;
1572 }
1573
1574 static uword
1575 ip4_drop (vlib_main_t * vm,
1576           vlib_node_runtime_t * node,
1577           vlib_frame_t * frame)
1578 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1579
1580 static uword
1581 ip4_punt (vlib_main_t * vm,
1582           vlib_node_runtime_t * node,
1583           vlib_frame_t * frame)
1584 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1585
1586 static uword
1587 ip4_miss (vlib_main_t * vm,
1588           vlib_node_runtime_t * node,
1589           vlib_frame_t * frame)
1590 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1591
1592 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1593   .function = ip4_drop,
1594   .name = "ip4-drop",
1595   .vector_size = sizeof (u32),
1596
1597   .format_trace = format_ip4_forward_next_trace,
1598
1599   .n_next_nodes = 1,
1600   .next_nodes = {
1601     [0] = "error-drop",
1602   },
1603 };
1604
1605 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1606   .function = ip4_punt,
1607   .name = "ip4-punt",
1608   .vector_size = sizeof (u32),
1609
1610   .format_trace = format_ip4_forward_next_trace,
1611
1612   .n_next_nodes = 1,
1613   .next_nodes = {
1614     [0] = "error-punt",
1615   },
1616 };
1617
1618 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1619   .function = ip4_miss,
1620   .name = "ip4-miss",
1621   .vector_size = sizeof (u32),
1622
1623   .format_trace = format_ip4_forward_next_trace,
1624
1625   .n_next_nodes = 1,
1626   .next_nodes = {
1627     [0] = "error-drop",
1628   },
1629 };
1630
1631 /* Compute TCP/UDP/ICMP4 checksum in software. */
1632 u16
1633 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1634                               ip4_header_t * ip0)
1635 {
1636   ip_csum_t sum0;
1637   u32 ip_header_length, payload_length_host_byte_order;
1638   u32 n_this_buffer, n_bytes_left;
1639   u16 sum16;
1640   void * data_this_buffer;
1641   
1642   /* Initialize checksum with ip header. */
1643   ip_header_length = ip4_header_bytes (ip0);
1644   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1645   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1646
1647   if (BITS (uword) == 32)
1648     {
1649       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1650       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1651     }
1652   else
1653     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1654
1655   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1656   data_this_buffer = (void *) ip0 + ip_header_length;
1657   if (n_this_buffer + ip_header_length > p0->current_length)
1658     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1659   while (1)
1660     {
1661       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1662       n_bytes_left -= n_this_buffer;
1663       if (n_bytes_left == 0)
1664         break;
1665
1666       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1667       p0 = vlib_get_buffer (vm, p0->next_buffer);
1668       data_this_buffer = vlib_buffer_get_current (p0);
1669       n_this_buffer = p0->current_length;
1670     }
1671
1672   sum16 = ~ ip_csum_fold (sum0);
1673
1674   return sum16;
1675 }
1676
1677 static u32
1678 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1679 {
1680   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1681   udp_header_t * udp0;
1682   u16 sum16;
1683
1684   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1685           || ip0->protocol == IP_PROTOCOL_UDP);
1686
1687   udp0 = (void *) (ip0 + 1);
1688   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1689     {
1690       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1691                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1692       return p0->flags;
1693     }
1694
1695   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1696
1697   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1698                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1699
1700   return p0->flags;
1701 }
1702
1703 static uword
1704 ip4_local (vlib_main_t * vm,
1705            vlib_node_runtime_t * node,
1706            vlib_frame_t * frame)
1707 {
1708   ip4_main_t * im = &ip4_main;
1709   ip_lookup_main_t * lm = &im->lookup_main;
1710   ip_local_next_t next_index;
1711   u32 * from, * to_next, n_left_from, n_left_to_next;
1712   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1713
1714   from = vlib_frame_vector_args (frame);
1715   n_left_from = frame->n_vectors;
1716   next_index = node->cached_next_index;
1717   
1718   if (node->flags & VLIB_NODE_FLAG_TRACE)
1719     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1720
1721   while (n_left_from > 0)
1722     {
1723       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1724
1725       while (n_left_from >= 4 && n_left_to_next >= 2)
1726         {
1727           vlib_buffer_t * p0, * p1;
1728           ip4_header_t * ip0, * ip1;
1729           udp_header_t * udp0, * udp1;
1730           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1731           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1732           ip_adjacency_t * adj0, * adj1;
1733           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
1734           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
1735           i32 len_diff0, len_diff1;
1736           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1737           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1738           u8 enqueue_code;
1739       
1740           pi0 = to_next[0] = from[0];
1741           pi1 = to_next[1] = from[1];
1742           from += 2;
1743           n_left_from -= 2;
1744           to_next += 2;
1745           n_left_to_next -= 2;
1746       
1747           p0 = vlib_get_buffer (vm, pi0);
1748           p1 = vlib_get_buffer (vm, pi1);
1749
1750           ip0 = vlib_buffer_get_current (p0);
1751           ip1 = vlib_buffer_get_current (p1);
1752
1753           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1754                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1755           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1756                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1757
1758           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1759           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
1760
1761           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1762
1763           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1764           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1765
1766           proto0 = ip0->protocol;
1767           proto1 = ip1->protocol;
1768           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1769           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1770           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1771           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1772
1773           flags0 = p0->flags;
1774           flags1 = p1->flags;
1775
1776           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1777           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1778
1779           udp0 = ip4_next_header (ip0);
1780           udp1 = ip4_next_header (ip1);
1781
1782           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1783           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1784           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1785
1786           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1787           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1788
1789           /* Verify UDP length. */
1790           ip_len0 = clib_net_to_host_u16 (ip0->length);
1791           ip_len1 = clib_net_to_host_u16 (ip1->length);
1792           udp_len0 = clib_net_to_host_u16 (udp0->length);
1793           udp_len1 = clib_net_to_host_u16 (udp1->length);
1794
1795           len_diff0 = ip_len0 - udp_len0;
1796           len_diff1 = ip_len1 - udp_len1;
1797
1798           len_diff0 = is_udp0 ? len_diff0 : 0;
1799           len_diff1 = is_udp1 ? len_diff1 : 0;
1800
1801           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1802                                 & good_tcp_udp0 & good_tcp_udp1)))
1803             {
1804               if (is_tcp_udp0)
1805                 {
1806                   if (is_tcp_udp0
1807                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1808                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1809                   good_tcp_udp0 =
1810                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1811                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1812                 }
1813               if (is_tcp_udp1)
1814                 {
1815                   if (is_tcp_udp1
1816                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1817                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1818                   good_tcp_udp1 =
1819                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1820                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1821                 }
1822             }
1823
1824           good_tcp_udp0 &= len_diff0 >= 0;
1825           good_tcp_udp1 &= len_diff1 >= 0;
1826
1827           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1828           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1829
1830           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1831
1832           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1833           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1834
1835           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1836           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1837                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1838                     : error0);
1839           error1 = (is_tcp_udp1 && ! good_tcp_udp1
1840                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1841                     : error1);
1842
1843           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1844           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1845
1846           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1847           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1848
1849           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1850           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
1851
1852           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1853                                                            &ip0->src_address,
1854                                                            /* no_default_route */ 1));
1855           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
1856                                                            &ip1->src_address,
1857                                                            /* no_default_route */ 1));
1858
1859           adj0 = ip_get_adjacency (lm, adj_index0);
1860           adj1 = ip_get_adjacency (lm, adj_index1);
1861
1862           /* 
1863            * Must have a route to source otherwise we drop the packet.
1864            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1865            */
1866           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1867                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1868                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
1869                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1870                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1871                     ? IP4_ERROR_SRC_LOOKUP_MISS
1872                     : error0);
1873           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
1874                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1875                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
1876                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1877                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1878                     ? IP4_ERROR_SRC_LOOKUP_MISS
1879                     : error1);
1880
1881           next0 = lm->local_next_by_ip_protocol[proto0];
1882           next1 = lm->local_next_by_ip_protocol[proto1];
1883
1884           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1885           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1886
1887           p0->error = error0 ? error_node->errors[error0] : 0;
1888           p1->error = error1 ? error_node->errors[error1] : 0;
1889
1890           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1891
1892           if (PREDICT_FALSE (enqueue_code != 0))
1893             {
1894               switch (enqueue_code)
1895                 {
1896                 case 1:
1897                   /* A B A */
1898                   to_next[-2] = pi1;
1899                   to_next -= 1;
1900                   n_left_to_next += 1;
1901                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1902                   break;
1903
1904                 case 2:
1905                   /* A A B */
1906                   to_next -= 1;
1907                   n_left_to_next += 1;
1908                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1909                   break;
1910
1911                 case 3:
1912                   /* A B B or A B C */
1913                   to_next -= 2;
1914                   n_left_to_next += 2;
1915                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1916                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1917                   if (next0 == next1)
1918                     {
1919                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1920                       next_index = next1;
1921                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1922                     }
1923                   break;
1924                 }
1925             }
1926         }
1927
1928       while (n_left_from > 0 && n_left_to_next > 0)
1929         {
1930           vlib_buffer_t * p0;
1931           ip4_header_t * ip0;
1932           udp_header_t * udp0;
1933           ip4_fib_mtrie_t * mtrie0;
1934           ip4_fib_mtrie_leaf_t leaf0;
1935           ip_adjacency_t * adj0;
1936           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
1937           i32 len_diff0;
1938           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1939       
1940           pi0 = to_next[0] = from[0];
1941           from += 1;
1942           n_left_from -= 1;
1943           to_next += 1;
1944           n_left_to_next -= 1;
1945       
1946           p0 = vlib_get_buffer (vm, pi0);
1947
1948           ip0 = vlib_buffer_get_current (p0);
1949
1950           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1951                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1952
1953           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1954
1955           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1956
1957           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1958
1959           proto0 = ip0->protocol;
1960           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1961           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1962
1963           flags0 = p0->flags;
1964
1965           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1966
1967           udp0 = ip4_next_header (ip0);
1968
1969           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1970           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1971
1972           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1973
1974           /* Verify UDP length. */
1975           ip_len0 = clib_net_to_host_u16 (ip0->length);
1976           udp_len0 = clib_net_to_host_u16 (udp0->length);
1977
1978           len_diff0 = ip_len0 - udp_len0;
1979
1980           len_diff0 = is_udp0 ? len_diff0 : 0;
1981
1982           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1983             {
1984               if (is_tcp_udp0)
1985                 {
1986                   if (is_tcp_udp0
1987                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1988                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1989                   good_tcp_udp0 =
1990                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1991                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1992                 }
1993             }
1994
1995           good_tcp_udp0 &= len_diff0 >= 0;
1996
1997           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1998
1999           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2000
2001           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2002
2003           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2004           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2005                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2006                     : error0);
2007
2008           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2009
2010           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2011           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2012
2013           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2014                                                            &ip0->src_address,
2015                                                            /* no_default_route */ 1));
2016
2017           adj0 = ip_get_adjacency (lm, adj_index0);
2018
2019           /* Must have a route to source otherwise we drop the packet. */
2020           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2021                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2022                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2023                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2024                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2025                     ? IP4_ERROR_SRC_LOOKUP_MISS
2026                     : error0);
2027
2028           next0 = lm->local_next_by_ip_protocol[proto0];
2029
2030           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2031
2032           p0->error = error0? error_node->errors[error0] : 0;
2033
2034           if (PREDICT_FALSE (next0 != next_index))
2035             {
2036               n_left_to_next += 1;
2037               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2038
2039               next_index = next0;
2040               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2041               to_next[0] = pi0;
2042               to_next += 1;
2043               n_left_to_next -= 1;
2044             }
2045         }
2046   
2047       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2048     }
2049
2050   return frame->n_vectors;
2051 }
2052
2053 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2054   .function = ip4_local,
2055   .name = "ip4-local",
2056   .vector_size = sizeof (u32),
2057
2058   .format_trace = format_ip4_forward_next_trace,
2059
2060   .n_next_nodes = IP_LOCAL_N_NEXT,
2061   .next_nodes = {
2062     [IP_LOCAL_NEXT_DROP] = "error-drop",
2063     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2064     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2065     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2066   },
2067 };
2068
2069 void ip4_register_protocol (u32 protocol, u32 node_index)
2070 {
2071   vlib_main_t * vm = vlib_get_main();
2072   ip4_main_t * im = &ip4_main;
2073   ip_lookup_main_t * lm = &im->lookup_main;
2074
2075   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2076   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2077 }
2078
2079 static clib_error_t *
2080 show_ip_local_command_fn (vlib_main_t * vm,
2081                           unformat_input_t * input,
2082                          vlib_cli_command_t * cmd)
2083 {
2084   ip4_main_t * im = &ip4_main;
2085   ip_lookup_main_t * lm = &im->lookup_main;
2086   int i;
2087
2088   vlib_cli_output (vm, "Protocols handled by ip4_local");
2089   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2090     {
2091       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2092         vlib_cli_output (vm, "%d", i);
2093     }
2094   return 0;
2095 }
2096
2097
2098
2099 VLIB_CLI_COMMAND (show_ip_local, static) = {
2100   .path = "show ip local",
2101   .function = show_ip_local_command_fn,
2102   .short_help = "Show ip local protocol table",
2103 };
2104
2105 static uword
2106 ip4_arp (vlib_main_t * vm,
2107          vlib_node_runtime_t * node,
2108          vlib_frame_t * frame)
2109 {
2110   vnet_main_t * vnm = vnet_get_main();
2111   ip4_main_t * im = &ip4_main;
2112   ip_lookup_main_t * lm = &im->lookup_main;
2113   u32 * from, * to_next_drop;
2114   uword n_left_from, n_left_to_next_drop, next_index;
2115   static f64 time_last_seed_change = -1e100;
2116   static u32 hash_seeds[3];
2117   static uword hash_bitmap[256 / BITS (uword)]; 
2118   f64 time_now;
2119
2120   if (node->flags & VLIB_NODE_FLAG_TRACE)
2121     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2122
2123   time_now = vlib_time_now (vm);
2124   if (time_now - time_last_seed_change > 1e-3)
2125     {
2126       uword i;
2127       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2128                                              sizeof (hash_seeds));
2129       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2130         hash_seeds[i] = r[i];
2131
2132       /* Mark all hash keys as been no-seen before. */
2133       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2134         hash_bitmap[i] = 0;
2135
2136       time_last_seed_change = time_now;
2137     }
2138
2139   from = vlib_frame_vector_args (frame);
2140   n_left_from = frame->n_vectors;
2141   next_index = node->cached_next_index;
2142   if (next_index == IP4_ARP_NEXT_DROP)
2143     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2144
2145   while (n_left_from > 0)
2146     {
2147       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2148                            to_next_drop, n_left_to_next_drop);
2149
2150       while (n_left_from > 0 && n_left_to_next_drop > 0)
2151         {
2152           vlib_buffer_t * p0;
2153           ip4_header_t * ip0;
2154           ethernet_header_t * eh0;
2155           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2156           uword bm0;
2157           ip_adjacency_t * adj0;
2158
2159           pi0 = from[0];
2160
2161           p0 = vlib_get_buffer (vm, pi0);
2162
2163           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2164           adj0 = ip_get_adjacency (lm, adj_index0);
2165           ip0 = vlib_buffer_get_current (p0);
2166
2167           /* If packet destination is not local, send ARP to next hop */
2168           if (adj0->arp.next_hop.ip4.as_u32)
2169             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2170
2171           /* 
2172            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2173            * rewrite to this packet, we need to skip it here.
2174            * Note, to distinguish from src IP addr *.8.6.*, we
2175            * check for a bcast eth dest instead of IPv4 version.
2176            */
2177           eh0 = (ethernet_header_t*)ip0;
2178           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2179             {
2180               u32 vlan_num = 0;
2181               u16 * etype = &eh0->type;
2182               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2183                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2184                 {
2185                   vlan_num += 1;
2186                   etype += 2; //vlan tag also 16 bits, same as etype
2187                 }
2188               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2189                 {
2190                   vlib_buffer_advance (
2191                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2192                   ip0 = vlib_buffer_get_current (p0);
2193                 }
2194             }
2195
2196           a0 = hash_seeds[0];
2197           b0 = hash_seeds[1];
2198           c0 = hash_seeds[2];
2199
2200           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2201           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2202
2203           a0 ^= ip0->dst_address.data_u32;
2204           b0 ^= sw_if_index0;
2205
2206           hash_v3_finalize32 (a0, b0, c0);
2207
2208           c0 &= BITS (hash_bitmap) - 1;
2209           c0 = c0 / BITS (uword);
2210           m0 = (uword) 1 << (c0 % BITS (uword));
2211
2212           bm0 = hash_bitmap[c0];
2213           drop0 = (bm0 & m0) != 0;
2214
2215           /* Mark it as seen. */
2216           hash_bitmap[c0] = bm0 | m0;
2217
2218           from += 1;
2219           n_left_from -= 1;
2220           to_next_drop[0] = pi0;
2221           to_next_drop += 1;
2222           n_left_to_next_drop -= 1;
2223
2224           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2225
2226           if (drop0)
2227             continue;
2228
2229           /* 
2230            * Can happen if the control-plane is programming tables
2231            * with traffic flowing; at least that's today's lame excuse.
2232            */
2233           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2234             {
2235               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2236             }
2237           else
2238           /* Send ARP request. */
2239           {
2240             u32 bi0 = 0;
2241             vlib_buffer_t * b0;
2242             ethernet_arp_header_t * h0;
2243             vnet_hw_interface_t * hw_if0;
2244
2245             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2246
2247             /* Add rewrite/encap string for ARP packet. */
2248             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2249
2250             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2251
2252             /* Src ethernet address in ARP header. */
2253             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2254                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2255
2256             ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0);
2257
2258             /* Copy in destination address we are requesting. */
2259             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2260
2261             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2262             b0 = vlib_get_buffer (vm, bi0);
2263             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2264
2265             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2266
2267             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2268           }
2269         }
2270
2271       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2272     }
2273
2274   return frame->n_vectors;
2275 }
2276
2277 static char * ip4_arp_error_strings[] = {
2278   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2279   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2280   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2281   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2282   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2283 };
2284
2285 VLIB_REGISTER_NODE (ip4_arp_node) = {
2286   .function = ip4_arp,
2287   .name = "ip4-arp",
2288   .vector_size = sizeof (u32),
2289
2290   .format_trace = format_ip4_forward_next_trace,
2291
2292   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2293   .error_strings = ip4_arp_error_strings,
2294
2295   .n_next_nodes = IP4_ARP_N_NEXT,
2296   .next_nodes = {
2297     [IP4_ARP_NEXT_DROP] = "error-drop",
2298   },
2299 };
2300
2301 #define foreach_notrace_ip4_arp_error           \
2302 _(DROP)                                         \
2303 _(REQUEST_SENT)                                 \
2304 _(REPLICATE_DROP)                               \
2305 _(REPLICATE_FAIL)
2306
2307 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2308 {
2309   vlib_node_runtime_t *rt = 
2310     vlib_node_get_runtime (vm, ip4_arp_node.index);
2311
2312   /* don't trace ARP request packets */
2313 #define _(a)                                    \
2314     vnet_pcap_drop_trace_filter_add_del         \
2315         (rt->errors[IP4_ARP_ERROR_##a],         \
2316          1 /* is_add */);
2317     foreach_notrace_ip4_arp_error;
2318 #undef _
2319   return 0;
2320 }
2321
2322 VLIB_INIT_FUNCTION(arp_notrace_init);
2323
2324
2325 /* Send an ARP request to see if given destination is reachable on given interface. */
2326 clib_error_t *
2327 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2328 {
2329   vnet_main_t * vnm = vnet_get_main();
2330   ip4_main_t * im = &ip4_main;
2331   ethernet_arp_header_t * h;
2332   ip4_address_t * src;
2333   ip_interface_address_t * ia;
2334   ip_adjacency_t * adj;
2335   vnet_hw_interface_t * hi;
2336   vnet_sw_interface_t * si;
2337   vlib_buffer_t * b;
2338   u32 bi = 0;
2339
2340   si = vnet_get_sw_interface (vnm, sw_if_index);
2341
2342   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2343     {
2344       return clib_error_return (0, "%U: interface %U down",
2345                                 format_ip4_address, dst, 
2346                                 format_vnet_sw_if_index_name, vnm, 
2347                                 sw_if_index);
2348     }
2349
2350   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2351   if (! src)
2352     {
2353       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2354       return clib_error_return 
2355         (0, "no matching interface address for destination %U (interface %U)",
2356          format_ip4_address, dst,
2357          format_vnet_sw_if_index_name, vnm, sw_if_index);
2358     }
2359
2360   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2361
2362   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2363
2364   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2365
2366   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2367
2368   h->ip4_over_ethernet[0].ip4 = src[0];
2369   h->ip4_over_ethernet[1].ip4 = dst[0];
2370
2371   b = vlib_get_buffer (vm, bi);
2372   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2373
2374   /* Add encapsulation string for software interface (e.g. ethernet header). */
2375   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2376   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2377
2378   {
2379     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2380     u32 * to_next = vlib_frame_vector_args (f);
2381     to_next[0] = bi;
2382     f->n_vectors = 1;
2383     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2384   }
2385
2386   return /* no error */ 0;
2387 }
2388
/* Next-node indices used by the ip4 rewrite code. */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ARP = 1,
} ip4_rewrite_next_t;
2393
2394 always_inline uword
2395 ip4_rewrite_inline (vlib_main_t * vm,
2396                     vlib_node_runtime_t * node,
2397                     vlib_frame_t * frame,
2398                     int rewrite_for_locally_received_packets)
2399 {
2400   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2401   u32 * from = vlib_frame_vector_args (frame);
2402   u32 n_left_from, n_left_to_next, * to_next, next_index;
2403   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2404   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2405
2406   n_left_from = frame->n_vectors;
2407   next_index = node->cached_next_index;
2408   u32 cpu_index = os_get_cpu_number();
2409   
2410   while (n_left_from > 0)
2411     {
2412       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2413
2414       while (n_left_from >= 4 && n_left_to_next >= 2)
2415         {
2416           ip_adjacency_t * adj0, * adj1;
2417           vlib_buffer_t * p0, * p1;
2418           ip4_header_t * ip0, * ip1;
2419           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2420           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2421           u32 next0_override, next1_override;
2422       
2423           if (rewrite_for_locally_received_packets)
2424               next0_override = next1_override = 0;
2425
2426           /* Prefetch next iteration. */
2427           {
2428             vlib_buffer_t * p2, * p3;
2429
2430             p2 = vlib_get_buffer (vm, from[2]);
2431             p3 = vlib_get_buffer (vm, from[3]);
2432
2433             vlib_prefetch_buffer_header (p2, STORE);
2434             vlib_prefetch_buffer_header (p3, STORE);
2435
2436             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2437             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2438           }
2439
2440           pi0 = to_next[0] = from[0];
2441           pi1 = to_next[1] = from[1];
2442
2443           from += 2;
2444           n_left_from -= 2;
2445           to_next += 2;
2446           n_left_to_next -= 2;
2447       
2448           p0 = vlib_get_buffer (vm, pi0);
2449           p1 = vlib_get_buffer (vm, pi1);
2450
2451           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2452           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2453
2454           /* We should never rewrite a pkt using the MISS adjacency */
2455           ASSERT(adj_index0 && adj_index1);
2456
2457           ip0 = vlib_buffer_get_current (p0);
2458           ip1 = vlib_buffer_get_current (p1);
2459
2460           error0 = error1 = IP4_ERROR_NONE;
2461
2462           /* Decrement TTL & update checksum.
2463              Works either endian, so no need for byte swap. */
2464           if (! rewrite_for_locally_received_packets)
2465             {
2466               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2467
2468               /* Input node should have reject packets with ttl 0. */
2469               ASSERT (ip0->ttl > 0);
2470               ASSERT (ip1->ttl > 0);
2471
2472               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2473               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2474
2475               checksum0 += checksum0 >= 0xffff;
2476               checksum1 += checksum1 >= 0xffff;
2477
2478               ip0->checksum = checksum0;
2479               ip1->checksum = checksum1;
2480
2481               ttl0 -= 1;
2482               ttl1 -= 1;
2483
2484               ip0->ttl = ttl0;
2485               ip1->ttl = ttl1;
2486
2487               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2488               error1 = ttl1 <= 0 ? IP4_ERROR_TIME_EXPIRED : error1;
2489
2490               /* Verify checksum. */
2491               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2492               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2493             }
2494
2495           /* Rewrite packet header and update lengths. */
2496           adj0 = ip_get_adjacency (lm, adj_index0);
2497           adj1 = ip_get_adjacency (lm, adj_index1);
2498       
2499           if (rewrite_for_locally_received_packets)
2500             {
2501               /*
2502                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2503                * we end up here with a local adjacency in hand
2504                * The local adj rewrite data is 0xfefe on purpose.
2505                * Bad engineer, no donut for you.
2506                */
2507               if (PREDICT_FALSE(adj0->lookup_next_index 
2508                                 == IP_LOOKUP_NEXT_LOCAL))
2509                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2510               if (PREDICT_FALSE(adj0->lookup_next_index
2511                                 == IP_LOOKUP_NEXT_ARP))
2512                 next0_override = IP4_REWRITE_NEXT_ARP;
2513               if (PREDICT_FALSE(adj1->lookup_next_index 
2514                                 == IP_LOOKUP_NEXT_LOCAL))
2515                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2516               if (PREDICT_FALSE(adj1->lookup_next_index
2517                                 == IP_LOOKUP_NEXT_ARP))
2518                 next1_override = IP4_REWRITE_NEXT_ARP;
2519             }
2520
2521           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2522           rw_len0 = adj0[0].rewrite_header.data_bytes;
2523           rw_len1 = adj1[0].rewrite_header.data_bytes;
2524           next0 = (error0 == IP4_ERROR_NONE) 
2525             ? adj0[0].rewrite_header.next_index : 0;
2526
2527           if (rewrite_for_locally_received_packets)
2528               next0 = next0 && next0_override ? next0_override : next0;
2529
2530           next1 = (error1 == IP4_ERROR_NONE)
2531             ? adj1[0].rewrite_header.next_index : 0;
2532
2533           if (rewrite_for_locally_received_packets)
2534               next1 = next1 && next1_override ? next1_override : next1;
2535
2536           /* 
2537            * We've already accounted for an ethernet_header_t elsewhere
2538            */
2539           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2540               vlib_increment_combined_counter 
2541                   (&lm->adjacency_counters,
2542                    cpu_index, adj_index0, 
2543                    /* packet increment */ 0,
2544                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2545
2546           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2547               vlib_increment_combined_counter 
2548                   (&lm->adjacency_counters,
2549                    cpu_index, adj_index1, 
2550                    /* packet increment */ 0,
2551                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2552
2553           /* Check MTU of outgoing interface. */
2554           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2555                     ? IP4_ERROR_MTU_EXCEEDED
2556                     : error0);
2557           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2558                     ? IP4_ERROR_MTU_EXCEEDED
2559                     : error1);
2560
2561           p0->current_data -= rw_len0;
2562           p1->current_data -= rw_len1;
2563
2564           p0->current_length += rw_len0;
2565           p1->current_length += rw_len1;
2566
2567           vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
2568           vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index;
2569       
2570           p0->error = error_node->errors[error0];
2571           p1->error = error_node->errors[error1];
2572
2573           /* Guess we are only writing on simple Ethernet header. */
2574           vnet_rewrite_two_headers (adj0[0], adj1[0],
2575                                     ip0, ip1,
2576                                     sizeof (ethernet_header_t));
2577       
2578           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2579                                            to_next, n_left_to_next,
2580                                            pi0, pi1, next0, next1);
2581         }
2582
2583       while (n_left_from > 0 && n_left_to_next > 0)
2584         {
2585           ip_adjacency_t * adj0;
2586           vlib_buffer_t * p0;
2587           ip4_header_t * ip0;
2588           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2589           u32 next0_override;
2590       
2591           if (rewrite_for_locally_received_packets)
2592               next0_override = 0;
2593
2594           pi0 = to_next[0] = from[0];
2595
2596           p0 = vlib_get_buffer (vm, pi0);
2597
2598           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2599
2600           /* We should never rewrite a pkt using the MISS adjacency */
2601           ASSERT(adj_index0);
2602
2603           adj0 = ip_get_adjacency (lm, adj_index0);
2604       
2605           ip0 = vlib_buffer_get_current (p0);
2606
2607           error0 = IP4_ERROR_NONE;
2608           next0 = 0;            /* drop on error */
2609
2610           /* Decrement TTL & update checksum. */
2611           if (! rewrite_for_locally_received_packets)
2612             {
2613               i32 ttl0 = ip0->ttl;
2614
2615               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2616
2617               checksum0 += checksum0 >= 0xffff;
2618
2619               ip0->checksum = checksum0;
2620
2621               ASSERT (ip0->ttl > 0);
2622
2623               ttl0 -= 1;
2624
2625               ip0->ttl = ttl0;
2626
2627               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2628
2629               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2630             }
2631
2632           if (rewrite_for_locally_received_packets)
2633             {
2634               /*
2635                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2636                * we end up here with a local adjacency in hand
2637                * The local adj rewrite data is 0xfefe on purpose.
2638                * Bad engineer, no donut for you.
2639                */
2640               if (PREDICT_FALSE(adj0->lookup_next_index 
2641                                 == IP_LOOKUP_NEXT_LOCAL))
2642                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2643               /* 
2644                * We have to override the next_index in ARP adjacencies,
2645                * because they're set up for ip4-arp, not this node...
2646                */
2647               if (PREDICT_FALSE(adj0->lookup_next_index
2648                                 == IP_LOOKUP_NEXT_ARP))
2649                 next0_override = IP4_REWRITE_NEXT_ARP;
2650             }
2651
2652           /* Guess we are only writing on simple Ethernet header. */
2653           vnet_rewrite_one_header (adj0[0], ip0, 
2654                                    sizeof (ethernet_header_t));
2655           
2656           /* Update packet buffer attributes/set output interface. */
2657           rw_len0 = adj0[0].rewrite_header.data_bytes;
2658           
2659           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2660               vlib_increment_combined_counter 
2661                   (&lm->adjacency_counters,
2662                    cpu_index, adj_index0, 
2663                    /* packet increment */ 0,
2664                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2665           
2666           /* Check MTU of outgoing interface. */
2667           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2668                     > adj0[0].rewrite_header.max_l3_packet_bytes
2669                     ? IP4_ERROR_MTU_EXCEEDED
2670                     : error0);
2671           
2672           p0->error = error_node->errors[error0];
2673           p0->current_data -= rw_len0;
2674           p0->current_length += rw_len0;
2675           vnet_buffer (p0)->sw_if_index[VLIB_TX] = 
2676             adj0[0].rewrite_header.sw_if_index;
2677           
2678           next0 = (error0 == IP4_ERROR_NONE)
2679             ? adj0[0].rewrite_header.next_index : 0;
2680
2681           if (rewrite_for_locally_received_packets)
2682               next0 = next0 && next0_override ? next0_override : next0;
2683
2684           from += 1;
2685           n_left_from -= 1;
2686           to_next += 1;
2687           n_left_to_next -= 1;
2688       
2689           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2690                                            to_next, n_left_to_next,
2691                                            pi0, next0);
2692         }
2693   
2694       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2695     }
2696
2697   /* Need to do trace after rewrites to pick up new packet data. */
2698   if (node->flags & VLIB_NODE_FLAG_TRACE)
2699     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2700
2701   return frame->n_vectors;
2702 }
2703
2704 static uword
2705 ip4_rewrite_transit (vlib_main_t * vm,
2706                      vlib_node_runtime_t * node,
2707                      vlib_frame_t * frame)
2708 {
2709   return ip4_rewrite_inline (vm, node, frame,
2710                              /* rewrite_for_locally_received_packets */ 0);
2711 }
2712
2713 static uword
2714 ip4_rewrite_local (vlib_main_t * vm,
2715                    vlib_node_runtime_t * node,
2716                    vlib_frame_t * frame)
2717 {
2718   return ip4_rewrite_inline (vm, node, frame,
2719                              /* rewrite_for_locally_received_packets */ 1);
2720 }
2721
/*
 * Registration record for the "ip4-rewrite-transit" graph node.
 * Its node function is ip4_rewrite_transit and it handles vectors of
 * u32-sized elements.  Two next nodes are declared, indexed by the
 * IP4_REWRITE_NEXT_* constants.
 */
2722 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2723   .function = ip4_rewrite_transit,
2724   .name = "ip4-rewrite-transit",
2725   .vector_size = sizeof (u32),
2726
2727   .format_trace = format_ip4_forward_next_trace,
2728
  /* Keep n_next_nodes in step with the entries listed in next_nodes. */
2729   .n_next_nodes = 2,
2730   .next_nodes = {
2731     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2732     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2733   },
2734 };
2735
/*
 * Registration record for the "ip4-rewrite-local" graph node.
 * Its node function is ip4_rewrite_local.  The node is declared as a
 * sibling of "ip4-rewrite-transit" and lists the same two next nodes.
 */
2736 VLIB_REGISTER_NODE (ip4_rewrite_local_node,static) = {
2737   .function = ip4_rewrite_local,
2738   .name = "ip4-rewrite-local",
2739   .vector_size = sizeof (u32),
2740
2741   .sibling_of = "ip4-rewrite-transit",
2742
2743   .format_trace = format_ip4_forward_next_trace,
2744
  /* Keep n_next_nodes in step with the entries listed in next_nodes. */
2745   .n_next_nodes = 2,
2746   .next_nodes = {
2747     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2748     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2749   },
2750 };
2751
2752 static clib_error_t *
2753 add_del_interface_table (vlib_main_t * vm,
2754                          unformat_input_t * input,
2755                          vlib_cli_command_t * cmd)
2756 {
2757   vnet_main_t * vnm = vnet_get_main();
2758   clib_error_t * error = 0;
2759   u32 sw_if_index, table_id;
2760
2761   sw_if_index = ~0;
2762
2763   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2764     {
2765       error = clib_error_return (0, "unknown interface `%U'",
2766                                  format_unformat_error, input);
2767       goto done;
2768     }
2769
2770   if (unformat (input, "%d", &table_id))
2771     ;
2772   else
2773     {
2774       error = clib_error_return (0, "expected table id `%U'",
2775                                  format_unformat_error, input);
2776       goto done;
2777     }
2778
2779   {
2780     ip4_main_t * im = &ip4_main;
2781     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
2782
2783     if (fib) 
2784       {
2785         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2786         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
2787     }
2788   }
2789
2790  done:
2791   return error;
2792 }
2793
/*
 * CLI registration: "set interface ip table" is handled by
 * add_del_interface_table, which parses an interface followed by a
 * numeric table id.
 */
2794 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2795   .path = "set interface ip table",
2796   .function = add_del_interface_table,
2797   .short_help = "Add/delete FIB table id for interface",
2798 };
2799
2800
/*
 * Node function registered below as "ip4-lookup-multicast".
 *
 * For every buffer in the frame:
 *   - pick a FIB index from the RX interface, unless sw_if_index[VLIB_TX]
 *     is set to something other than ~0, in which case that value is
 *     used as the FIB index instead;
 *   - look up the destination address with ip4_fib_lookup_buffer();
 *   - compute a flow hash using the FIB's flow_hash_config and use it to
 *     select one adjacency out of the n_adj adjacencies starting at the
 *     looked-up index;
 *   - store the chosen adjacency index in ip.adj_index[VLIB_TX] and bump
 *     that adjacency's combined counter by one packet and the buffer
 *     chain length;
 *   - enqueue the buffer to the next node named by the adjacency's
 *     lookup_next_index.
 *
 * Returns frame->n_vectors.
 */
2801 static uword
2802 ip4_lookup_multicast (vlib_main_t * vm,
2803                       vlib_node_runtime_t * node,
2804                       vlib_frame_t * frame)
2805 {
2806   ip4_main_t * im = &ip4_main;
2807   ip_lookup_main_t * lm = &im->lookup_main;
2808   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
2809   u32 n_left_from, n_left_to_next, * from, * to_next;
2810   ip_lookup_next_t next;
2811   u32 cpu_index = os_get_cpu_number();
2812
2813   from = vlib_frame_vector_args (frame);
2814   n_left_from = frame->n_vectors;
  /* Start by filling the frame for the next node used last time. */
2815   next = node->cached_next_index;
2816
2817   while (n_left_from > 0)
2818     {
2819       vlib_get_next_frame (vm, node, next,
2820                            to_next, n_left_to_next);
2821
      /*
       * Dual loop: handles two buffers per iteration.  It needs four
       * buffers left because from[2] and from[3] are prefetched.
       */
2822       while (n_left_from >= 4 && n_left_to_next >= 2)
2823         {
2824           vlib_buffer_t * p0, * p1;
2825           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
2826           ip_lookup_next_t next0, next1;
2827           ip4_header_t * ip0, * ip1;
2828           ip_adjacency_t * adj0, * adj1;
2829           u32 fib_index0, fib_index1;
2830           u32 flow_hash_config0, flow_hash_config1;
2831
2832           /* Prefetch next iteration. */
2833           {
2834             vlib_buffer_t * p2, * p3;
2835
2836             p2 = vlib_get_buffer (vm, from[2]);
2837             p3 = vlib_get_buffer (vm, from[3]);
2838
2839             vlib_prefetch_buffer_header (p2, LOAD);
2840             vlib_prefetch_buffer_header (p3, LOAD);
2841
2842             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2843             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2844           }
2845
          /*
           * Both buffer indices are written into the current next frame
           * before their real next index is known; the wrong_next
           * handling further down corrects this when needed.
           */
2846           pi0 = to_next[0] = from[0];
2847           pi1 = to_next[1] = from[1];
2848
2849           p0 = vlib_get_buffer (vm, pi0);
2850           p1 = vlib_get_buffer (vm, pi1);
2851
2852           ip0 = vlib_buffer_get_current (p0);
2853           ip1 = vlib_buffer_get_current (p1);
2854
          /*
           * FIB index comes from the RX interface unless
           * sw_if_index[VLIB_TX] has been set to something other than ~0.
           */
2855           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2856           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2857           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2858             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2859           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2860             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2861
2862           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
2863                                               &ip0->dst_address, p0);
2864           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
2865                                               &ip1->dst_address, p1);
2866
2867           adj0 = ip_get_adjacency (lm, adj_index0);
2868           adj1 = ip_get_adjacency (lm, adj_index1);
2869
2870           next0 = adj0->lookup_next_index;
2871           next1 = adj1->lookup_next_index;
2872
2873           flow_hash_config0 = 
2874               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
2875
2876           flow_hash_config1 = 
2877               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
2878
2879           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
2880               (ip0, flow_hash_config0);
2881                                                                   
2882           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
2883               (ip1, flow_hash_config1);
2884
          /*
           * n_adj is asserted to be a power of two, so masking the flow
           * hash with (n_adj - 1) gives an offset in [0, n_adj).
           */
2885           ASSERT (adj0->n_adj > 0);
2886           ASSERT (adj1->n_adj > 0);
2887           ASSERT (is_pow2 (adj0->n_adj));
2888           ASSERT (is_pow2 (adj1->n_adj));
2889           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
2890           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
2891
2892           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2893           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2894
          /* Count one packet and its chain length against each adjacency. */
2895           if (1) /* $$$$$$ HACK FIXME */
2896           vlib_increment_combined_counter 
2897               (cm, cpu_index, adj_index0, 1,
2898                vlib_buffer_length_in_chain (vm, p0));
2899           if (1) /* $$$$$$ HACK FIXME */
2900           vlib_increment_combined_counter 
2901               (cm, cpu_index, adj_index1, 1,
2902                vlib_buffer_length_in_chain (vm, p1));
2903
2904           from += 2;
2905           to_next += 2;
2906           n_left_to_next -= 2;
2907           n_left_from -= 2;
2908
          /*
           * Bit 0 is set when p0 does not belong in the current next
           * frame, bit 1 when p1 does not.
           */
2909           wrong_next = (next0 != next) + 2*(next1 != next);
2910           if (PREDICT_FALSE (wrong_next != 0))
2911             {
2912               switch (wrong_next)
2913                 {
2914                 case 1:
2915                   /* A B A */
                  /* Only p0 is misplaced: p1 takes its slot, p0 goes to next0. */
2916                   to_next[-2] = pi1;
2917                   to_next -= 1;
2918                   n_left_to_next += 1;
2919                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2920                   break;
2921
2922                 case 2:
2923                   /* A A B */
                  /* Only p1 is misplaced: give its slot back, send it to next1. */
2924                   to_next -= 1;
2925                   n_left_to_next += 1;
2926                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2927                   break;
2928
2929                 case 3:
2930                   /* A B C */
                  /* Both are misplaced: give both slots back and re-send each. */
2931                   to_next -= 2;
2932                   n_left_to_next += 2;
2933                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2934                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2935                   if (next0 == next1)
2936                     {
2937                       /* A B B */
                      /* Both went to the same other node: make it the current next. */
2938                       vlib_put_next_frame (vm, node, next, n_left_to_next);
2939                       next = next1;
2940                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2941                     }
2942                 }
2943             }
2944         }
2945     
      /* Single loop: same per-buffer work as above, one buffer at a time. */
2946       while (n_left_from > 0 && n_left_to_next > 0)
2947         {
2948           vlib_buffer_t * p0;
2949           ip4_header_t * ip0;
2950           u32 pi0, adj_index0;
2951           ip_lookup_next_t next0;
2952           ip_adjacency_t * adj0;
2953           u32 fib_index0;
2954           u32 flow_hash_config0;
2955
2956           pi0 = from[0];
2957           to_next[0] = pi0;
2958
2959           p0 = vlib_get_buffer (vm, pi0);
2960
2961           ip0 = vlib_buffer_get_current (p0);
2962
2963           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2964                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2965           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2966               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2967           
2968           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
2969                                               &ip0->dst_address, p0);
2970
2971           adj0 = ip_get_adjacency (lm, adj_index0);
2972
2973           next0 = adj0->lookup_next_index;
2974
2975           flow_hash_config0 = 
2976               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
2977
2978           vnet_buffer (p0)->ip.flow_hash = 
2979             ip4_compute_flow_hash (ip0, flow_hash_config0);
2980
2981           ASSERT (adj0->n_adj > 0);
2982           ASSERT (is_pow2 (adj0->n_adj));
2983           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
2984
2985           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2986
2987           if (1) /* $$$$$$ HACK FIXME */
2988               vlib_increment_combined_counter 
2989                   (cm, cpu_index, adj_index0, 1,
2990                    vlib_buffer_length_in_chain (vm, p0));
2991
2992           from += 1;
2993           to_next += 1;
2994           n_left_to_next -= 1;
2995           n_left_from -= 1;
2996
          /*
           * Buffer belongs to a different next node: give back its slot,
           * close the current frame, switch to next0 and enqueue it there.
           */
2997           if (PREDICT_FALSE (next0 != next))
2998             {
2999               n_left_to_next += 1;
3000               vlib_put_next_frame (vm, node, next, n_left_to_next);
3001               next = next0;
3002               vlib_get_next_frame (vm, node, next,
3003                                    to_next, n_left_to_next);
3004               to_next[0] = pi0;
3005               to_next += 1;
3006               n_left_to_next -= 1;
3007             }
3008         }
3009
3010       vlib_put_next_frame (vm, node, next, n_left_to_next);
3011     }
3012
3013   return frame->n_vectors;
3014 }
3015
/*
 * Registration record for the "ip4-lookup-multicast" graph node, whose
 * node function is ip4_lookup_multicast.  The next-node table comes from
 * IP4_LOOKUP_NEXT_NODES and has IP_LOOKUP_N_NEXT entries.
 */
3016 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3017   .function = ip4_lookup_multicast,
3018   .name = "ip4-lookup-multicast",
3019   .vector_size = sizeof (u32),
3020
3021   .n_next_nodes = IP_LOOKUP_N_NEXT,
3022   .next_nodes = IP4_LOOKUP_NEXT_NODES,
3023 };
3024
/*
 * Registration record for the "ip4-multicast" graph node.  Its node
 * function is ip4_drop and its only next node is "error-drop".
 */
3025 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3026   .function = ip4_drop,
3027   .name = "ip4-multicast",
3028   .vector_size = sizeof (u32),
3029
3030   .format_trace = format_ip4_forward_next_trace,
3031
3032   .n_next_nodes = 1,
3033   .next_nodes = {
3034     [0] = "error-drop",
3035   },
3036 };
3037
3038 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3039 {
3040   ip4_main_t * im = &ip4_main;
3041   ip4_fib_mtrie_t * mtrie0;
3042   ip4_fib_mtrie_leaf_t leaf0;
3043   u32 adj_index0;
3044     
3045   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3046
3047   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3048   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3049   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3050   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3051   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3052   
3053   /* Handle default route. */
3054   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3055   
3056   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3057   
3058   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3059                                                   a, 
3060                                                   /* no_default_route */ 0);
3061 }
3062  
3063 static clib_error_t *
3064 test_lookup_command_fn (vlib_main_t * vm,
3065                         unformat_input_t * input,
3066                         vlib_cli_command_t * cmd)
3067 {
3068   u32 table_id = 0;
3069   f64 count = 1;
3070   u32 n;
3071   int i;
3072   ip4_address_t ip4_base_address;
3073   u64 errors = 0;
3074
3075   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3076       if (unformat (input, "table %d", &table_id))
3077         ;
3078       else if (unformat (input, "count %f", &count))
3079         ;
3080
3081       else if (unformat (input, "%U",
3082                          unformat_ip4_address, &ip4_base_address))
3083         ;
3084       else
3085         return clib_error_return (0, "unknown input `%U'",
3086                                   format_unformat_error, input);
3087   }
3088
3089   n = count;
3090
3091   for (i = 0; i < n; i++)
3092     {
3093       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3094         errors++;
3095
3096       ip4_base_address.as_u32 = 
3097         clib_host_to_net_u32 (1 + 
3098                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3099     }
3100
3101   if (errors) 
3102     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3103   else
3104     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3105
3106   return 0;
3107 }
3108
/*
 * CLI registration: "test lookup" is handled by test_lookup_command_fn,
 * which accepts "table <n>", "count <f>" and an IPv4 base address.
 */
3109 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3110     .path = "test lookup",
3111     .short_help = "test lookup",
3112     .function = test_lookup_command_fn,
3113 };
3114
3115 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3116 {
3117   ip4_main_t * im4 = &ip4_main;
3118   ip4_fib_t * fib;
3119   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3120
3121   if (p == 0)
3122     return VNET_API_ERROR_NO_SUCH_FIB;
3123
3124   fib = vec_elt_at_index (im4->fibs, p[0]);
3125
3126   fib->flow_hash_config = flow_hash_config;
3127   return 0;
3128 }
3129  
3130 static clib_error_t *
3131 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3132                              unformat_input_t * input,
3133                              vlib_cli_command_t * cmd)
3134 {
3135   int matched = 0;
3136   u32 table_id = 0;
3137   u32 flow_hash_config = 0;
3138   int rv;
3139
3140   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3141     if (unformat (input, "table %d", &table_id))
3142       matched = 1;
3143 #define _(a,v) \
3144     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3145     foreach_flow_hash_bit
3146 #undef _
3147     else break;
3148   }
3149   
3150   if (matched == 0)
3151     return clib_error_return (0, "unknown input `%U'",
3152                               format_unformat_error, input);
3153   
3154   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3155   switch (rv)
3156     {
3157     case 0:
3158       break;
3159       
3160     case VNET_API_ERROR_NO_SUCH_FIB:
3161       return clib_error_return (0, "no such FIB table %d", table_id);
3162       
3163     default:
3164       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3165       break;
3166     }
3167   
3168   return 0;
3169 }
3170  
3171 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3172   .path = "set ip flow-hash",
3173   .short_help = 
3174   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3175   .function = set_ip_flow_hash_command_fn,
3176 };
3177  
3178 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3179                                  u32 table_index)
3180 {
3181   vnet_main_t * vnm = vnet_get_main();
3182   vnet_interface_main_t * im = &vnm->interface_main;
3183   ip4_main_t * ipm = &ip4_main;
3184   ip_lookup_main_t * lm = &ipm->lookup_main;
3185   vnet_classify_main_t * cm = &vnet_classify_main;
3186
3187   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3188     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3189
3190   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3191     return VNET_API_ERROR_NO_SUCH_ENTRY;
3192
3193   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3194   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3195
3196   return 0;
3197 }
3198
3199 static clib_error_t *
3200 set_ip_classify_command_fn (vlib_main_t * vm,
3201                             unformat_input_t * input,
3202                             vlib_cli_command_t * cmd)
3203 {
3204   u32 table_index = ~0;
3205   int table_index_set = 0;
3206   u32 sw_if_index = ~0;
3207   int rv;
3208   
3209   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3210     if (unformat (input, "table-index %d", &table_index))
3211       table_index_set = 1;
3212     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3213                        vnet_get_main(), &sw_if_index))
3214       ;
3215     else
3216       break;
3217   }
3218       
3219   if (table_index_set == 0)
3220     return clib_error_return (0, "classify table-index must be specified");
3221
3222   if (sw_if_index == ~0)
3223     return clib_error_return (0, "interface / subif must be specified");
3224
3225   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3226
3227   switch (rv)
3228     {
3229     case 0:
3230       break;
3231
3232     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3233       return clib_error_return (0, "No such interface");
3234
3235     case VNET_API_ERROR_NO_SUCH_ENTRY:
3236       return clib_error_return (0, "No such classifier table");
3237     }
3238   return 0;
3239 }
3240
/*
 * CLI registration: "set ip classify" is handled by
 * set_ip_classify_command_fn, which requires both an interface and a
 * classifier table index.
 */
3241 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3242     .path = "set ip classify",
3243     .short_help = 
3244     "set ip classify intfc <int> table-index <index>",
3245     .function = set_ip_classify_command_fn,
3246 };
3247