Add support for installing ipv4 routes via unresolved next hop
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47
48 /* This is really, really simple but stupid fib. */
49 u32
50 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
51                            ip4_address_t * dst,
52                            u32 disable_default_route)
53 {
54   ip_lookup_main_t * lm = &im->lookup_main;
55   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
56   uword * p, * hash, key;
57   i32 i, i_min, dst_address, ai;
58
59   i_min = disable_default_route ? 1 : 0;
60   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
61   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
62     {
63       hash = fib->adj_index_by_dst_address[i];
64       if (! hash)
65         continue;
66
67       key = dst_address & im->fib_masks[i];
68       if ((p = hash_get (hash, key)) != 0)
69         {
70           ai = p[0];
71           goto done;
72         }
73     }
74     
75   /* Nothing matches in table. */
76   ai = lm->miss_adj_index;
77
78  done:
79   return ai;
80 }
81
82 static ip4_fib_t *
83 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
84 {
85   ip4_fib_t * fib;
86   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
87   vec_add2 (im->fibs, fib, 1);
88   fib->table_id = table_id;
89   fib->index = fib - im->fibs;
90   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
91   fib->fwd_classify_table_index = ~0;
92   fib->rev_classify_table_index = ~0;
93   ip4_mtrie_init (&fib->mtrie);
94   return fib;
95 }
96
97 ip4_fib_t *
98 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
99                                    u32 table_index_or_id, u32 flags)
100 {
101   uword * p, fib_index;
102
103   fib_index = table_index_or_id;
104   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
105     {
106       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
107       if (! p)
108         return create_fib_with_table_id (im, table_index_or_id);
109       fib_index = p[0];
110     }
111   return vec_elt_at_index (im->fibs, fib_index);
112 }
113
114 static void
115 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
116                                        ip4_fib_t * fib,
117                                        u32 address_length)
118 {
119   hash_t * h;
120   uword max_index;
121
122   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
123   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
124
125   fib->adj_index_by_dst_address[address_length] =
126     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
127
128   hash_set_flags (fib->adj_index_by_dst_address[address_length],
129                   HASH_FLAG_NO_AUTO_SHRINK);
130
131   h = hash_header (fib->adj_index_by_dst_address[address_length]);
132   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
133
134   /* Initialize new/old hash value vectors. */
135   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
136   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
137 }
138
139 static void
140 ip4_fib_set_adj_index (ip4_main_t * im,
141                        ip4_fib_t * fib,
142                        u32 flags,
143                        u32 dst_address_u32,
144                        u32 dst_address_length,
145                        u32 adj_index)
146 {
147   ip_lookup_main_t * lm = &im->lookup_main;
148   uword * hash;
149
150   if (vec_bytes(fib->old_hash_values))
151     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
152   if (vec_bytes(fib->new_hash_values))
153     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
154   fib->new_hash_values[0] = adj_index;
155
156   /* Make sure adj index is valid. */
157   if (CLIB_DEBUG > 0)
158     (void) ip_get_adjacency (lm, adj_index);
159
160   hash = fib->adj_index_by_dst_address[dst_address_length];
161
162   hash = _hash_set3 (hash, dst_address_u32,
163                      fib->new_hash_values,
164                      fib->old_hash_values);
165
166   fib->adj_index_by_dst_address[dst_address_length] = hash;
167
168   if (vec_len (im->add_del_route_callbacks) > 0)
169     {
170       ip4_add_del_route_callback_t * cb;
171       ip4_address_t d;
172       uword * p;
173
174       d.data_u32 = dst_address_u32;
175       vec_foreach (cb, im->add_del_route_callbacks)
176         if ((flags & cb->required_flags) == cb->required_flags)
177           cb->function (im, cb->function_opaque,
178                         fib, flags,
179                         &d, dst_address_length,
180                         fib->old_hash_values,
181                         fib->new_hash_values);
182
183       p = hash_get (hash, dst_address_u32);
184       memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
185     }
186 }
187
188 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
189 {
190   ip_lookup_main_t * lm = &im->lookup_main;
191   ip4_fib_t * fib;
192   u32 dst_address, dst_address_length, adj_index, old_adj_index;
193   uword * hash, is_del;
194   ip4_add_del_route_callback_t * cb;
195
196   /* Either create new adjacency or use given one depending on arguments. */
197   if (a->n_add_adj > 0)
198     {
199       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
200       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
201     }
202   else
203     adj_index = a->adj_index;
204
205   dst_address = a->dst_address.data_u32;
206   dst_address_length = a->dst_address_length;
207   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
208
209   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
210   dst_address &= im->fib_masks[dst_address_length];
211
212   if (! fib->adj_index_by_dst_address[dst_address_length])
213     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
214
215   hash = fib->adj_index_by_dst_address[dst_address_length];
216
217   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
218
219   if (is_del)
220     {
221       fib->old_hash_values[0] = ~0;
222       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
223       fib->adj_index_by_dst_address[dst_address_length] = hash;
224
225       if (vec_len (im->add_del_route_callbacks) > 0
226           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
227         {
228           fib->new_hash_values[0] = ~0;
229           vec_foreach (cb, im->add_del_route_callbacks)
230             if ((a->flags & cb->required_flags) == cb->required_flags)
231               cb->function (im, cb->function_opaque,
232                             fib, a->flags,
233                             &a->dst_address, dst_address_length,
234                             fib->old_hash_values,
235                             fib->new_hash_values);
236         }
237     }
238   else
239     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
240                            adj_index);
241
242   old_adj_index = fib->old_hash_values[0];
243
244   /* Avoid spurious reference count increments */
245   if (old_adj_index == adj_index && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
246     {
247       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
248       if (adj->share_count > 0)
249         adj->share_count --;
250     }
251
252   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
253                                is_del ? old_adj_index : adj_index,
254                                is_del);
255
256   /* Delete old adjacency index if present and changed. */
257   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
258       && old_adj_index != ~0
259       && old_adj_index != adj_index)
260     ip_del_adjacency (lm, old_adj_index);
261 }
262
/*
 * Add or delete one next hop of the route dst_address/dst_address_length.
 *
 * flags                 IP4_ROUTE_FLAG_* bits; IP4_ROUTE_FLAG_DEL selects
 *                       deletion of the next hop.
 * next_hop              next-hop address; an all-zero address denotes an
 *                       interface route via next_hop_sw_if_index.
 * next_hop_sw_if_index  next-hop interface; also selects the FIB when
 *                       explicit_fib_index is ~0.
 * next_hop_weight       weight handed to the multipath adjacency code.
 * adj_index             when not ~0, used directly as the next-hop adjacency
 *                       and the next-hop lookup is skipped.
 * explicit_fib_index    FIB index to operate on, or ~0 to use the FIB bound
 *                       to next_hop_sw_if_index.
 *
 * Nothing is returned: failures set vnm->api_errno and are reported through
 * clib_error_report.
 */
263 void
264 ip4_add_del_route_next_hop (ip4_main_t * im,
265                             u32 flags,
266                             ip4_address_t * dst_address,
267                             u32 dst_address_length,
268                             ip4_address_t * next_hop,
269                             u32 next_hop_sw_if_index,
270                             u32 next_hop_weight, u32 adj_index, 
271                             u32 explicit_fib_index)
272 {
273   vnet_main_t * vnm = vnet_get_main();
274   ip_lookup_main_t * lm = &im->lookup_main;
275   u32 fib_index;
276   ip4_fib_t * fib;
277   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
278   u32 dst_adj_index, nh_adj_index;
279   uword * dst_hash, * dst_result;
280   uword * nh_hash, * nh_result;
281   ip_adjacency_t * dst_adj;
282   ip_multipath_adjacency_t * old_mp, * new_mp;
283   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
284   int is_interface_next_hop;
285   clib_error_t * error = 0;
286
      /* Select the FIB: an explicit index wins, otherwise use the FIB bound
         to the next-hop interface. */
287   if (explicit_fib_index == (u32)~0)
288       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
289   else
290       fib_index = explicit_fib_index;
291
292   fib = vec_elt_at_index (im->fibs, fib_index);
293   
294   /* Lookup next hop to be added or deleted. */
295   is_interface_next_hop = next_hop->data_u32 == 0;
296   if (adj_index == (u32)~0)
297     {
298       if (is_interface_next_hop)
299         {
              /* Reuse the cached per-interface route adjacency, or create
                 and cache one on first use. */
300           nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
301           if (nh_result)
302             nh_adj_index = *nh_result;
303           else
304             {
305               ip_adjacency_t * adj;
306               adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
307                                       &nh_adj_index);
308               ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
309               ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
310               hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
311             }
312         }
313       else
314         {
              /* First try the next hop as an exact /32 entry. */
315           nh_hash = fib->adj_index_by_dst_address[32];
316           nh_result = hash_get (nh_hash, next_hop->data_u32);
317           
318           /* Next hop must be known: either as a /32 entry, or covered by
                 an ARP interface adjacency (handled below). */
319           if (! nh_result)
320             {
321               ip_adjacency_t * adj;
322
323               nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
324                                                         next_hop, 0);
325               adj = ip_get_adjacency (lm, nh_adj_index);
326               /* if ARP interface adjacency is present, we need to
327                  install ARP adjacency for specific next hop */
328               if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
329                   adj->arp.next_hop.ip4.as_u32 == 0)
330                 {
331                   nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
332                 }
333               else
334                 {
335                   vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_IN_FIB;
336                   error = clib_error_return (0, "next-hop %U/32 not in FIB",
337                                              format_ip4_address, next_hop);
338                   goto done;
339                 }
340             }
341           else
342             nh_adj_index = *nh_result;
343         }
344     }
345   else
346     {
          /* Caller supplied the next-hop adjacency directly. */
347       nh_adj_index = adj_index;
348     }
349   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
350   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
351
      /* Look up the current adjacency of the destination prefix, if any. */
352   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
353   dst_result = hash_get (dst_hash, dst_address_u32);
354   if (dst_result)
355     {
356       dst_adj_index = dst_result[0];
357       dst_adj = ip_get_adjacency (lm, dst_adj_index);
358     }
359   else
360     {
361       /* For deletes destination must be known. */
362       if (is_del)
363         {
364           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
365           error = clib_error_return (0, "unknown destination %U/%d",
366                                      format_ip4_address, dst_address,
367                                      dst_address_length);
368           goto done;
369         }
370
371       dst_adj_index = ~0;
372       dst_adj = 0;
373     }
374
375   /* Ignore adds of X/32 with next hop of X. */
376   if (! is_del
377       && dst_address_length == 32
378       && dst_address->data_u32 == next_hop->data_u32 
379       && adj_index != (u32)~0)
380     {
381       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
382       error = clib_error_return (0, "prefix matches next hop %U/%d",
383                                  format_ip4_address, dst_address,
384                                  dst_address_length);
385       goto done;
386     }
387
388   /* Destination is not known and default weight is set so add route
389      to existing non-multipath adjacency */
390   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
391     {
392       /* create new adjacency */
393       ip4_add_del_route_args_t a;
394       a.table_index_or_table_id = fib_index;
395       a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
396                  | IP4_ROUTE_FLAG_FIB_INDEX
397                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
398                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
399                              | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
400       a.dst_address = dst_address[0];
401       a.dst_address_length = dst_address_length;
402       a.adj_index = nh_adj_index;
403       a.add_adj = 0;
404       a.n_add_adj = 0;
405
406       ip4_add_del_route (im, &a);
407
408       goto done;
409     }
410
      /* The destination adjacency's heap_handle is used below as the index
         of its current multipath adjacency; ~0 when the prefix is new. */
411   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
412
413   if (! ip_multipath_adjacency_add_del_next_hop
414       (lm, is_del,
415        old_mp_adj_index,
416        nh_adj_index,
417        next_hop_weight,
418        &new_mp_adj_index))
419     {
420       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
421       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
422                                  format_ip4_address, next_hop);
423       goto done;
424     }
425   
426   old_mp = new_mp = 0;
427   if (old_mp_adj_index != ~0)
428     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
429   if (new_mp_adj_index != ~0)
430     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
431
      /* Only touch the FIB when the multipath adjacency actually changed. */
432   if (old_mp != new_mp)
433     {
434       ip4_add_del_route_args_t a;
435       a.table_index_or_table_id = fib_index;
          /* The route itself is deleted only when a delete leaves no
             multipath adjacency behind; otherwise it is re-added. */
436       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
437                  | IP4_ROUTE_FLAG_FIB_INDEX
438                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
439                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
440       a.dst_address = dst_address[0];
441       a.dst_address_length = dst_address_length;
442       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
443       a.add_adj = 0;
444       a.n_add_adj = 0;
445
446       ip4_add_del_route (im, &a);
447     }
448
449  done:
450   if (error)
451     clib_error_report (error);
452 }
453
454 void *
455 ip4_get_route (ip4_main_t * im,
456                u32 table_index_or_table_id,
457                u32 flags,
458                u8 * address,
459                u32 address_length)
460 {
461   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
462   u32 dst_address = * (u32 *) address;
463   uword * hash, * p;
464
465   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
466   dst_address &= im->fib_masks[address_length];
467
468   hash = fib->adj_index_by_dst_address[address_length];
469   p = hash_get (hash, dst_address);
470   return (void *) p;
471 }
472
473 void
474 ip4_foreach_matching_route (ip4_main_t * im,
475                             u32 table_index_or_table_id,
476                             u32 flags,
477                             ip4_address_t * address,
478                             u32 address_length,
479                             ip4_address_t ** results,
480                             u8 ** result_lengths)
481 {
482   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
483   u32 dst_address = address->data_u32;
484   u32 this_length = address_length;
485   
486   if (*results)
487     _vec_len (*results) = 0;
488   if (*result_lengths)
489     _vec_len (*result_lengths) = 0;
490
491   while (this_length <= 32 && vec_len (results) == 0)
492     {
493       uword k, v;
494       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
495         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
496           {
497             ip4_address_t a;
498             a.data_u32 = k;
499             vec_add1 (*results, a);
500             vec_add1 (*result_lengths, this_length);
501           }
502       }));
503
504       this_length++;
505     }
506 }
507
508 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
509                                   u32 table_index_or_table_id,
510                                   u32 flags)
511 {
512   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
513   ip_lookup_main_t * lm = &im->lookup_main;
514   u32 i, l;
515   ip4_address_t a;
516   ip4_add_del_route_callback_t * cb;
517   static ip4_address_t * to_delete;
518
519   if (lm->n_adjacency_remaps == 0)
520     return;
521
522   for (l = 0; l <= 32; l++)
523     {
524       hash_pair_t * p;
525       uword * hash = fib->adj_index_by_dst_address[l];
526
527       if (hash_elts (hash) == 0)
528         continue;
529
530       if (to_delete)
531         _vec_len (to_delete) = 0;
532
533       hash_foreach_pair (p, hash, ({
534         u32 adj_index = p->value[0];
535         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
536
537         if (m)
538           {
539             /* Record destination address from hash key. */
540             a.data_u32 = p->key;
541
542             /* New adjacency points to nothing: so delete prefix. */
543             if (m == ~0)
544               vec_add1 (to_delete, a);
545             else
546               {
547                 /* Remap to new adjacency. */
548                 memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
549
550                 /* Set new adjacency value. */
551                 fib->new_hash_values[0] = p->value[0] = m - 1;
552
553                 vec_foreach (cb, im->add_del_route_callbacks)
554                   if ((flags & cb->required_flags) == cb->required_flags)
555                     cb->function (im, cb->function_opaque,
556                                   fib, flags | IP4_ROUTE_FLAG_ADD,
557                                   &a, l,
558                                   fib->old_hash_values,
559                                   fib->new_hash_values);
560               }
561           }
562       }));
563
564       fib->new_hash_values[0] = ~0;
565       for (i = 0; i < vec_len (to_delete); i++)
566         {
567           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
568           vec_foreach (cb, im->add_del_route_callbacks)
569             if ((flags & cb->required_flags) == cb->required_flags)
570               cb->function (im, cb->function_opaque,
571                             fib, flags | IP4_ROUTE_FLAG_DEL,
572                             &a, l,
573                             fib->old_hash_values,
574                             fib->new_hash_values);
575         }
576     }
577
578   /* Also remap adjacencies in mtrie. */
579   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
580
581   /* Reset mapping table. */
582   vec_zero (lm->adjacency_remap_table);
583
584   /* All remaps have been performed. */
585   lm->n_adjacency_remaps = 0;
586 }
587
588 void ip4_delete_matching_routes (ip4_main_t * im,
589                                  u32 table_index_or_table_id,
590                                  u32 flags,
591                                  ip4_address_t * address,
592                                  u32 address_length)
593 {
594   static ip4_address_t * matching_addresses;
595   static u8 * matching_address_lengths;
596   u32 l, i;
597   ip4_add_del_route_args_t a;
598
599   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
600   a.table_index_or_table_id = table_index_or_table_id;
601   a.adj_index = ~0;
602   a.add_adj = 0;
603   a.n_add_adj = 0;
604
605   for (l = address_length + 1; l <= 32; l++)
606     {
607       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
608                                   address,
609                                   l,
610                                   &matching_addresses,
611                                   &matching_address_lengths);
612       for (i = 0; i < vec_len (matching_addresses); i++)
613         {
614           a.dst_address = matching_addresses[i];
615           a.dst_address_length = matching_address_lengths[i];
616           ip4_add_del_route (im, &a);
617         }
618     }
619
620   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
621 }
622
623 always_inline uword
624 ip4_lookup_inline (vlib_main_t * vm,
625                    vlib_node_runtime_t * node,
626                    vlib_frame_t * frame,
627                    int lookup_for_responses_to_locally_received_packets)
628 {
629   ip4_main_t * im = &ip4_main;
630   ip_lookup_main_t * lm = &im->lookup_main;
631   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
632   u32 n_left_from, n_left_to_next, * from, * to_next;
633   ip_lookup_next_t next;
634   u32 cpu_index = os_get_cpu_number();
635
636   from = vlib_frame_vector_args (frame);
637   n_left_from = frame->n_vectors;
638   next = node->cached_next_index;
639
640   while (n_left_from > 0)
641     {
642       vlib_get_next_frame (vm, node, next,
643                            to_next, n_left_to_next);
644
645       while (n_left_from >= 4 && n_left_to_next >= 2)
646         {
647           vlib_buffer_t * p0, * p1;
648           ip4_header_t * ip0, * ip1;
649           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
650           ip_lookup_next_t next0, next1;
651           ip_adjacency_t * adj0, * adj1;
652           ip4_fib_mtrie_t * mtrie0, * mtrie1;
653           ip4_fib_mtrie_leaf_t leaf0, leaf1;
654           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
655           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
656           u32 flow_hash_config0, flow_hash_config1;
657           u32 hash_c0, hash_c1;
658           u32 wrong_next;
659
660           /* Prefetch next iteration. */
661           {
662             vlib_buffer_t * p2, * p3;
663
664             p2 = vlib_get_buffer (vm, from[2]);
665             p3 = vlib_get_buffer (vm, from[3]);
666
667             vlib_prefetch_buffer_header (p2, LOAD);
668             vlib_prefetch_buffer_header (p3, LOAD);
669
670             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
671             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
672           }
673
674           pi0 = to_next[0] = from[0];
675           pi1 = to_next[1] = from[1];
676
677           p0 = vlib_get_buffer (vm, pi0);
678           p1 = vlib_get_buffer (vm, pi1);
679
680           ip0 = vlib_buffer_get_current (p0);
681           ip1 = vlib_buffer_get_current (p1);
682
683           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
684           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
685           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
686             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
687           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
688             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
689
690
691           if (! lookup_for_responses_to_locally_received_packets)
692             {
693               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
694               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
695
696               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
697
698               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0);
699               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 0);
700             }
701
702           tcp0 = (void *) (ip0 + 1);
703           tcp1 = (void *) (ip1 + 1);
704
705           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
706                          || ip0->protocol == IP_PROTOCOL_UDP);
707           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
708                          || ip1->protocol == IP_PROTOCOL_UDP);
709
710           if (! lookup_for_responses_to_locally_received_packets)
711             {
712               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1);
713               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 1);
714             }
715
716           if (! lookup_for_responses_to_locally_received_packets)
717             {
718               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2);
719               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 2);
720             }
721
722           if (! lookup_for_responses_to_locally_received_packets)
723             {
724               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3);
725               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 3);
726             }
727
728           if (lookup_for_responses_to_locally_received_packets)
729             {
730               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
731               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
732             }
733           else
734             {
735               /* Handle default route. */
736               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
737               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
738
739               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
740               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
741             }
742
743           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
744                                                            &ip0->dst_address,
745                                                            /* no_default_route */ 0));
746           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
747                                                            &ip1->dst_address,
748                                                            /* no_default_route */ 0));
749           adj0 = ip_get_adjacency (lm, adj_index0);
750           adj1 = ip_get_adjacency (lm, adj_index1);
751
752           next0 = adj0->lookup_next_index;
753           next1 = adj1->lookup_next_index;
754
755           /* Use flow hash to compute multipath adjacency. */
756           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
757           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
758           if (PREDICT_FALSE (adj0->n_adj > 1))
759             {
760               flow_hash_config0 = 
761                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
762               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
763                 ip4_compute_flow_hash (ip0, flow_hash_config0);
764             }
765           if (PREDICT_FALSE(adj1->n_adj > 1))
766             {
767               flow_hash_config1 = 
768                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
769               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
770                 ip4_compute_flow_hash (ip1, flow_hash_config1);
771             }
772
773           ASSERT (adj0->n_adj > 0);
774           ASSERT (adj1->n_adj > 0);
775           ASSERT (is_pow2 (adj0->n_adj));
776           ASSERT (is_pow2 (adj1->n_adj));
777           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
778           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
779
780           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
781           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
782
783           vlib_increment_combined_counter 
784               (cm, cpu_index, adj_index0, 1,
785                vlib_buffer_length_in_chain (vm, p0) 
786                + sizeof(ethernet_header_t));
787           vlib_increment_combined_counter 
788               (cm, cpu_index, adj_index1, 1,
789                vlib_buffer_length_in_chain (vm, p1)
790                + sizeof(ethernet_header_t));
791
792           from += 2;
793           to_next += 2;
794           n_left_to_next -= 2;
795           n_left_from -= 2;
796
797           wrong_next = (next0 != next) + 2*(next1 != next);
798           if (PREDICT_FALSE (wrong_next != 0))
799             {
800               switch (wrong_next)
801                 {
802                 case 1:
803                   /* A B A */
804                   to_next[-2] = pi1;
805                   to_next -= 1;
806                   n_left_to_next += 1;
807                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
808                   break;
809
810                 case 2:
811                   /* A A B */
812                   to_next -= 1;
813                   n_left_to_next += 1;
814                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
815                   break;
816
817                 case 3:
818                   /* A B C */
819                   to_next -= 2;
820                   n_left_to_next += 2;
821                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
822                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
823                   if (next0 == next1)
824                     {
825                       /* A B B */
826                       vlib_put_next_frame (vm, node, next, n_left_to_next);
827                       next = next1;
828                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
829                     }
830                 }
831             }
832         }
833     
834       while (n_left_from > 0 && n_left_to_next > 0)
835         {
836           vlib_buffer_t * p0;
837           ip4_header_t * ip0;
838           __attribute__((unused)) tcp_header_t * tcp0;
839           ip_lookup_next_t next0;
840           ip_adjacency_t * adj0;
841           ip4_fib_mtrie_t * mtrie0;
842           ip4_fib_mtrie_leaf_t leaf0;
843           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
844           u32 flow_hash_config0, hash_c0;
845
846           pi0 = from[0];
847           to_next[0] = pi0;
848
849           p0 = vlib_get_buffer (vm, pi0);
850
851           ip0 = vlib_buffer_get_current (p0);
852
853           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
854           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
855             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
856
857           if (! lookup_for_responses_to_locally_received_packets)
858             {
859               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
860
861               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
862
863               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0);
864             }
865
866           tcp0 = (void *) (ip0 + 1);
867
868           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
869                          || ip0->protocol == IP_PROTOCOL_UDP);
870
871           if (! lookup_for_responses_to_locally_received_packets)
872             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1);
873
874           if (! lookup_for_responses_to_locally_received_packets)
875             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2);
876
877           if (! lookup_for_responses_to_locally_received_packets)
878             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3);
879
880           if (lookup_for_responses_to_locally_received_packets)
881             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
882           else
883             {
884               /* Handle default route. */
885               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
886               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
887             }
888
889           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
890                                                            &ip0->dst_address,
891                                                            /* no_default_route */ 0));
892
893           adj0 = ip_get_adjacency (lm, adj_index0);
894
895           next0 = adj0->lookup_next_index;
896
897           /* Use flow hash to compute multipath adjacency. */
898           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
899           if (PREDICT_FALSE(adj0->n_adj > 1))
900             {
901               flow_hash_config0 = 
902                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
903
904               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
905                 ip4_compute_flow_hash (ip0, flow_hash_config0);
906             }
907
908           ASSERT (adj0->n_adj > 0);
909           ASSERT (is_pow2 (adj0->n_adj));
910           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
911
912           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
913
914           vlib_increment_combined_counter 
915               (cm, cpu_index, adj_index0, 1,
916                vlib_buffer_length_in_chain (vm, p0)
917                + sizeof(ethernet_header_t));
918
919           from += 1;
920           to_next += 1;
921           n_left_to_next -= 1;
922           n_left_from -= 1;
923
924           if (PREDICT_FALSE (next0 != next))
925             {
926               n_left_to_next += 1;
927               vlib_put_next_frame (vm, node, next, n_left_to_next);
928               next = next0;
929               vlib_get_next_frame (vm, node, next,
930                                    to_next, n_left_to_next);
931               to_next[0] = pi0;
932               to_next += 1;
933               n_left_to_next -= 1;
934             }
935         }
936
937       vlib_put_next_frame (vm, node, next, n_left_to_next);
938     }
939
940   return frame->n_vectors;
941 }
942
943 static uword
944 ip4_lookup (vlib_main_t * vm,
945             vlib_node_runtime_t * node,
946             vlib_frame_t * frame)
947 {
948   return ip4_lookup_inline (vm, node, frame, /* lookup_for_responses_to_locally_received_packets */ 0);
949
950 }
951
952 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
953                                         ip_adjacency_t * adj,
954                                         u32 sw_if_index,
955                                         u32 if_address_index)
956 {
957   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
958   ip_lookup_next_t n;
959   vnet_l3_packet_type_t packet_type;
960   u32 node_index;
961
962   if (hw->hw_class_index == ethernet_hw_interface_class.index
963       || hw->hw_class_index == srp_hw_interface_class.index)
964     {
965       /* 
966        * We have a bit of a problem in this case. ip4-arp uses
967        * the rewrite_header.next_index to hand pkts to the
968        * indicated inteface output node. We can end up in
969        * ip4_rewrite_local, too, which also pays attention to 
970        * rewrite_header.next index. Net result: a hack in
971        * ip4_rewrite_local...
972        */
973       n = IP_LOOKUP_NEXT_ARP;
974       node_index = ip4_arp_node.index;
975       adj->if_address_index = if_address_index;
976       adj->arp.next_hop.ip4.as_u32 = 0;
977       packet_type = VNET_L3_PACKET_TYPE_ARP;
978     }
979   else
980     {
981       n = IP_LOOKUP_NEXT_REWRITE;
982       node_index = ip4_rewrite_node.index;
983       packet_type = VNET_L3_PACKET_TYPE_IP4;
984     }
985
986   adj->lookup_next_index = n;
987   vnet_rewrite_for_sw_interface
988     (vnm,
989      packet_type,
990      sw_if_index,
991      node_index,
992      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
993      &adj->rewrite_header,
994      sizeof (adj->rewrite_data));
995 }
996
997 static void
998 ip4_add_interface_routes (u32 sw_if_index,
999                           ip4_main_t * im, u32 fib_index,
1000                           ip_interface_address_t * a)
1001 {
1002   vnet_main_t * vnm = vnet_get_main();
1003   ip_lookup_main_t * lm = &im->lookup_main;
1004   ip_adjacency_t * adj;
1005   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1006   ip4_add_del_route_args_t x;
1007   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1008   u32 classify_table_index;
1009
1010   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1011   x.table_index_or_table_id = fib_index;
1012   x.flags = (IP4_ROUTE_FLAG_ADD
1013              | IP4_ROUTE_FLAG_FIB_INDEX
1014              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1015   x.dst_address = address[0];
1016   x.dst_address_length = a->address_length;
1017   x.n_add_adj = 0;
1018   x.add_adj = 0;
1019
1020   a->neighbor_probe_adj_index = ~0;
1021   if (a->address_length < 32)
1022     {
1023       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1024                               &x.adj_index);
1025       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1026       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1027       ip4_add_del_route (im, &x);
1028       a->neighbor_probe_adj_index = x.adj_index;
1029     }
1030   
1031   /* Add e.g. 1.1.1.1/32 as local to this host. */
1032   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1033                           &x.adj_index);
1034   
1035   classify_table_index = ~0;
1036   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1037     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1038   if (classify_table_index != (u32) ~0)
1039     {
1040       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1041       adj->classify.table_index = classify_table_index;
1042     }
1043   else
1044     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1045   
1046   adj->if_address_index = a - lm->if_address_pool;
1047   adj->rewrite_header.sw_if_index = sw_if_index;
1048   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1049   /* 
1050    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1051    * fail an RPF-ish check, but still go thru the rewrite code...
1052    */
1053   adj->rewrite_header.data_bytes = 0;
1054
1055   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1056   x.dst_address_length = 32;
1057   ip4_add_del_route (im, &x);
1058 }
1059
1060 static void
1061 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1062 {
1063   ip4_add_del_route_args_t x;
1064
1065   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1066   x.table_index_or_table_id = fib_index;
1067   x.flags = (IP4_ROUTE_FLAG_DEL
1068              | IP4_ROUTE_FLAG_FIB_INDEX
1069              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1070   x.dst_address = address[0];
1071   x.dst_address_length = address_length;
1072   x.adj_index = ~0;
1073   x.n_add_adj = 0;
1074   x.add_adj = 0;
1075
1076   if (address_length < 32)
1077     ip4_add_del_route (im, &x);
1078
1079   x.dst_address_length = 32;
1080   ip4_add_del_route (im, &x);
1081
1082   ip4_delete_matching_routes (im,
1083                               fib_index,
1084                               IP4_ROUTE_FLAG_FIB_INDEX,
1085                               address,
1086                               address_length);
1087 }
1088
1089 typedef struct {
1090     u32 sw_if_index;
1091     ip4_address_t address;
1092     u32 length;
1093 } ip4_interface_address_t;
1094
1095 static clib_error_t *
1096 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1097                                         u32 sw_if_index,
1098                                         ip4_address_t * new_address,
1099                                         u32 new_length,
1100                                         u32 redistribute,
1101                                         u32 insert_routes,
1102                                         u32 is_del);
1103
1104 static clib_error_t *
1105 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1106                                         u32 sw_if_index,
1107                                         ip4_address_t * address,
1108                                         u32 address_length,
1109                                         u32 redistribute,
1110                                         u32 insert_routes,
1111                                         u32 is_del)
1112 {
1113   vnet_main_t * vnm = vnet_get_main();
1114   ip4_main_t * im = &ip4_main;
1115   ip_lookup_main_t * lm = &im->lookup_main;
1116   clib_error_t * error = 0;
1117   u32 if_address_index, elts_before;
1118   ip4_address_fib_t ip4_af, * addr_fib = 0;
1119
1120   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1121   ip4_addr_fib_init (&ip4_af, address,
1122                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1123   vec_add1 (addr_fib, ip4_af);
1124
1125   /* When adding an address check that it does not conflict with an existing address. */
1126   if (! is_del)
1127     {
1128       ip_interface_address_t * ia;
1129       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1130                                     0 /* honor unnumbered */,
1131       ({
1132         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1133
1134         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1135             || ip4_destination_matches_route (im, x, address, address_length))
1136           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1137                                     format_ip4_address_and_length, address, address_length,
1138                                     format_ip4_address_and_length, x, ia->address_length,
1139                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1140       }));
1141     }
1142
1143   elts_before = pool_elts (lm->if_address_pool);
1144
1145   error = ip_interface_address_add_del
1146     (lm,
1147      sw_if_index,
1148      addr_fib,
1149      address_length,
1150      is_del,
1151      &if_address_index);
1152   if (error)
1153     goto done;
1154   
1155   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1156     {
1157       if (is_del)
1158         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1159                                   address_length);
1160       
1161       else
1162           ip4_add_interface_routes (sw_if_index,
1163                                     im, ip4_af.fib_index,
1164                                     pool_elt_at_index 
1165                                     (lm->if_address_pool, if_address_index));
1166     }
1167
1168   /* If pool did not grow/shrink: add duplicate address. */
1169   if (elts_before != pool_elts (lm->if_address_pool))
1170     {
1171       ip4_add_del_interface_address_callback_t * cb;
1172       vec_foreach (cb, im->add_del_interface_address_callbacks)
1173         cb->function (im, cb->function_opaque, sw_if_index,
1174                       address, address_length,
1175                       if_address_index,
1176                       is_del);
1177     }
1178
1179  done:
1180   vec_free (addr_fib);
1181   return error;
1182 }
1183
1184 clib_error_t *
1185 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1186                                ip4_address_t * address, u32 address_length,
1187                                u32 is_del)
1188 {
1189   return ip4_add_del_interface_address_internal
1190     (vm, sw_if_index, address, address_length,
1191      /* redistribute */ 1,
1192      /* insert_routes */ 1,
1193      is_del);
1194 }
1195
1196 static clib_error_t *
1197 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1198                                 u32 sw_if_index,
1199                                 u32 flags)
1200 {
1201   ip4_main_t * im = &ip4_main;
1202   ip_interface_address_t * ia;
1203   ip4_address_t * a;
1204   u32 is_admin_up, fib_index;
1205   
1206   /* Fill in lookup tables with default table (0). */
1207   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1208   
1209   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1210   
1211   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1212   
1213   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1214
1215   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1216                                 0 /* honor unnumbered */,
1217   ({
1218     a = ip_interface_address_get_address (&im->lookup_main, ia);
1219     if (is_admin_up)
1220       ip4_add_interface_routes (sw_if_index,
1221                                 im, fib_index,
1222                                 ia);
1223     else
1224       ip4_del_interface_routes (im, fib_index,
1225                                 a, ia->address_length);
1226   }));
1227
1228   return 0;
1229 }
1230  
1231 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1232
1233 static clib_error_t *
1234 ip4_sw_interface_add_del (vnet_main_t * vnm,
1235                           u32 sw_if_index,
1236                           u32 is_add)
1237 {
1238   vlib_main_t * vm = vnm->vlib_main;
1239   ip4_main_t * im = &ip4_main;
1240   ip_lookup_main_t * lm = &im->lookup_main;
1241   u32 ci, cast;
1242
1243   for (cast = 0; cast < VNET_N_CAST; cast++)
1244     {
1245       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1246       vnet_config_main_t * vcm = &cm->config_main;
1247
1248       if (! vcm->node_index_by_feature_index)
1249         {
1250           if (cast == VNET_UNICAST)
1251             {
1252               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1253               static char * feature_nodes[] = {
1254                 [IP4_RX_FEATURE_CHECK_ACCESS] = "ip4-inacl",
1255                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX] = "ip4-source-check-via-rx",
1256                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY] = "ip4-source-check-via-any",
1257                 [IP4_RX_FEATURE_IPSEC] = "ipsec-input-ip4",
1258                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1259                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup",
1260               };
1261
1262               vnet_config_init (vm, vcm,
1263                                 start_nodes, ARRAY_LEN (start_nodes),
1264                                 feature_nodes, ARRAY_LEN (feature_nodes));
1265             }
1266           else
1267             {
1268               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1269               static char * feature_nodes[] = {
1270                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1271                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup-multicast",
1272               };
1273
1274               vnet_config_init (vm, vcm,
1275                                 start_nodes, ARRAY_LEN (start_nodes),
1276                                 feature_nodes, ARRAY_LEN (feature_nodes));
1277             }
1278         }
1279
1280       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1281       ci = cm->config_index_by_sw_if_index[sw_if_index];
1282
1283       if (is_add)
1284         ci = vnet_config_add_feature (vm, vcm,
1285                                       ci,
1286                                       IP4_RX_FEATURE_LOOKUP,
1287                                       /* config data */ 0,
1288                                       /* # bytes of config data */ 0);
1289       else
1290         ci = vnet_config_del_feature (vm, vcm,
1291                                       ci,
1292                                       IP4_RX_FEATURE_LOOKUP,
1293                                       /* config data */ 0,
1294                                       /* # bytes of config data */ 0);
1295
1296       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1297     }
1298
1299   return /* no error */ 0;
1300 }
1301
1302 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1303
1304 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1305   .function = ip4_lookup,
1306   .name = "ip4-lookup",
1307   .vector_size = sizeof (u32),
1308
1309   .n_next_nodes = IP_LOOKUP_N_NEXT,
1310   .next_nodes = {
1311     [IP_LOOKUP_NEXT_MISS] = "ip4-miss",
1312     [IP_LOOKUP_NEXT_DROP] = "ip4-drop",
1313     [IP_LOOKUP_NEXT_PUNT] = "ip4-punt",
1314     [IP_LOOKUP_NEXT_LOCAL] = "ip4-local",
1315     [IP_LOOKUP_NEXT_ARP] = "ip4-arp",
1316     [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit",
1317     [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify",
1318     [IP_LOOKUP_NEXT_MAP] = "ip4-map",
1319     [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t",
1320     [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd",
1321     [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop",
1322     [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop", 
1323     [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop", 
1324   },
1325 };
1326
1327 /* Global IP4 main. */
1328 ip4_main_t ip4_main;
1329
1330 clib_error_t *
1331 ip4_lookup_init (vlib_main_t * vm)
1332 {
1333   ip4_main_t * im = &ip4_main;
1334   uword i;
1335
1336   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1337     {
1338       u32 m;
1339
1340       if (i < 32)
1341         m = pow2_mask (i) << (32 - i);
1342       else 
1343         m = ~0;
1344       im->fib_masks[i] = clib_host_to_net_u32 (m);
1345     }
1346
1347   /* Create FIB with index 0 and table id of 0. */
1348   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1349
1350   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1351
1352   {
1353     pg_node_t * pn;
1354     pn = pg_get_node (ip4_lookup_node.index);
1355     pn->unformat_edit = unformat_pg_ip4_header;
1356   }
1357
1358   {
1359     ethernet_arp_header_t h;
1360
1361     memset (&h, 0, sizeof (h));
1362
1363     /* Set target ethernet address to all zeros. */
1364     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1365
1366 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1367 #define _8(f,v) h.f = v;
1368     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1369     _16 (l3_type, ETHERNET_TYPE_IP4);
1370     _8 (n_l2_address_bytes, 6);
1371     _8 (n_l3_address_bytes, 4);
1372     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1373 #undef _16
1374 #undef _8
1375
1376     vlib_packet_template_init (vm,
1377                                &im->ip4_arp_request_packet_template,
1378                                /* data */ &h,
1379                                sizeof (h),
1380                                /* alloc chunk size */ 8,
1381                                "ip4 arp");
1382   }
1383
1384   return 0;
1385 }
1386
1387 VLIB_INIT_FUNCTION (ip4_lookup_init);
1388
1389 typedef struct {
1390   /* Adjacency taken. */
1391   u32 adj_index;
1392   u32 flow_hash;
1393   u32 fib_index;
1394
1395   /* Packet data, possibly *after* rewrite. */
1396   u8 packet_data[64 - 1*sizeof(u32)];
1397 } ip4_forward_next_trace_t;
1398
1399 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1400 {
1401   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1402   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1403   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1404   vnet_main_t * vnm = vnet_get_main();
1405   ip4_main_t * im = &ip4_main;
1406   ip_adjacency_t * adj;
1407   uword indent = format_get_indent (s);
1408
1409   adj = ip_get_adjacency (&im->lookup_main, t->adj_index);
1410   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1411               t->fib_index, t->adj_index, format_ip_adjacency,
1412               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1413   switch (adj->lookup_next_index)
1414     {
1415     case IP_LOOKUP_NEXT_REWRITE:
1416       s = format (s, "\n%U%U",
1417                   format_white_space, indent,
1418                   format_ip_adjacency_packet_data,
1419                   vnm, &im->lookup_main, t->adj_index,
1420                   t->packet_data, sizeof (t->packet_data));
1421       break;
1422
1423     default:
1424       break;
1425     }
1426
1427   return s;
1428 }
1429
1430 /* Common trace function for all ip4-forward next nodes. */
1431 void
1432 ip4_forward_next_trace (vlib_main_t * vm,
1433                         vlib_node_runtime_t * node,
1434                         vlib_frame_t * frame,
1435                         vlib_rx_or_tx_t which_adj_index)
1436 {
1437   u32 * from, n_left;
1438   ip4_main_t * im = &ip4_main;
1439
1440   n_left = frame->n_vectors;
1441   from = vlib_frame_vector_args (frame);
1442   
1443   while (n_left >= 4)
1444     {
1445       u32 bi0, bi1;
1446       vlib_buffer_t * b0, * b1;
1447       ip4_forward_next_trace_t * t0, * t1;
1448
1449       /* Prefetch next iteration. */
1450       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1451       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1452
1453       bi0 = from[0];
1454       bi1 = from[1];
1455
1456       b0 = vlib_get_buffer (vm, bi0);
1457       b1 = vlib_get_buffer (vm, bi1);
1458
1459       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1460         {
1461           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1462           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1463           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1464           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1465                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1466           memcpy (t0->packet_data,
1467                   vlib_buffer_get_current (b0),
1468                   sizeof (t0->packet_data));
1469         }
1470       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1471         {
1472           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1473           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1474           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1475           t1->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1476                              vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1477           memcpy (t1->packet_data,
1478                   vlib_buffer_get_current (b1),
1479                   sizeof (t1->packet_data));
1480         }
1481       from += 2;
1482       n_left -= 2;
1483     }
1484
1485   while (n_left >= 1)
1486     {
1487       u32 bi0;
1488       vlib_buffer_t * b0;
1489       ip4_forward_next_trace_t * t0;
1490
1491       bi0 = from[0];
1492
1493       b0 = vlib_get_buffer (vm, bi0);
1494
1495       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1496         {
1497           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1498           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1499           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1500           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1501                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1502           memcpy (t0->packet_data,
1503                   vlib_buffer_get_current (b0),
1504                   sizeof (t0->packet_data));
1505         }
1506       from += 1;
1507       n_left -= 1;
1508     }
1509 }
1510
1511 static uword
1512 ip4_drop_or_punt (vlib_main_t * vm,
1513                   vlib_node_runtime_t * node,
1514                   vlib_frame_t * frame,
1515                   ip4_error_t error_code)
1516 {
1517   u32 * buffers = vlib_frame_vector_args (frame);
1518   uword n_packets = frame->n_vectors;
1519
1520   vlib_error_drop_buffers (vm, node,
1521                            buffers,
1522                            /* stride */ 1,
1523                            n_packets,
1524                            /* next */ 0,
1525                            ip4_input_node.index,
1526                            error_code);
1527
1528   if (node->flags & VLIB_NODE_FLAG_TRACE)
1529     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1530
1531   return n_packets;
1532 }
1533
1534 static uword
1535 ip4_drop (vlib_main_t * vm,
1536           vlib_node_runtime_t * node,
1537           vlib_frame_t * frame)
1538 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1539
1540 static uword
1541 ip4_punt (vlib_main_t * vm,
1542           vlib_node_runtime_t * node,
1543           vlib_frame_t * frame)
1544 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1545
1546 static uword
1547 ip4_miss (vlib_main_t * vm,
1548           vlib_node_runtime_t * node,
1549           vlib_frame_t * frame)
1550 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1551
1552 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1553   .function = ip4_drop,
1554   .name = "ip4-drop",
1555   .vector_size = sizeof (u32),
1556
1557   .format_trace = format_ip4_forward_next_trace,
1558
1559   .n_next_nodes = 1,
1560   .next_nodes = {
1561     [0] = "error-drop",
1562   },
1563 };
1564
1565 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1566   .function = ip4_punt,
1567   .name = "ip4-punt",
1568   .vector_size = sizeof (u32),
1569
1570   .format_trace = format_ip4_forward_next_trace,
1571
1572   .n_next_nodes = 1,
1573   .next_nodes = {
1574     [0] = "error-punt",
1575   },
1576 };
1577
1578 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1579   .function = ip4_miss,
1580   .name = "ip4-miss",
1581   .vector_size = sizeof (u32),
1582
1583   .format_trace = format_ip4_forward_next_trace,
1584
1585   .n_next_nodes = 1,
1586   .next_nodes = {
1587     [0] = "error-drop",
1588   },
1589 };
1590
1591 /* Compute TCP/UDP/ICMP4 checksum in software. */
1592 u16
1593 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1594                               ip4_header_t * ip0)
1595 {
1596   ip_csum_t sum0;
1597   u32 ip_header_length, payload_length_host_byte_order;
1598   u32 n_this_buffer, n_bytes_left;
1599   u16 sum16;
1600   void * data_this_buffer;
1601   
1602   /* Initialize checksum with ip header. */
1603   ip_header_length = ip4_header_bytes (ip0);
1604   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1605   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1606
1607   if (BITS (uword) == 32)
1608     {
1609       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1610       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1611     }
1612   else
1613     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1614
1615   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1616   data_this_buffer = (void *) ip0 + ip_header_length;
1617   if (n_this_buffer + ip_header_length > p0->current_length)
1618     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1619   while (1)
1620     {
1621       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1622       n_bytes_left -= n_this_buffer;
1623       if (n_bytes_left == 0)
1624         break;
1625
1626       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1627       p0 = vlib_get_buffer (vm, p0->next_buffer);
1628       data_this_buffer = vlib_buffer_get_current (p0);
1629       n_this_buffer = p0->current_length;
1630     }
1631
1632   sum16 = ~ ip_csum_fold (sum0);
1633
1634   return sum16;
1635 }
1636
1637 static u32
1638 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1639 {
1640   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1641   udp_header_t * udp0;
1642   u16 sum16;
1643
1644   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1645           || ip0->protocol == IP_PROTOCOL_UDP);
1646
1647   udp0 = (void *) (ip0 + 1);
1648   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1649     {
1650       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1651                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1652       return p0->flags;
1653     }
1654
1655   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1656
1657   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1658                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1659
1660   return p0->flags;
1661 }
1662
1663 static uword
1664 ip4_local (vlib_main_t * vm,
1665            vlib_node_runtime_t * node,
1666            vlib_frame_t * frame)
1667 {
1668   ip4_main_t * im = &ip4_main;
1669   ip_lookup_main_t * lm = &im->lookup_main;
1670   ip_local_next_t next_index;
1671   u32 * from, * to_next, n_left_from, n_left_to_next;
1672   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1673
1674   from = vlib_frame_vector_args (frame);
1675   n_left_from = frame->n_vectors;
1676   next_index = node->cached_next_index;
1677   
1678   if (node->flags & VLIB_NODE_FLAG_TRACE)
1679     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1680
1681   while (n_left_from > 0)
1682     {
1683       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1684
1685       while (n_left_from >= 4 && n_left_to_next >= 2)
1686         {
1687           vlib_buffer_t * p0, * p1;
1688           ip4_header_t * ip0, * ip1;
1689           udp_header_t * udp0, * udp1;
1690           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1691           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1692           ip_adjacency_t * adj0, * adj1;
1693           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
1694           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
1695           i32 len_diff0, len_diff1;
1696           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1697           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1698           u8 enqueue_code;
1699       
1700           pi0 = to_next[0] = from[0];
1701           pi1 = to_next[1] = from[1];
1702           from += 2;
1703           n_left_from -= 2;
1704           to_next += 2;
1705           n_left_to_next -= 2;
1706       
1707           p0 = vlib_get_buffer (vm, pi0);
1708           p1 = vlib_get_buffer (vm, pi1);
1709
1710           ip0 = vlib_buffer_get_current (p0);
1711           ip1 = vlib_buffer_get_current (p1);
1712
1713           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1714                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1715           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1716                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1717
1718           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1719           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
1720
1721           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1722
1723           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1724           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1725
1726           proto0 = ip0->protocol;
1727           proto1 = ip1->protocol;
1728           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1729           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1730           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1731           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1732
1733           flags0 = p0->flags;
1734           flags1 = p1->flags;
1735
1736           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1737           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1738
1739           udp0 = ip4_next_header (ip0);
1740           udp1 = ip4_next_header (ip1);
1741
1742           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1743           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1744           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1745
1746           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1747           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1748
1749           /* Verify UDP length. */
1750           ip_len0 = clib_net_to_host_u16 (ip0->length);
1751           ip_len1 = clib_net_to_host_u16 (ip1->length);
1752           udp_len0 = clib_net_to_host_u16 (udp0->length);
1753           udp_len1 = clib_net_to_host_u16 (udp1->length);
1754
1755           len_diff0 = ip_len0 - udp_len0;
1756           len_diff1 = ip_len1 - udp_len1;
1757
1758           len_diff0 = is_udp0 ? len_diff0 : 0;
1759           len_diff1 = is_udp1 ? len_diff1 : 0;
1760
1761           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1762                                 & good_tcp_udp0 & good_tcp_udp1)))
1763             {
1764               if (is_tcp_udp0)
1765                 {
1766                   if (is_tcp_udp0
1767                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1768                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1769                   good_tcp_udp0 =
1770                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1771                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1772                 }
1773               if (is_tcp_udp1)
1774                 {
1775                   if (is_tcp_udp1
1776                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1777                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1778                   good_tcp_udp1 =
1779                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1780                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1781                 }
1782             }
1783
1784           good_tcp_udp0 &= len_diff0 >= 0;
1785           good_tcp_udp1 &= len_diff1 >= 0;
1786
1787           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1788           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1789
1790           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1791
1792           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1793           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1794
1795           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1796           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1797                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1798                     : error0);
1799           error1 = (is_tcp_udp1 && ! good_tcp_udp1
1800                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1801                     : error1);
1802
1803           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1804           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1805
1806           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1807           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1808
1809           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1810           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
1811
1812           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1813                                                            &ip0->src_address,
1814                                                            /* no_default_route */ 1));
1815           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
1816                                                            &ip1->src_address,
1817                                                            /* no_default_route */ 1));
1818
1819           adj0 = ip_get_adjacency (lm, adj_index0);
1820           adj1 = ip_get_adjacency (lm, adj_index1);
1821
1822           /* 
1823            * Must have a route to source otherwise we drop the packet.
1824            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1825            */
1826           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1827                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1828                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
1829                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1830                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1831                     ? IP4_ERROR_SRC_LOOKUP_MISS
1832                     : error0);
1833           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
1834                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1835                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
1836                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1837                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1838                     ? IP4_ERROR_SRC_LOOKUP_MISS
1839                     : error1);
1840
1841           next0 = lm->local_next_by_ip_protocol[proto0];
1842           next1 = lm->local_next_by_ip_protocol[proto1];
1843
1844           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1845           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1846
1847           p0->error = error0 ? error_node->errors[error0] : 0;
1848           p1->error = error1 ? error_node->errors[error1] : 0;
1849
1850           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1851
1852           if (PREDICT_FALSE (enqueue_code != 0))
1853             {
1854               switch (enqueue_code)
1855                 {
1856                 case 1:
1857                   /* A B A */
1858                   to_next[-2] = pi1;
1859                   to_next -= 1;
1860                   n_left_to_next += 1;
1861                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1862                   break;
1863
1864                 case 2:
1865                   /* A A B */
1866                   to_next -= 1;
1867                   n_left_to_next += 1;
1868                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1869                   break;
1870
1871                 case 3:
1872                   /* A B B or A B C */
1873                   to_next -= 2;
1874                   n_left_to_next += 2;
1875                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1876                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1877                   if (next0 == next1)
1878                     {
1879                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1880                       next_index = next1;
1881                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1882                     }
1883                   break;
1884                 }
1885             }
1886         }
1887
1888       while (n_left_from > 0 && n_left_to_next > 0)
1889         {
1890           vlib_buffer_t * p0;
1891           ip4_header_t * ip0;
1892           udp_header_t * udp0;
1893           ip4_fib_mtrie_t * mtrie0;
1894           ip4_fib_mtrie_leaf_t leaf0;
1895           ip_adjacency_t * adj0;
1896           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
1897           i32 len_diff0;
1898           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1899       
1900           pi0 = to_next[0] = from[0];
1901           from += 1;
1902           n_left_from -= 1;
1903           to_next += 1;
1904           n_left_to_next -= 1;
1905       
1906           p0 = vlib_get_buffer (vm, pi0);
1907
1908           ip0 = vlib_buffer_get_current (p0);
1909
1910           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1911                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1912
1913           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1914
1915           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1916
1917           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1918
1919           proto0 = ip0->protocol;
1920           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1921           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1922
1923           flags0 = p0->flags;
1924
1925           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1926
1927           udp0 = ip4_next_header (ip0);
1928
1929           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1930           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1931
1932           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1933
1934           /* Verify UDP length. */
1935           ip_len0 = clib_net_to_host_u16 (ip0->length);
1936           udp_len0 = clib_net_to_host_u16 (udp0->length);
1937
1938           len_diff0 = ip_len0 - udp_len0;
1939
1940           len_diff0 = is_udp0 ? len_diff0 : 0;
1941
1942           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1943             {
1944               if (is_tcp_udp0)
1945                 {
1946                   if (is_tcp_udp0
1947                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1948                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1949                   good_tcp_udp0 =
1950                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1951                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1952                 }
1953             }
1954
1955           good_tcp_udp0 &= len_diff0 >= 0;
1956
1957           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1958
1959           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1960
1961           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1962
1963           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1964           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1965                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1966                     : error0);
1967
1968           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1969
1970           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1971           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1972
1973           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1974                                                            &ip0->src_address,
1975                                                            /* no_default_route */ 1));
1976
1977           adj0 = ip_get_adjacency (lm, adj_index0);
1978
1979           /* Must have a route to source otherwise we drop the packet. */
1980           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1981                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1982                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
1983                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1984                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1985                     ? IP4_ERROR_SRC_LOOKUP_MISS
1986                     : error0);
1987
1988           next0 = lm->local_next_by_ip_protocol[proto0];
1989
1990           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1991
1992           p0->error = error0? error_node->errors[error0] : 0;
1993
1994           if (PREDICT_FALSE (next0 != next_index))
1995             {
1996               n_left_to_next += 1;
1997               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1998
1999               next_index = next0;
2000               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2001               to_next[0] = pi0;
2002               to_next += 1;
2003               n_left_to_next -= 1;
2004             }
2005         }
2006   
2007       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2008     }
2009
2010   return frame->n_vectors;
2011 }
2012
2013 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2014   .function = ip4_local,
2015   .name = "ip4-local",
2016   .vector_size = sizeof (u32),
2017
2018   .format_trace = format_ip4_forward_next_trace,
2019
2020   .n_next_nodes = IP_LOCAL_N_NEXT,
2021   .next_nodes = {
2022     [IP_LOCAL_NEXT_DROP] = "error-drop",
2023     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2024     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2025     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2026   },
2027 };
2028
2029 void ip4_register_protocol (u32 protocol, u32 node_index)
2030 {
2031   vlib_main_t * vm = vlib_get_main();
2032   ip4_main_t * im = &ip4_main;
2033   ip_lookup_main_t * lm = &im->lookup_main;
2034
2035   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2036   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2037 }
2038
2039 static clib_error_t *
2040 show_ip_local_command_fn (vlib_main_t * vm,
2041                           unformat_input_t * input,
2042                          vlib_cli_command_t * cmd)
2043 {
2044   ip4_main_t * im = &ip4_main;
2045   ip_lookup_main_t * lm = &im->lookup_main;
2046   int i;
2047
2048   vlib_cli_output (vm, "Protocols handled by ip4_local");
2049   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2050     {
2051       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2052         vlib_cli_output (vm, "%d", i);
2053     }
2054   return 0;
2055 }
2056
2057
2058
2059 VLIB_CLI_COMMAND (show_ip_local, static) = {
2060   .path = "show ip local",
2061   .function = show_ip_local_command_fn,
2062   .short_help = "Show ip local protocol table",
2063 };
2064
2065 static uword
2066 ip4_arp (vlib_main_t * vm,
2067          vlib_node_runtime_t * node,
2068          vlib_frame_t * frame)
2069 {
2070   vnet_main_t * vnm = vnet_get_main();
2071   ip4_main_t * im = &ip4_main;
2072   ip_lookup_main_t * lm = &im->lookup_main;
2073   u32 * from, * to_next_drop;
2074   uword n_left_from, n_left_to_next_drop, next_index;
2075   static f64 time_last_seed_change = -1e100;
2076   static u32 hash_seeds[3];
2077   static uword hash_bitmap[256 / BITS (uword)]; 
2078   f64 time_now;
2079
2080   if (node->flags & VLIB_NODE_FLAG_TRACE)
2081     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2082
2083   time_now = vlib_time_now (vm);
2084   if (time_now - time_last_seed_change > 1e-3)
2085     {
2086       uword i;
2087       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2088                                              sizeof (hash_seeds));
2089       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2090         hash_seeds[i] = r[i];
2091
2092       /* Mark all hash keys as been no-seen before. */
2093       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2094         hash_bitmap[i] = 0;
2095
2096       time_last_seed_change = time_now;
2097     }
2098
2099   from = vlib_frame_vector_args (frame);
2100   n_left_from = frame->n_vectors;
2101   next_index = node->cached_next_index;
2102   if (next_index == IP4_ARP_NEXT_DROP)
2103     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2104
2105   while (n_left_from > 0)
2106     {
2107       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2108                            to_next_drop, n_left_to_next_drop);
2109
2110       while (n_left_from > 0 && n_left_to_next_drop > 0)
2111         {
2112           vlib_buffer_t * p0;
2113           ip4_header_t * ip0;
2114           ethernet_header_t * eh0;
2115           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2116           uword bm0;
2117           ip_adjacency_t * adj0;
2118
2119           pi0 = from[0];
2120
2121           p0 = vlib_get_buffer (vm, pi0);
2122
2123           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2124           adj0 = ip_get_adjacency (lm, adj_index0);
2125           ip0 = vlib_buffer_get_current (p0);
2126
2127           /* If packet destination is not local, send ARP to next hop */
2128           if (adj0->arp.next_hop.ip4.as_u32)
2129             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2130
2131           /* 
2132            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2133            * rewrite to this packet, we need to skip it here.
2134            * Note, to distinguish from src IP addr *.8.6.*, we
2135            * check for a bcast eth dest instead of IPv4 version.
2136            */
2137           eh0 = (ethernet_header_t*)ip0;
2138           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2139             {
2140               u32 vlan_num = 0;
2141               u16 * etype = &eh0->type;
2142               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2143                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2144                 {
2145                   vlan_num += 1;
2146                   etype += 2; //vlan tag also 16 bits, same as etype
2147                 }
2148               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2149                 {
2150                   vlib_buffer_advance (
2151                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2152                   ip0 = vlib_buffer_get_current (p0);
2153                 }
2154             }
2155
2156           a0 = hash_seeds[0];
2157           b0 = hash_seeds[1];
2158           c0 = hash_seeds[2];
2159
2160           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2161           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2162
2163           a0 ^= ip0->dst_address.data_u32;
2164           b0 ^= sw_if_index0;
2165
2166           hash_v3_finalize32 (a0, b0, c0);
2167
2168           c0 &= BITS (hash_bitmap) - 1;
2169           c0 = c0 / BITS (uword);
2170           m0 = (uword) 1 << (c0 % BITS (uword));
2171
2172           bm0 = hash_bitmap[c0];
2173           drop0 = (bm0 & m0) != 0;
2174
2175           /* Mark it as seen. */
2176           hash_bitmap[c0] = bm0 | m0;
2177
2178           from += 1;
2179           n_left_from -= 1;
2180           to_next_drop[0] = pi0;
2181           to_next_drop += 1;
2182           n_left_to_next_drop -= 1;
2183
2184           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2185
2186           if (drop0)
2187             continue;
2188
2189           /* 
2190            * Can happen if the control-plane is programming tables
2191            * with traffic flowing; at least that's today's lame excuse.
2192            */
2193           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2194             {
2195               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2196             }
2197           else
2198           /* Send ARP request. */
2199           {
2200             u32 bi0 = 0;
2201             vlib_buffer_t * b0;
2202             ethernet_arp_header_t * h0;
2203             vnet_hw_interface_t * hw_if0;
2204
2205             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2206
2207             /* Add rewrite/encap string for ARP packet. */
2208             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2209
2210             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2211
2212             /* Src ethernet address in ARP header. */
2213             memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2214                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2215
2216             ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0);
2217
2218             /* Copy in destination address we are requesting. */
2219             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2220
2221             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2222             b0 = vlib_get_buffer (vm, bi0);
2223             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2224
2225             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2226
2227             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2228           }
2229         }
2230
2231       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2232     }
2233
2234   return frame->n_vectors;
2235 }
2236
2237 static char * ip4_arp_error_strings[] = {
2238   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2239   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2240   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2241   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2242   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2243 };
2244
2245 VLIB_REGISTER_NODE (ip4_arp_node) = {
2246   .function = ip4_arp,
2247   .name = "ip4-arp",
2248   .vector_size = sizeof (u32),
2249
2250   .format_trace = format_ip4_forward_next_trace,
2251
2252   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2253   .error_strings = ip4_arp_error_strings,
2254
2255   .n_next_nodes = IP4_ARP_N_NEXT,
2256   .next_nodes = {
2257     [IP4_ARP_NEXT_DROP] = "error-drop",
2258   },
2259 };
2260
2261 #define foreach_notrace_ip4_arp_error           \
2262 _(DROP)                                         \
2263 _(REQUEST_SENT)                                 \
2264 _(REPLICATE_DROP)                               \
2265 _(REPLICATE_FAIL)
2266
2267 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2268 {
2269   vlib_node_runtime_t *rt = 
2270     vlib_node_get_runtime (vm, ip4_arp_node.index);
2271
2272   /* don't trace ARP request packets */
2273 #define _(a)                                    \
2274     vnet_pcap_drop_trace_filter_add_del         \
2275         (rt->errors[IP4_ARP_ERROR_##a],         \
2276          1 /* is_add */);
2277     foreach_notrace_ip4_arp_error;
2278 #undef _
2279   return 0;
2280 }
2281
2282 VLIB_INIT_FUNCTION(arp_notrace_init);
2283
2284
2285 /* Send an ARP request to see if given destination is reachable on given interface. */
2286 clib_error_t *
2287 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2288 {
2289   vnet_main_t * vnm = vnet_get_main();
2290   ip4_main_t * im = &ip4_main;
2291   ethernet_arp_header_t * h;
2292   ip4_address_t * src;
2293   ip_interface_address_t * ia;
2294   ip_adjacency_t * adj;
2295   vnet_hw_interface_t * hi;
2296   vnet_sw_interface_t * si;
2297   vlib_buffer_t * b;
2298   u32 bi = 0;
2299
2300   si = vnet_get_sw_interface (vnm, sw_if_index);
2301
2302   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2303     {
2304       return clib_error_return (0, "%U: interface %U down",
2305                                 format_ip4_address, dst, 
2306                                 format_vnet_sw_if_index_name, vnm, 
2307                                 sw_if_index);
2308     }
2309
2310   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2311   if (! src)
2312     {
2313       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2314       return clib_error_return 
2315         (0, "no matching interface address for destination %U (interface %U)",
2316          format_ip4_address, dst,
2317          format_vnet_sw_if_index_name, vnm, sw_if_index);
2318     }
2319
2320   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2321
2322   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2323
2324   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2325
2326   memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2327
2328   h->ip4_over_ethernet[0].ip4 = src[0];
2329   h->ip4_over_ethernet[1].ip4 = dst[0];
2330
2331   b = vlib_get_buffer (vm, bi);
2332   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2333
2334   /* Add encapsulation string for software interface (e.g. ethernet header). */
2335   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2336   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2337
2338   {
2339     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2340     u32 * to_next = vlib_frame_vector_args (f);
2341     to_next[0] = bi;
2342     f->n_vectors = 1;
2343     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2344   }
2345
2346   return /* no error */ 0;
2347 }
2348
/* Next-node indices referenced by the ip4 rewrite code below. */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ARP = 1,
} ip4_rewrite_next_t;
2353
2354 always_inline uword
2355 ip4_rewrite_inline (vlib_main_t * vm,
2356                     vlib_node_runtime_t * node,
2357                     vlib_frame_t * frame,
2358                     int rewrite_for_locally_received_packets)
2359 {
2360   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2361   u32 * from = vlib_frame_vector_args (frame);
2362   u32 n_left_from, n_left_to_next, * to_next, next_index;
2363   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2364   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2365
2366   n_left_from = frame->n_vectors;
2367   next_index = node->cached_next_index;
2368   u32 cpu_index = os_get_cpu_number();
2369   
2370   while (n_left_from > 0)
2371     {
2372       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2373
2374       while (n_left_from >= 4 && n_left_to_next >= 2)
2375         {
2376           ip_adjacency_t * adj0, * adj1;
2377           vlib_buffer_t * p0, * p1;
2378           ip4_header_t * ip0, * ip1;
2379           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2380           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2381           u32 next0_override, next1_override;
2382       
2383           if (rewrite_for_locally_received_packets)
2384               next0_override = next1_override = 0;
2385
2386           /* Prefetch next iteration. */
2387           {
2388             vlib_buffer_t * p2, * p3;
2389
2390             p2 = vlib_get_buffer (vm, from[2]);
2391             p3 = vlib_get_buffer (vm, from[3]);
2392
2393             vlib_prefetch_buffer_header (p2, STORE);
2394             vlib_prefetch_buffer_header (p3, STORE);
2395
2396             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2397             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2398           }
2399
2400           pi0 = to_next[0] = from[0];
2401           pi1 = to_next[1] = from[1];
2402
2403           from += 2;
2404           n_left_from -= 2;
2405           to_next += 2;
2406           n_left_to_next -= 2;
2407       
2408           p0 = vlib_get_buffer (vm, pi0);
2409           p1 = vlib_get_buffer (vm, pi1);
2410
2411           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2412           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2413
2414           /* We should never rewrite a pkt using the MISS adjacency */
2415           ASSERT(adj_index0 && adj_index1);
2416
2417           ip0 = vlib_buffer_get_current (p0);
2418           ip1 = vlib_buffer_get_current (p1);
2419
2420           error0 = error1 = IP4_ERROR_NONE;
2421
2422           /* Decrement TTL & update checksum.
2423              Works either endian, so no need for byte swap. */
2424           if (! rewrite_for_locally_received_packets)
2425             {
2426               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2427
2428               /* Input node should have reject packets with ttl 0. */
2429               ASSERT (ip0->ttl > 0);
2430               ASSERT (ip1->ttl > 0);
2431
2432               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2433               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2434
2435               checksum0 += checksum0 >= 0xffff;
2436               checksum1 += checksum1 >= 0xffff;
2437
2438               ip0->checksum = checksum0;
2439               ip1->checksum = checksum1;
2440
2441               ttl0 -= 1;
2442               ttl1 -= 1;
2443
2444               ip0->ttl = ttl0;
2445               ip1->ttl = ttl1;
2446
2447               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2448               error1 = ttl1 <= 0 ? IP4_ERROR_TIME_EXPIRED : error1;
2449
2450               /* Verify checksum. */
2451               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2452               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2453             }
2454
2455           /* Rewrite packet header and updates lengths. */
2456           adj0 = ip_get_adjacency (lm, adj_index0);
2457           adj1 = ip_get_adjacency (lm, adj_index1);
2458       
2459           if (rewrite_for_locally_received_packets)
2460             {
2461               /*
2462                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2463                * we end up here with a local adjacency in hand
2464                * The local adj rewrite data is 0xfefe on purpose.
2465                * Bad engineer, no donut for you.
2466                */
2467               if (PREDICT_FALSE(adj0->lookup_next_index 
2468                                 == IP_LOOKUP_NEXT_LOCAL))
2469                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2470               if (PREDICT_FALSE(adj0->lookup_next_index
2471                                 == IP_LOOKUP_NEXT_ARP))
2472                 next0_override = IP4_REWRITE_NEXT_ARP;
2473               if (PREDICT_FALSE(adj1->lookup_next_index 
2474                                 == IP_LOOKUP_NEXT_LOCAL))
2475                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2476               if (PREDICT_FALSE(adj1->lookup_next_index
2477                                 == IP_LOOKUP_NEXT_ARP))
2478                 next1_override = IP4_REWRITE_NEXT_ARP;
2479             }
2480
2481           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2482           rw_len0 = adj0[0].rewrite_header.data_bytes;
2483           rw_len1 = adj1[0].rewrite_header.data_bytes;
2484           next0 = (error0 == IP4_ERROR_NONE) 
2485             ? adj0[0].rewrite_header.next_index : 0;
2486
2487           if (rewrite_for_locally_received_packets)
2488               next0 = next0 && next0_override ? next0_override : next0;
2489
2490           next1 = (error1 == IP4_ERROR_NONE)
2491             ? adj1[0].rewrite_header.next_index : 0;
2492
2493           if (rewrite_for_locally_received_packets)
2494               next1 = next1 && next1_override ? next1_override : next1;
2495
2496           /* 
2497            * We've already accounted for an ethernet_header_t elsewhere
2498            */
2499           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2500               vlib_increment_combined_counter 
2501                   (&lm->adjacency_counters,
2502                    cpu_index, adj_index0, 
2503                    /* packet increment */ 0,
2504                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2505
2506           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2507               vlib_increment_combined_counter 
2508                   (&lm->adjacency_counters,
2509                    cpu_index, adj_index1, 
2510                    /* packet increment */ 0,
2511                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2512
2513           /* Check MTU of outgoing interface. */
2514           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2515                     ? IP4_ERROR_MTU_EXCEEDED
2516                     : error0);
2517           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2518                     ? IP4_ERROR_MTU_EXCEEDED
2519                     : error1);
2520
2521           p0->current_data -= rw_len0;
2522           p1->current_data -= rw_len1;
2523
2524           p0->current_length += rw_len0;
2525           p1->current_length += rw_len1;
2526
2527           vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
2528           vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index;
2529       
2530           p0->error = error_node->errors[error0];
2531           p1->error = error_node->errors[error1];
2532
2533           /* Guess we are only writing on simple Ethernet header. */
2534           vnet_rewrite_two_headers (adj0[0], adj1[0],
2535                                     ip0, ip1,
2536                                     sizeof (ethernet_header_t));
2537       
2538           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2539                                            to_next, n_left_to_next,
2540                                            pi0, pi1, next0, next1);
2541         }
2542
2543       while (n_left_from > 0 && n_left_to_next > 0)
2544         {
2545           ip_adjacency_t * adj0;
2546           vlib_buffer_t * p0;
2547           ip4_header_t * ip0;
2548           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2549           u32 next0_override;
2550       
2551           if (rewrite_for_locally_received_packets)
2552               next0_override = 0;
2553
2554           pi0 = to_next[0] = from[0];
2555
2556           p0 = vlib_get_buffer (vm, pi0);
2557
2558           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2559
2560           /* We should never rewrite a pkt using the MISS adjacency */
2561           ASSERT(adj_index0);
2562
2563           adj0 = ip_get_adjacency (lm, adj_index0);
2564       
2565           ip0 = vlib_buffer_get_current (p0);
2566
2567           error0 = IP4_ERROR_NONE;
2568           next0 = 0;            /* drop on error */
2569
2570           /* Decrement TTL & update checksum. */
2571           if (! rewrite_for_locally_received_packets)
2572             {
2573               i32 ttl0 = ip0->ttl;
2574
2575               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2576
2577               checksum0 += checksum0 >= 0xffff;
2578
2579               ip0->checksum = checksum0;
2580
2581               ASSERT (ip0->ttl > 0);
2582
2583               ttl0 -= 1;
2584
2585               ip0->ttl = ttl0;
2586
2587               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2588
2589               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2590             }
2591
2592           if (rewrite_for_locally_received_packets)
2593             {
2594               /*
2595                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2596                * we end up here with a local adjacency in hand
2597                * The local adj rewrite data is 0xfefe on purpose.
2598                * Bad engineer, no donut for you.
2599                */
2600               if (PREDICT_FALSE(adj0->lookup_next_index 
2601                                 == IP_LOOKUP_NEXT_LOCAL))
2602                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2603               /* 
2604                * We have to override the next_index in ARP adjacencies,
2605                * because they're set up for ip4-arp, not this node...
2606                */
2607               if (PREDICT_FALSE(adj0->lookup_next_index
2608                                 == IP_LOOKUP_NEXT_ARP))
2609                 next0_override = IP4_REWRITE_NEXT_ARP;
2610             }
2611
2612           /* Guess we are only writing on simple Ethernet header. */
2613           vnet_rewrite_one_header (adj0[0], ip0, 
2614                                    sizeof (ethernet_header_t));
2615           
2616           /* Update packet buffer attributes/set output interface. */
2617           rw_len0 = adj0[0].rewrite_header.data_bytes;
2618           
2619           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2620               vlib_increment_combined_counter 
2621                   (&lm->adjacency_counters,
2622                    cpu_index, adj_index0, 
2623                    /* packet increment */ 0,
2624                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2625           
2626           /* Check MTU of outgoing interface. */
2627           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2628                     > adj0[0].rewrite_header.max_l3_packet_bytes
2629                     ? IP4_ERROR_MTU_EXCEEDED
2630                     : error0);
2631           
2632           p0->error = error_node->errors[error0];
2633           p0->current_data -= rw_len0;
2634           p0->current_length += rw_len0;
2635           vnet_buffer (p0)->sw_if_index[VLIB_TX] = 
2636             adj0[0].rewrite_header.sw_if_index;
2637           
2638           next0 = (error0 == IP4_ERROR_NONE)
2639             ? adj0[0].rewrite_header.next_index : 0;
2640
2641           if (rewrite_for_locally_received_packets)
2642               next0 = next0 && next0_override ? next0_override : next0;
2643
2644           from += 1;
2645           n_left_from -= 1;
2646           to_next += 1;
2647           n_left_to_next -= 1;
2648       
2649           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2650                                            to_next, n_left_to_next,
2651                                            pi0, next0);
2652         }
2653   
2654       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2655     }
2656
2657   /* Need to do trace after rewrites to pick up new packet data. */
2658   if (node->flags & VLIB_NODE_FLAG_TRACE)
2659     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2660
2661   return frame->n_vectors;
2662 }
2663
2664 static uword
2665 ip4_rewrite_transit (vlib_main_t * vm,
2666                      vlib_node_runtime_t * node,
2667                      vlib_frame_t * frame)
2668 {
2669   return ip4_rewrite_inline (vm, node, frame,
2670                              /* rewrite_for_locally_received_packets */ 0);
2671 }
2672
2673 static uword
2674 ip4_rewrite_local (vlib_main_t * vm,
2675                    vlib_node_runtime_t * node,
2676                    vlib_frame_t * frame)
2677 {
2678   return ip4_rewrite_inline (vm, node, frame,
2679                              /* rewrite_for_locally_received_packets */ 1);
2680 }
2681
2682 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2683   .function = ip4_rewrite_transit,
2684   .name = "ip4-rewrite-transit",
2685   .vector_size = sizeof (u32),
2686
2687   .format_trace = format_ip4_forward_next_trace,
2688
2689   .n_next_nodes = 2,
2690   .next_nodes = {
2691     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2692     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2693   },
2694 };
2695
2696 VLIB_REGISTER_NODE (ip4_rewrite_local_node,static) = {
2697   .function = ip4_rewrite_local,
2698   .name = "ip4-rewrite-local",
2699   .vector_size = sizeof (u32),
2700
2701   .sibling_of = "ip4-rewrite-transit",
2702
2703   .format_trace = format_ip4_forward_next_trace,
2704
2705   .n_next_nodes = 2,
2706   .next_nodes = {
2707     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2708     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2709   },
2710 };
2711
2712 static clib_error_t *
2713 add_del_interface_table (vlib_main_t * vm,
2714                          unformat_input_t * input,
2715                          vlib_cli_command_t * cmd)
2716 {
2717   vnet_main_t * vnm = vnet_get_main();
2718   clib_error_t * error = 0;
2719   u32 sw_if_index, table_id;
2720
2721   sw_if_index = ~0;
2722
2723   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2724     {
2725       error = clib_error_return (0, "unknown interface `%U'",
2726                                  format_unformat_error, input);
2727       goto done;
2728     }
2729
2730   if (unformat (input, "%d", &table_id))
2731     ;
2732   else
2733     {
2734       error = clib_error_return (0, "expected table id `%U'",
2735                                  format_unformat_error, input);
2736       goto done;
2737     }
2738
2739   {
2740     ip4_main_t * im = &ip4_main;
2741     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
2742
2743     if (fib) 
2744       {
2745         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2746         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
2747     }
2748   }
2749
2750  done:
2751   return error;
2752 }
2753
2754 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2755   .path = "set interface ip table",
2756   .function = add_del_interface_table,
2757   .short_help = "Add/delete FIB table id for interface",
2758 };
2759
2760
/*
 * Node function registered as "ip4-lookup-multicast".
 *
 * For every buffer in the frame:
 *   - pick a FIB index: the one bound to the RX interface, unless
 *     sw_if_index[VLIB_TX] is something other than ~0, in which case
 *     that value is used as the FIB index;
 *   - look up the destination address to get an adjacency index;
 *   - compute the flow hash with the FIB's flow_hash_config and use it
 *     to offset the adjacency index by a value in [0, n_adj);
 *   - store the result in ip.adj_index[VLIB_TX] and bump the adjacency
 *     counter by one packet and the buffer chain length;
 *   - enqueue the buffer to the next node given by the adjacency's
 *     lookup_next_index.
 *
 * Buffers are first written speculatively into the frame of the
 * current next node and moved afterwards if they belong elsewhere.
 *
 * Returns the number of buffers in the input frame.
 */
2761 static uword
2762 ip4_lookup_multicast (vlib_main_t * vm,
2763                       vlib_node_runtime_t * node,
2764                       vlib_frame_t * frame)
2765 {
2766   ip4_main_t * im = &ip4_main;
2767   ip_lookup_main_t * lm = &im->lookup_main;
2768   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
2769   u32 n_left_from, n_left_to_next, * from, * to_next;
2770   ip_lookup_next_t next;
2771   u32 cpu_index = os_get_cpu_number();
2772
2773   from = vlib_frame_vector_args (frame);
2774   n_left_from = frame->n_vectors;
       /* Start with the next node cached in the node runtime. */
2775   next = node->cached_next_index;
2776
2777   while (n_left_from > 0)
2778     {
2779       vlib_get_next_frame (vm, node, next,
2780                            to_next, n_left_to_next);
2781
           /*
            * Dual loop: two buffers per iteration. Four must remain so
            * that from[2] and from[3] can be prefetched.
            */
2782       while (n_left_from >= 4 && n_left_to_next >= 2)
2783         {
2784           vlib_buffer_t * p0, * p1;
2785           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
2786           ip_lookup_next_t next0, next1;
2787           ip4_header_t * ip0, * ip1;
2788           ip_adjacency_t * adj0, * adj1;
2789           u32 fib_index0, fib_index1;
2790           u32 flow_hash_config0, flow_hash_config1;
2791
2792           /* Prefetch next iteration. */
2793           {
2794             vlib_buffer_t * p2, * p3;
2795
2796             p2 = vlib_get_buffer (vm, from[2]);
2797             p3 = vlib_get_buffer (vm, from[3]);
2798
2799             vlib_prefetch_buffer_header (p2, LOAD);
2800             vlib_prefetch_buffer_header (p3, LOAD);
2801
2802             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2803             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2804           }
2805
               /* Speculatively enqueue both buffers to the current next frame. */
2806           pi0 = to_next[0] = from[0];
2807           pi1 = to_next[1] = from[1];
2808
2809           p0 = vlib_get_buffer (vm, pi0);
2810           p1 = vlib_get_buffer (vm, pi1);
2811
2812           ip0 = vlib_buffer_get_current (p0);
2813           ip1 = vlib_buffer_get_current (p1);
2814
               /*
                * FIB index comes from the RX interface, unless
                * sw_if_index[VLIB_TX] is not ~0; then that value is used.
                */
2815           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2816           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2817           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2818             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2819           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2820             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2821
2822           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
2823                                               &ip0->dst_address, p0);
2824           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
2825                                               &ip1->dst_address, p1);
2826
2827           adj0 = ip_get_adjacency (lm, adj_index0);
2828           adj1 = ip_get_adjacency (lm, adj_index1);
2829
               /* The adjacency decides which next node each buffer goes to. */
2830           next0 = adj0->lookup_next_index;
2831           next1 = adj1->lookup_next_index;
2832
2833           flow_hash_config0 = 
2834               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
2835
2836           flow_hash_config1 = 
2837               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
2838
2839           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
2840               (ip0, flow_hash_config0);
2841                                                                   
2842           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
2843               (ip1, flow_hash_config1);
2844
               /*
                * n_adj is asserted to be a non-zero power of two, so the
                * mask below yields an offset in [0, n_adj).
                */
2845           ASSERT (adj0->n_adj > 0);
2846           ASSERT (adj1->n_adj > 0);
2847           ASSERT (is_pow2 (adj0->n_adj));
2848           ASSERT (is_pow2 (adj1->n_adj));
2849           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
2850           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
2851
2852           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2853           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2854
               /* One packet plus its chain length in bytes, per adjacency. */
2855           if (1) /* $$$$$$ HACK FIXME */
2856           vlib_increment_combined_counter 
2857               (cm, cpu_index, adj_index0, 1,
2858                vlib_buffer_length_in_chain (vm, p0));
2859           if (1) /* $$$$$$ HACK FIXME */
2860           vlib_increment_combined_counter 
2861               (cm, cpu_index, adj_index1, 1,
2862                vlib_buffer_length_in_chain (vm, p1));
2863
2864           from += 2;
2865           to_next += 2;
2866           n_left_to_next -= 2;
2867           n_left_from -= 2;
2868
               /*
                * Bit 0 is set when p0 does not belong in the current next
                * frame, bit 1 when p1 does not. Non-zero means the
                * speculative enqueue above has to be repaired.
                */
2869           wrong_next = (next0 != next) + 2*(next1 != next);
2870           if (PREDICT_FALSE (wrong_next != 0))
2871             {
2872               switch (wrong_next)
2873                 {
2874                 case 1:
2875                   /* A B A */
                       /* Only p0 is wrong: pi1 takes p0's slot, one slot is returned. */
2876                   to_next[-2] = pi1;
2877                   to_next -= 1;
2878                   n_left_to_next += 1;
2879                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2880                   break;
2881
2882                 case 2:
2883                   /* A A B */
                       /* Only p1 is wrong: return its slot and send it to next1. */
2884                   to_next -= 1;
2885                   n_left_to_next += 1;
2886                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2887                   break;
2888
2889                 case 3:
2890                   /* A B C */
                       /* Both are wrong: return both slots and send each on its way. */
2891                   to_next -= 2;
2892                   n_left_to_next += 2;
2893                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2894                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2895                   if (next0 == next1)
2896                     {
2897                       /* A B B */
                           /* Both went to the same node: make it the current next. */
2898                       vlib_put_next_frame (vm, node, next, n_left_to_next);
2899                       next = next1;
2900                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2901                     }
2902                 }
2903             }
2904         }
2905     
           /* Single loop: same processing, one buffer at a time. */
2906       while (n_left_from > 0 && n_left_to_next > 0)
2907         {
2908           vlib_buffer_t * p0;
2909           ip4_header_t * ip0;
2910           u32 pi0, adj_index0;
2911           ip_lookup_next_t next0;
2912           ip_adjacency_t * adj0;
2913           u32 fib_index0;
2914           u32 flow_hash_config0;
2915
               /* Speculatively enqueue to the current next frame. */
2916           pi0 = from[0];
2917           to_next[0] = pi0;
2918
2919           p0 = vlib_get_buffer (vm, pi0);
2920
2921           ip0 = vlib_buffer_get_current (p0);
2922
               /* RX-interface FIB unless sw_if_index[VLIB_TX] is not ~0. */
2923           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2924                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2925           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2926               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2927           
2928           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
2929                                               &ip0->dst_address, p0);
2930
2931           adj0 = ip_get_adjacency (lm, adj_index0);
2932
2933           next0 = adj0->lookup_next_index;
2934
2935           flow_hash_config0 = 
2936               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
2937
2938           vnet_buffer (p0)->ip.flow_hash = 
2939             ip4_compute_flow_hash (ip0, flow_hash_config0);
2940
               /* Offset the adjacency index by flow hash modulo n_adj. */
2941           ASSERT (adj0->n_adj > 0);
2942           ASSERT (is_pow2 (adj0->n_adj));
2943           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
2944
2945           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2946
2947           if (1) /* $$$$$$ HACK FIXME */
2948               vlib_increment_combined_counter 
2949                   (cm, cpu_index, adj_index0, 1,
2950                    vlib_buffer_length_in_chain (vm, p0));
2951
2952           from += 1;
2953           to_next += 1;
2954           n_left_to_next -= 1;
2955           n_left_from -= 1;
2956
               /*
                * Wrong guess: give the slot back, close the current frame,
                * switch to next0's frame and enqueue the buffer there.
                */
2957           if (PREDICT_FALSE (next0 != next))
2958             {
2959               n_left_to_next += 1;
2960               vlib_put_next_frame (vm, node, next, n_left_to_next);
2961               next = next0;
2962               vlib_get_next_frame (vm, node, next,
2963                                    to_next, n_left_to_next);
2964               to_next[0] = pi0;
2965               to_next += 1;
2966               n_left_to_next -= 1;
2967             }
2968         }
2969
2970       vlib_put_next_frame (vm, node, next, n_left_to_next);
2971     }
2972
2973   return frame->n_vectors;
2974 }
2975
2976 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2977   .function = ip4_lookup_multicast,
2978   .name = "ip4-lookup-multicast",
2979   .vector_size = sizeof (u32),
2980
2981   .n_next_nodes = IP_LOOKUP_N_NEXT,
2982   .next_nodes = {
2983     [IP_LOOKUP_NEXT_MISS] = "ip4-miss",
2984     [IP_LOOKUP_NEXT_DROP] = "ip4-drop",
2985     [IP_LOOKUP_NEXT_PUNT] = "ip4-punt",
2986     [IP_LOOKUP_NEXT_LOCAL] = "ip4-local",
2987     [IP_LOOKUP_NEXT_ARP] = "ip4-arp",
2988     [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit",
2989     [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify",
2990     [IP_LOOKUP_NEXT_MAP] = "ip4-map",
2991     [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t",
2992     [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd",
2993     [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop",
2994     [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop", 
2995     [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop", 
2996   },
2997 };
2998
2999 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3000   .function = ip4_drop,
3001   .name = "ip4-multicast",
3002   .vector_size = sizeof (u32),
3003
3004   .format_trace = format_ip4_forward_next_trace,
3005
3006   .n_next_nodes = 1,
3007   .next_nodes = {
3008     [0] = "error-drop",
3009   },
3010 };
3011
3012 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3013 {
3014   ip4_main_t * im = &ip4_main;
3015   ip4_fib_mtrie_t * mtrie0;
3016   ip4_fib_mtrie_leaf_t leaf0;
3017   u32 adj_index0;
3018     
3019   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3020
3021   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3022   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3023   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3024   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3025   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3026   
3027   /* Handle default route. */
3028   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3029   
3030   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3031   
3032   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3033                                                   a, 
3034                                                   /* no_default_route */ 0);
3035 }
3036  
3037 static clib_error_t *
3038 test_lookup_command_fn (vlib_main_t * vm,
3039                         unformat_input_t * input,
3040                         vlib_cli_command_t * cmd)
3041 {
3042   u32 table_id = 0;
3043   f64 count = 1;
3044   u32 n;
3045   int i;
3046   ip4_address_t ip4_base_address;
3047   u64 errors = 0;
3048
3049   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3050       if (unformat (input, "table %d", &table_id))
3051         ;
3052       else if (unformat (input, "count %f", &count))
3053         ;
3054
3055       else if (unformat (input, "%U",
3056                          unformat_ip4_address, &ip4_base_address))
3057         ;
3058       else
3059         return clib_error_return (0, "unknown input `%U'",
3060                                   format_unformat_error, input);
3061   }
3062
3063   n = count;
3064
3065   for (i = 0; i < n; i++)
3066     {
3067       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3068         errors++;
3069
3070       ip4_base_address.as_u32 = 
3071         clib_host_to_net_u32 (1 + 
3072                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3073     }
3074
3075   if (errors) 
3076     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3077   else
3078     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3079
3080   return 0;
3081 }
3082
3083 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3084     .path = "test lookup",
3085     .short_help = "test lookup",
3086     .function = test_lookup_command_fn,
3087 };
3088
3089 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3090 {
3091   ip4_main_t * im4 = &ip4_main;
3092   ip4_fib_t * fib;
3093   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3094
3095   if (p == 0)
3096     return VNET_API_ERROR_NO_SUCH_FIB;
3097
3098   fib = vec_elt_at_index (im4->fibs, p[0]);
3099
3100   fib->flow_hash_config = flow_hash_config;
3101   return 0;
3102 }
3103  
3104 static clib_error_t *
3105 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3106                              unformat_input_t * input,
3107                              vlib_cli_command_t * cmd)
3108 {
3109   int matched = 0;
3110   u32 table_id = 0;
3111   u32 flow_hash_config = 0;
3112   int rv;
3113
3114   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3115     if (unformat (input, "table %d", &table_id))
3116       matched = 1;
3117 #define _(a,v) \
3118     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3119     foreach_flow_hash_bit
3120 #undef _
3121     else break;
3122   }
3123   
3124   if (matched == 0)
3125     return clib_error_return (0, "unknown input `%U'",
3126                               format_unformat_error, input);
3127   
3128   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3129   switch (rv)
3130     {
3131     case 0:
3132       break;
3133       
3134     case VNET_API_ERROR_NO_SUCH_FIB:
3135       return clib_error_return (0, "no such FIB table %d", table_id);
3136       
3137     default:
3138       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3139       break;
3140     }
3141   
3142   return 0;
3143 }
3144  
3145 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3146   .path = "set ip flow-hash",
3147   .short_help = 
3148   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3149   .function = set_ip_flow_hash_command_fn,
3150 };
3151  
3152 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3153                                  u32 table_index)
3154 {
3155   vnet_main_t * vnm = vnet_get_main();
3156   vnet_interface_main_t * im = &vnm->interface_main;
3157   ip4_main_t * ipm = &ip4_main;
3158   ip_lookup_main_t * lm = &ipm->lookup_main;
3159   vnet_classify_main_t * cm = &vnet_classify_main;
3160
3161   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3162     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3163
3164   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3165     return VNET_API_ERROR_NO_SUCH_ENTRY;
3166
3167   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3168   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3169
3170   return 0;
3171 }
3172
3173 static clib_error_t *
3174 set_ip_classify_command_fn (vlib_main_t * vm,
3175                             unformat_input_t * input,
3176                             vlib_cli_command_t * cmd)
3177 {
3178   u32 table_index = ~0;
3179   int table_index_set = 0;
3180   u32 sw_if_index = ~0;
3181   int rv;
3182   
3183   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3184     if (unformat (input, "table-index %d", &table_index))
3185       table_index_set = 1;
3186     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3187                        vnet_get_main(), &sw_if_index))
3188       ;
3189     else
3190       break;
3191   }
3192       
3193   if (table_index_set == 0)
3194     return clib_error_return (0, "classify table-index must be specified");
3195
3196   if (sw_if_index == ~0)
3197     return clib_error_return (0, "interface / subif must be specified");
3198
3199   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3200
3201   switch (rv)
3202     {
3203     case 0:
3204       break;
3205
3206     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3207       return clib_error_return (0, "No such interface");
3208
3209     case VNET_API_ERROR_NO_SUCH_ENTRY:
3210       return clib_error_return (0, "No such classifier table");
3211     }
3212   return 0;
3213 }
3214
3215 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3216     .path = "set ip classify",
3217     .short_help = 
3218     "set ip classify intfc <int> table-index <index>",
3219     .function = set_ip_classify_command_fn,
3220 };
3221