Fix ip4 address formatting in the show ip fib output
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47
48 /* This is really, really simple but stupid fib. */
49 u32
50 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
51                            ip4_address_t * dst,
52                            u32 disable_default_route)
53 {
54   ip_lookup_main_t * lm = &im->lookup_main;
55   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
56   uword * p, * hash, key;
57   i32 i, i_min, dst_address, ai;
58
59   i_min = disable_default_route ? 1 : 0;
60   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
61   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
62     {
63       hash = fib->adj_index_by_dst_address[i];
64       if (! hash)
65         continue;
66
67       key = dst_address & im->fib_masks[i];
68       if ((p = hash_get (hash, key)) != 0)
69         {
70           ai = p[0];
71           goto done;
72         }
73     }
74     
75   /* Nothing matches in table. */
76   ai = lm->miss_adj_index;
77
78  done:
79   return ai;
80 }
81
82 static ip4_fib_t *
83 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
84 {
85   ip4_fib_t * fib;
86   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
87   vec_add2 (im->fibs, fib, 1);
88   fib->table_id = table_id;
89   fib->index = fib - im->fibs;
90   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
91   fib->fwd_classify_table_index = ~0;
92   fib->rev_classify_table_index = ~0;
93   ip4_mtrie_init (&fib->mtrie);
94   return fib;
95 }
96
97 ip4_fib_t *
98 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
99                                    u32 table_index_or_id, u32 flags)
100 {
101   uword * p, fib_index;
102
103   fib_index = table_index_or_id;
104   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
105     {
106       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
107       if (! p)
108         return create_fib_with_table_id (im, table_index_or_id);
109       fib_index = p[0];
110     }
111   return vec_elt_at_index (im->fibs, fib_index);
112 }
113
114 static void
115 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
116                                        ip4_fib_t * fib,
117                                        u32 address_length)
118 {
119   hash_t * h;
120   uword max_index;
121
122   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
123   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
124
125   fib->adj_index_by_dst_address[address_length] =
126     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
127
128   hash_set_flags (fib->adj_index_by_dst_address[address_length],
129                   HASH_FLAG_NO_AUTO_SHRINK);
130
131   h = hash_header (fib->adj_index_by_dst_address[address_length]);
132   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
133
134   /* Initialize new/old hash value vectors. */
135   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
136   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
137 }
138
139 static void
140 ip4_fib_set_adj_index (ip4_main_t * im,
141                        ip4_fib_t * fib,
142                        u32 flags,
143                        u32 dst_address_u32,
144                        u32 dst_address_length,
145                        u32 adj_index)
146 {
147   ip_lookup_main_t * lm = &im->lookup_main;
148   uword * hash;
149
150   if (vec_bytes(fib->old_hash_values))
151     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
152   if (vec_bytes(fib->new_hash_values))
153     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
154   fib->new_hash_values[0] = adj_index;
155
156   /* Make sure adj index is valid. */
157   if (CLIB_DEBUG > 0)
158     (void) ip_get_adjacency (lm, adj_index);
159
160   hash = fib->adj_index_by_dst_address[dst_address_length];
161
162   hash = _hash_set3 (hash, dst_address_u32,
163                      fib->new_hash_values,
164                      fib->old_hash_values);
165
166   fib->adj_index_by_dst_address[dst_address_length] = hash;
167
168   if (vec_len (im->add_del_route_callbacks) > 0)
169     {
170       ip4_add_del_route_callback_t * cb;
171       ip4_address_t d;
172       uword * p;
173
174       d.data_u32 = dst_address_u32;
175       vec_foreach (cb, im->add_del_route_callbacks)
176         if ((flags & cb->required_flags) == cb->required_flags)
177           cb->function (im, cb->function_opaque,
178                         fib, flags,
179                         &d, dst_address_length,
180                         fib->old_hash_values,
181                         fib->new_hash_values);
182
183       p = hash_get (hash, dst_address_u32);
184       clib_memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
185     }
186 }
187
188 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
189 {
190   ip_lookup_main_t * lm = &im->lookup_main;
191   ip4_fib_t * fib;
192   u32 dst_address, dst_address_length, adj_index, old_adj_index;
193   uword * hash, is_del;
194   ip4_add_del_route_callback_t * cb;
195
196   /* Either create new adjacency or use given one depending on arguments. */
197   if (a->n_add_adj > 0)
198     {
199       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
200       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
201     }
202   else
203     adj_index = a->adj_index;
204
205   dst_address = a->dst_address.data_u32;
206   dst_address_length = a->dst_address_length;
207   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
208
209   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
210   dst_address &= im->fib_masks[dst_address_length];
211
212   if (! fib->adj_index_by_dst_address[dst_address_length])
213     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
214
215   hash = fib->adj_index_by_dst_address[dst_address_length];
216
217   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
218
219   if (is_del)
220     {
221       fib->old_hash_values[0] = ~0;
222       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
223       fib->adj_index_by_dst_address[dst_address_length] = hash;
224
225       if (vec_len (im->add_del_route_callbacks) > 0
226           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
227         {
228           fib->new_hash_values[0] = ~0;
229           vec_foreach (cb, im->add_del_route_callbacks)
230             if ((a->flags & cb->required_flags) == cb->required_flags)
231               cb->function (im, cb->function_opaque,
232                             fib, a->flags,
233                             &a->dst_address, dst_address_length,
234                             fib->old_hash_values,
235                             fib->new_hash_values);
236         }
237     }
238   else
239     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
240                            adj_index);
241
242   old_adj_index = fib->old_hash_values[0];
243
244   /* Avoid spurious reference count increments */
245   if (old_adj_index == adj_index && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
246     {
247       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
248       if (adj->share_count > 0)
249         adj->share_count --;
250     }
251
252   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
253                                is_del ? old_adj_index : adj_index,
254                                is_del);
255
256   /* Delete old adjacency index if present and changed. */
257   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
258       && old_adj_index != ~0
259       && old_adj_index != adj_index)
260     ip_del_adjacency (lm, old_adj_index);
261 }
262
263 void
264 ip4_add_del_route_next_hop (ip4_main_t * im,
265                             u32 flags,
266                             ip4_address_t * dst_address,
267                             u32 dst_address_length,
268                             ip4_address_t * next_hop,
269                             u32 next_hop_sw_if_index,
270                             u32 next_hop_weight, u32 adj_index, 
271                             u32 explicit_fib_index)
272 {
273   vnet_main_t * vnm = vnet_get_main();
274   ip_lookup_main_t * lm = &im->lookup_main;
275   u32 fib_index;
276   ip4_fib_t * fib;
277   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
278   u32 dst_adj_index, nh_adj_index;
279   uword * dst_hash, * dst_result;
280   uword * nh_hash, * nh_result;
281   ip_adjacency_t * dst_adj;
282   ip_multipath_adjacency_t * old_mp, * new_mp;
283   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
284   int is_interface_next_hop;
285   clib_error_t * error = 0;
286
287   if (explicit_fib_index == (u32)~0)
288       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
289   else
290       fib_index = explicit_fib_index;
291
292   fib = vec_elt_at_index (im->fibs, fib_index);
293   
294   /* Lookup next hop to be added or deleted. */
295   is_interface_next_hop = next_hop->data_u32 == 0;
296   if (adj_index == (u32)~0)
297     {
298       if (is_interface_next_hop)
299         {
300           nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
301           if (nh_result)
302             nh_adj_index = *nh_result;
303           else
304             {
305               ip_adjacency_t * adj;
306               adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
307                                       &nh_adj_index);
308               ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
309               ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
310               hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
311             }
312         }
313       else
314         {
315           nh_hash = fib->adj_index_by_dst_address[32];
316           nh_result = hash_get (nh_hash, next_hop->data_u32);
317           
318           /* Next hop must be known. */
319           if (! nh_result)
320             {
321               ip_adjacency_t * adj;
322
323               nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
324                                                         next_hop, 0);
325               adj = ip_get_adjacency (lm, nh_adj_index);
326               /* if ARP interface adjacencty is present, we need to
327                  install ARP adjaceny for specific next hop */
328               if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
329                   adj->arp.next_hop.ip4.as_u32 == 0)
330                 {
331                   nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
332                 }
333               else
334                 {
335                   vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_IN_FIB;
336                   error = clib_error_return (0, "next-hop %U/32 not in FIB",
337                                              format_ip4_address, next_hop);
338                   goto done;
339                 }
340             }
341           else
342             nh_adj_index = *nh_result;
343         }
344     }
345   else
346     {
347       nh_adj_index = adj_index;
348     }
349   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
350   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
351
352   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
353   dst_result = hash_get (dst_hash, dst_address_u32);
354   if (dst_result)
355     {
356       dst_adj_index = dst_result[0];
357       dst_adj = ip_get_adjacency (lm, dst_adj_index);
358     }
359   else
360     {
361       /* For deletes destination must be known. */
362       if (is_del)
363         {
364           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
365           error = clib_error_return (0, "unknown destination %U/%d",
366                                      format_ip4_address, dst_address,
367                                      dst_address_length);
368           goto done;
369         }
370
371       dst_adj_index = ~0;
372       dst_adj = 0;
373     }
374
375   /* Ignore adds of X/32 with next hop of X. */
376   if (! is_del
377       && dst_address_length == 32
378       && dst_address->data_u32 == next_hop->data_u32 
379       && adj_index != (u32)~0)
380     {
381       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
382       error = clib_error_return (0, "prefix matches next hop %U/%d",
383                                  format_ip4_address, dst_address,
384                                  dst_address_length);
385       goto done;
386     }
387
388   /* Destination is not known and default weight is set so add route
389      to existing non-multipath adjacency */
390   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
391     {
392       /* create new adjacency */
393       ip4_add_del_route_args_t a;
394       a.table_index_or_table_id = fib_index;
395       a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
396                  | IP4_ROUTE_FLAG_FIB_INDEX
397                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
398                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
399                              | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
400       a.dst_address = dst_address[0];
401       a.dst_address_length = dst_address_length;
402       a.adj_index = nh_adj_index;
403       a.add_adj = 0;
404       a.n_add_adj = 0;
405
406       ip4_add_del_route (im, &a);
407
408       goto done;
409     }
410
411   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
412
413   if (! ip_multipath_adjacency_add_del_next_hop
414       (lm, is_del,
415        old_mp_adj_index,
416        nh_adj_index,
417        next_hop_weight,
418        &new_mp_adj_index))
419     {
420       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
421       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
422                                  format_ip4_address, next_hop);
423       goto done;
424     }
425   
426   old_mp = new_mp = 0;
427   if (old_mp_adj_index != ~0)
428     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
429   if (new_mp_adj_index != ~0)
430     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
431
432   if (old_mp != new_mp)
433     {
434       ip4_add_del_route_args_t a;
435       a.table_index_or_table_id = fib_index;
436       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
437                  | IP4_ROUTE_FLAG_FIB_INDEX
438                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
439                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
440       a.dst_address = dst_address[0];
441       a.dst_address_length = dst_address_length;
442       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
443       a.add_adj = 0;
444       a.n_add_adj = 0;
445
446       ip4_add_del_route (im, &a);
447     }
448
449  done:
450   if (error)
451     clib_error_report (error);
452 }
453
454 void *
455 ip4_get_route (ip4_main_t * im,
456                u32 table_index_or_table_id,
457                u32 flags,
458                u8 * address,
459                u32 address_length)
460 {
461   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
462   u32 dst_address = * (u32 *) address;
463   uword * hash, * p;
464
465   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
466   dst_address &= im->fib_masks[address_length];
467
468   hash = fib->adj_index_by_dst_address[address_length];
469   p = hash_get (hash, dst_address);
470   return (void *) p;
471 }
472
473 void
474 ip4_foreach_matching_route (ip4_main_t * im,
475                             u32 table_index_or_table_id,
476                             u32 flags,
477                             ip4_address_t * address,
478                             u32 address_length,
479                             ip4_address_t ** results,
480                             u8 ** result_lengths)
481 {
482   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
483   u32 dst_address = address->data_u32;
484   u32 this_length = address_length;
485   
486   if (*results)
487     _vec_len (*results) = 0;
488   if (*result_lengths)
489     _vec_len (*result_lengths) = 0;
490
491   while (this_length <= 32 && vec_len (results) == 0)
492     {
493       uword k, v;
494       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
495         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
496           {
497             ip4_address_t a;
498             a.data_u32 = k;
499             vec_add1 (*results, a);
500             vec_add1 (*result_lengths, this_length);
501           }
502       }));
503
504       this_length++;
505     }
506 }
507
508 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
509                                   u32 table_index_or_table_id,
510                                   u32 flags)
511 {
512   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
513   ip_lookup_main_t * lm = &im->lookup_main;
514   u32 i, l;
515   ip4_address_t a;
516   ip4_add_del_route_callback_t * cb;
517   static ip4_address_t * to_delete;
518
519   if (lm->n_adjacency_remaps == 0)
520     return;
521
522   for (l = 0; l <= 32; l++)
523     {
524       hash_pair_t * p;
525       uword * hash = fib->adj_index_by_dst_address[l];
526
527       if (hash_elts (hash) == 0)
528         continue;
529
530       if (to_delete)
531         _vec_len (to_delete) = 0;
532
533       hash_foreach_pair (p, hash, ({
534         u32 adj_index = p->value[0];
535         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
536
537         if (m)
538           {
539             /* Record destination address from hash key. */
540             a.data_u32 = p->key;
541
542             /* New adjacency points to nothing: so delete prefix. */
543             if (m == ~0)
544               vec_add1 (to_delete, a);
545             else
546               {
547                 /* Remap to new adjacency. */
548                 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
549
550                 /* Set new adjacency value. */
551                 fib->new_hash_values[0] = p->value[0] = m - 1;
552
553                 vec_foreach (cb, im->add_del_route_callbacks)
554                   if ((flags & cb->required_flags) == cb->required_flags)
555                     cb->function (im, cb->function_opaque,
556                                   fib, flags | IP4_ROUTE_FLAG_ADD,
557                                   &a, l,
558                                   fib->old_hash_values,
559                                   fib->new_hash_values);
560               }
561           }
562       }));
563
564       fib->new_hash_values[0] = ~0;
565       for (i = 0; i < vec_len (to_delete); i++)
566         {
567           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
568           vec_foreach (cb, im->add_del_route_callbacks)
569             if ((flags & cb->required_flags) == cb->required_flags)
570               cb->function (im, cb->function_opaque,
571                             fib, flags | IP4_ROUTE_FLAG_DEL,
572                             &a, l,
573                             fib->old_hash_values,
574                             fib->new_hash_values);
575         }
576     }
577
578   /* Also remap adjacencies in mtrie. */
579   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
580
581   /* Reset mapping table. */
582   vec_zero (lm->adjacency_remap_table);
583
584   /* All remaps have been performed. */
585   lm->n_adjacency_remaps = 0;
586 }
587
588 void ip4_delete_matching_routes (ip4_main_t * im,
589                                  u32 table_index_or_table_id,
590                                  u32 flags,
591                                  ip4_address_t * address,
592                                  u32 address_length)
593 {
594   static ip4_address_t * matching_addresses;
595   static u8 * matching_address_lengths;
596   u32 l, i;
597   ip4_add_del_route_args_t a;
598
599   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
600   a.table_index_or_table_id = table_index_or_table_id;
601   a.adj_index = ~0;
602   a.add_adj = 0;
603   a.n_add_adj = 0;
604
605   for (l = address_length + 1; l <= 32; l++)
606     {
607       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
608                                   address,
609                                   l,
610                                   &matching_addresses,
611                                   &matching_address_lengths);
612       for (i = 0; i < vec_len (matching_addresses); i++)
613         {
614           a.dst_address = matching_addresses[i];
615           a.dst_address_length = matching_address_lengths[i];
616           ip4_add_del_route (im, &a);
617         }
618     }
619
620   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
621 }
622
623 always_inline uword
624 ip4_lookup_inline (vlib_main_t * vm,
625                    vlib_node_runtime_t * node,
626                    vlib_frame_t * frame,
627                    int lookup_for_responses_to_locally_received_packets)
628 {
629   ip4_main_t * im = &ip4_main;
630   ip_lookup_main_t * lm = &im->lookup_main;
631   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
632   u32 n_left_from, n_left_to_next, * from, * to_next;
633   ip_lookup_next_t next;
634   u32 cpu_index = os_get_cpu_number();
635
636   from = vlib_frame_vector_args (frame);
637   n_left_from = frame->n_vectors;
638   next = node->cached_next_index;
639
640   while (n_left_from > 0)
641     {
642       vlib_get_next_frame (vm, node, next,
643                            to_next, n_left_to_next);
644
645       while (n_left_from >= 4 && n_left_to_next >= 2)
646         {
647           vlib_buffer_t * p0, * p1;
648           ip4_header_t * ip0, * ip1;
649           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
650           ip_lookup_next_t next0, next1;
651           ip_adjacency_t * adj0, * adj1;
652           ip4_fib_mtrie_t * mtrie0, * mtrie1;
653           ip4_fib_mtrie_leaf_t leaf0, leaf1;
654           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
655           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
656           u32 flow_hash_config0, flow_hash_config1;
657           u32 hash_c0, hash_c1;
658           u32 wrong_next;
659
660           /* Prefetch next iteration. */
661           {
662             vlib_buffer_t * p2, * p3;
663
664             p2 = vlib_get_buffer (vm, from[2]);
665             p3 = vlib_get_buffer (vm, from[3]);
666
667             vlib_prefetch_buffer_header (p2, LOAD);
668             vlib_prefetch_buffer_header (p3, LOAD);
669
670             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
671             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
672           }
673
674           pi0 = to_next[0] = from[0];
675           pi1 = to_next[1] = from[1];
676
677           p0 = vlib_get_buffer (vm, pi0);
678           p1 = vlib_get_buffer (vm, pi1);
679
680           ip0 = vlib_buffer_get_current (p0);
681           ip1 = vlib_buffer_get_current (p1);
682
683           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
684           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
685           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
686             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
687           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
688             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
689
690
691           if (! lookup_for_responses_to_locally_received_packets)
692             {
693               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
694               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
695
696               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
697
698               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0);
699               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 0);
700             }
701
702           tcp0 = (void *) (ip0 + 1);
703           tcp1 = (void *) (ip1 + 1);
704
705           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
706                          || ip0->protocol == IP_PROTOCOL_UDP);
707           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
708                          || ip1->protocol == IP_PROTOCOL_UDP);
709
710           if (! lookup_for_responses_to_locally_received_packets)
711             {
712               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1);
713               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 1);
714             }
715
716           if (! lookup_for_responses_to_locally_received_packets)
717             {
718               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2);
719               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 2);
720             }
721
722           if (! lookup_for_responses_to_locally_received_packets)
723             {
724               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3);
725               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 3);
726             }
727
728           if (lookup_for_responses_to_locally_received_packets)
729             {
730               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
731               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
732             }
733           else
734             {
735               /* Handle default route. */
736               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
737               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
738
739               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
740               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
741             }
742
743           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
744                                                            &ip0->dst_address,
745                                                            /* no_default_route */ 0));
746           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
747                                                            &ip1->dst_address,
748                                                            /* no_default_route */ 0));
749           adj0 = ip_get_adjacency (lm, adj_index0);
750           adj1 = ip_get_adjacency (lm, adj_index1);
751
752           next0 = adj0->lookup_next_index;
753           next1 = adj1->lookup_next_index;
754
755           /* Use flow hash to compute multipath adjacency. */
756           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
757           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
758           if (PREDICT_FALSE (adj0->n_adj > 1))
759             {
760               flow_hash_config0 = 
761                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
762               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
763                 ip4_compute_flow_hash (ip0, flow_hash_config0);
764             }
765           if (PREDICT_FALSE(adj1->n_adj > 1))
766             {
767               flow_hash_config1 = 
768                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
769               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
770                 ip4_compute_flow_hash (ip1, flow_hash_config1);
771             }
772
773           ASSERT (adj0->n_adj > 0);
774           ASSERT (adj1->n_adj > 0);
775           ASSERT (is_pow2 (adj0->n_adj));
776           ASSERT (is_pow2 (adj1->n_adj));
777           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
778           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
779
780           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
781           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
782
783           vlib_increment_combined_counter 
784               (cm, cpu_index, adj_index0, 1,
785                vlib_buffer_length_in_chain (vm, p0) 
786                + sizeof(ethernet_header_t));
787           vlib_increment_combined_counter 
788               (cm, cpu_index, adj_index1, 1,
789                vlib_buffer_length_in_chain (vm, p1)
790                + sizeof(ethernet_header_t));
791
792           from += 2;
793           to_next += 2;
794           n_left_to_next -= 2;
795           n_left_from -= 2;
796
797           wrong_next = (next0 != next) + 2*(next1 != next);
798           if (PREDICT_FALSE (wrong_next != 0))
799             {
800               switch (wrong_next)
801                 {
802                 case 1:
803                   /* A B A */
804                   to_next[-2] = pi1;
805                   to_next -= 1;
806                   n_left_to_next += 1;
807                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
808                   break;
809
810                 case 2:
811                   /* A A B */
812                   to_next -= 1;
813                   n_left_to_next += 1;
814                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
815                   break;
816
817                 case 3:
818                   /* A B C */
819                   to_next -= 2;
820                   n_left_to_next += 2;
821                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
822                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
823                   if (next0 == next1)
824                     {
825                       /* A B B */
826                       vlib_put_next_frame (vm, node, next, n_left_to_next);
827                       next = next1;
828                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
829                     }
830                 }
831             }
832         }
833     
834       while (n_left_from > 0 && n_left_to_next > 0)
835         {
836           vlib_buffer_t * p0;
837           ip4_header_t * ip0;
838           __attribute__((unused)) tcp_header_t * tcp0;
839           ip_lookup_next_t next0;
840           ip_adjacency_t * adj0;
841           ip4_fib_mtrie_t * mtrie0;
842           ip4_fib_mtrie_leaf_t leaf0;
843           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
844           u32 flow_hash_config0, hash_c0;
845
846           pi0 = from[0];
847           to_next[0] = pi0;
848
849           p0 = vlib_get_buffer (vm, pi0);
850
851           ip0 = vlib_buffer_get_current (p0);
852
853           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
854           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
855             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
856
857           if (! lookup_for_responses_to_locally_received_packets)
858             {
859               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
860
861               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
862
863               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0);
864             }
865
866           tcp0 = (void *) (ip0 + 1);
867
868           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
869                          || ip0->protocol == IP_PROTOCOL_UDP);
870
871           if (! lookup_for_responses_to_locally_received_packets)
872             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1);
873
874           if (! lookup_for_responses_to_locally_received_packets)
875             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2);
876
877           if (! lookup_for_responses_to_locally_received_packets)
878             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3);
879
880           if (lookup_for_responses_to_locally_received_packets)
881             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
882           else
883             {
884               /* Handle default route. */
885               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
886               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
887             }
888
889           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
890                                                            &ip0->dst_address,
891                                                            /* no_default_route */ 0));
892
893           adj0 = ip_get_adjacency (lm, adj_index0);
894
895           next0 = adj0->lookup_next_index;
896
897           /* Use flow hash to compute multipath adjacency. */
898           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
899           if (PREDICT_FALSE(adj0->n_adj > 1))
900             {
901               flow_hash_config0 = 
902                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
903
904               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
905                 ip4_compute_flow_hash (ip0, flow_hash_config0);
906             }
907
908           ASSERT (adj0->n_adj > 0);
909           ASSERT (is_pow2 (adj0->n_adj));
910           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
911
912           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
913
914           vlib_increment_combined_counter 
915               (cm, cpu_index, adj_index0, 1,
916                vlib_buffer_length_in_chain (vm, p0)
917                + sizeof(ethernet_header_t));
918
919           from += 1;
920           to_next += 1;
921           n_left_to_next -= 1;
922           n_left_from -= 1;
923
924           if (PREDICT_FALSE (next0 != next))
925             {
926               n_left_to_next += 1;
927               vlib_put_next_frame (vm, node, next, n_left_to_next);
928               next = next0;
929               vlib_get_next_frame (vm, node, next,
930                                    to_next, n_left_to_next);
931               to_next[0] = pi0;
932               to_next += 1;
933               n_left_to_next -= 1;
934             }
935         }
936
937       vlib_put_next_frame (vm, node, next, n_left_to_next);
938     }
939
940   return frame->n_vectors;
941 }
942
943 static uword
944 ip4_lookup (vlib_main_t * vm,
945             vlib_node_runtime_t * node,
946             vlib_frame_t * frame)
947 {
948   return ip4_lookup_inline (vm, node, frame, /* lookup_for_responses_to_locally_received_packets */ 0);
949
950 }
951
952 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
953                                         ip_adjacency_t * adj,
954                                         u32 sw_if_index,
955                                         u32 if_address_index)
956 {
957   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
958   ip_lookup_next_t n;
959   vnet_l3_packet_type_t packet_type;
960   u32 node_index;
961
962   if (hw->hw_class_index == ethernet_hw_interface_class.index
963       || hw->hw_class_index == srp_hw_interface_class.index)
964     {
965       /* 
966        * We have a bit of a problem in this case. ip4-arp uses
967        * the rewrite_header.next_index to hand pkts to the
968        * indicated inteface output node. We can end up in
969        * ip4_rewrite_local, too, which also pays attention to 
970        * rewrite_header.next index. Net result: a hack in
971        * ip4_rewrite_local...
972        */
973       n = IP_LOOKUP_NEXT_ARP;
974       node_index = ip4_arp_node.index;
975       adj->if_address_index = if_address_index;
976       adj->arp.next_hop.ip4.as_u32 = 0;
977       ip46_address_reset(&adj->arp.next_hop);
978       packet_type = VNET_L3_PACKET_TYPE_ARP;
979     }
980   else
981     {
982       n = IP_LOOKUP_NEXT_REWRITE;
983       node_index = ip4_rewrite_node.index;
984       packet_type = VNET_L3_PACKET_TYPE_IP4;
985     }
986
987   adj->lookup_next_index = n;
988   vnet_rewrite_for_sw_interface
989     (vnm,
990      packet_type,
991      sw_if_index,
992      node_index,
993      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
994      &adj->rewrite_header,
995      sizeof (adj->rewrite_data));
996 }
997
998 static void
999 ip4_add_interface_routes (u32 sw_if_index,
1000                           ip4_main_t * im, u32 fib_index,
1001                           ip_interface_address_t * a)
1002 {
1003   vnet_main_t * vnm = vnet_get_main();
1004   ip_lookup_main_t * lm = &im->lookup_main;
1005   ip_adjacency_t * adj;
1006   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1007   ip4_add_del_route_args_t x;
1008   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1009   u32 classify_table_index;
1010
1011   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1012   x.table_index_or_table_id = fib_index;
1013   x.flags = (IP4_ROUTE_FLAG_ADD
1014              | IP4_ROUTE_FLAG_FIB_INDEX
1015              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1016   x.dst_address = address[0];
1017   x.dst_address_length = a->address_length;
1018   x.n_add_adj = 0;
1019   x.add_adj = 0;
1020
1021   a->neighbor_probe_adj_index = ~0;
1022   if (a->address_length < 32)
1023     {
1024       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1025                               &x.adj_index);
1026       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1027       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1028       ip4_add_del_route (im, &x);
1029       a->neighbor_probe_adj_index = x.adj_index;
1030     }
1031   
1032   /* Add e.g. 1.1.1.1/32 as local to this host. */
1033   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1034                           &x.adj_index);
1035   
1036   classify_table_index = ~0;
1037   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1038     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1039   if (classify_table_index != (u32) ~0)
1040     {
1041       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1042       adj->classify.table_index = classify_table_index;
1043     }
1044   else
1045     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1046   
1047   adj->if_address_index = a - lm->if_address_pool;
1048   adj->rewrite_header.sw_if_index = sw_if_index;
1049   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1050   /* 
1051    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1052    * fail an RPF-ish check, but still go thru the rewrite code...
1053    */
1054   adj->rewrite_header.data_bytes = 0;
1055
1056   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1057   x.dst_address_length = 32;
1058   ip4_add_del_route (im, &x);
1059 }
1060
1061 static void
1062 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1063 {
1064   ip4_add_del_route_args_t x;
1065
1066   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1067   x.table_index_or_table_id = fib_index;
1068   x.flags = (IP4_ROUTE_FLAG_DEL
1069              | IP4_ROUTE_FLAG_FIB_INDEX
1070              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1071   x.dst_address = address[0];
1072   x.dst_address_length = address_length;
1073   x.adj_index = ~0;
1074   x.n_add_adj = 0;
1075   x.add_adj = 0;
1076
1077   if (address_length < 32)
1078     ip4_add_del_route (im, &x);
1079
1080   x.dst_address_length = 32;
1081   ip4_add_del_route (im, &x);
1082
1083   ip4_delete_matching_routes (im,
1084                               fib_index,
1085                               IP4_ROUTE_FLAG_FIB_INDEX,
1086                               address,
1087                               address_length);
1088 }
1089
/*
 * An IPv4 address together with a software interface index and a length.
 * NOTE(review): not referenced in the visible part of this file; `length`
 * is presumably the prefix length of `address` -- confirm against users.
 */
typedef struct {
    /* Software interface index. */
    u32 sw_if_index;
    /* IPv4 address. */
    ip4_address_t address;
    /* Presumably the prefix length, in bits -- TODO confirm. */
    u32 length;
} ip4_interface_address_t;
1095
1096 static clib_error_t *
1097 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1098                                         u32 sw_if_index,
1099                                         ip4_address_t * new_address,
1100                                         u32 new_length,
1101                                         u32 redistribute,
1102                                         u32 insert_routes,
1103                                         u32 is_del);
1104
1105 static clib_error_t *
1106 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1107                                         u32 sw_if_index,
1108                                         ip4_address_t * address,
1109                                         u32 address_length,
1110                                         u32 redistribute,
1111                                         u32 insert_routes,
1112                                         u32 is_del)
1113 {
1114   vnet_main_t * vnm = vnet_get_main();
1115   ip4_main_t * im = &ip4_main;
1116   ip_lookup_main_t * lm = &im->lookup_main;
1117   clib_error_t * error = 0;
1118   u32 if_address_index, elts_before;
1119   ip4_address_fib_t ip4_af, * addr_fib = 0;
1120
1121   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1122   ip4_addr_fib_init (&ip4_af, address,
1123                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1124   vec_add1 (addr_fib, ip4_af);
1125
1126   /* When adding an address check that it does not conflict with an existing address. */
1127   if (! is_del)
1128     {
1129       ip_interface_address_t * ia;
1130       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1131                                     0 /* honor unnumbered */,
1132       ({
1133         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1134
1135         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1136             || ip4_destination_matches_route (im, x, address, address_length))
1137           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1138                                     format_ip4_address_and_length, address, address_length,
1139                                     format_ip4_address_and_length, x, ia->address_length,
1140                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1141       }));
1142     }
1143
1144   elts_before = pool_elts (lm->if_address_pool);
1145
1146   error = ip_interface_address_add_del
1147     (lm,
1148      sw_if_index,
1149      addr_fib,
1150      address_length,
1151      is_del,
1152      &if_address_index);
1153   if (error)
1154     goto done;
1155   
1156   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1157     {
1158       if (is_del)
1159         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1160                                   address_length);
1161       
1162       else
1163           ip4_add_interface_routes (sw_if_index,
1164                                     im, ip4_af.fib_index,
1165                                     pool_elt_at_index 
1166                                     (lm->if_address_pool, if_address_index));
1167     }
1168
1169   /* If pool did not grow/shrink: add duplicate address. */
1170   if (elts_before != pool_elts (lm->if_address_pool))
1171     {
1172       ip4_add_del_interface_address_callback_t * cb;
1173       vec_foreach (cb, im->add_del_interface_address_callbacks)
1174         cb->function (im, cb->function_opaque, sw_if_index,
1175                       address, address_length,
1176                       if_address_index,
1177                       is_del);
1178     }
1179
1180  done:
1181   vec_free (addr_fib);
1182   return error;
1183 }
1184
1185 clib_error_t *
1186 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1187                                ip4_address_t * address, u32 address_length,
1188                                u32 is_del)
1189 {
1190   return ip4_add_del_interface_address_internal
1191     (vm, sw_if_index, address, address_length,
1192      /* redistribute */ 1,
1193      /* insert_routes */ 1,
1194      is_del);
1195 }
1196
1197 static clib_error_t *
1198 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1199                                 u32 sw_if_index,
1200                                 u32 flags)
1201 {
1202   ip4_main_t * im = &ip4_main;
1203   ip_interface_address_t * ia;
1204   ip4_address_t * a;
1205   u32 is_admin_up, fib_index;
1206   
1207   /* Fill in lookup tables with default table (0). */
1208   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1209   
1210   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1211   
1212   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1213   
1214   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1215
1216   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1217                                 0 /* honor unnumbered */,
1218   ({
1219     a = ip_interface_address_get_address (&im->lookup_main, ia);
1220     if (is_admin_up)
1221       ip4_add_interface_routes (sw_if_index,
1222                                 im, fib_index,
1223                                 ia);
1224     else
1225       ip4_del_interface_routes (im, fib_index,
1226                                 a, ia->address_length);
1227   }));
1228
1229   return 0;
1230 }
1231  
1232 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1233
1234 static clib_error_t *
1235 ip4_sw_interface_add_del (vnet_main_t * vnm,
1236                           u32 sw_if_index,
1237                           u32 is_add)
1238 {
1239   vlib_main_t * vm = vnm->vlib_main;
1240   ip4_main_t * im = &ip4_main;
1241   ip_lookup_main_t * lm = &im->lookup_main;
1242   u32 ci, cast;
1243
1244   for (cast = 0; cast < VNET_N_CAST; cast++)
1245     {
1246       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1247       vnet_config_main_t * vcm = &cm->config_main;
1248
1249       if (! vcm->node_index_by_feature_index)
1250         {
1251           if (cast == VNET_UNICAST)
1252             {
1253               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1254               static char * feature_nodes[] = {
1255                 [IP4_RX_FEATURE_CHECK_ACCESS] = "ip4-inacl",
1256                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX] = "ip4-source-check-via-rx",
1257                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY] = "ip4-source-check-via-any",
1258                 [IP4_RX_FEATURE_IPSEC] = "ipsec-input-ip4",
1259                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1260                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup",
1261               };
1262
1263               vnet_config_init (vm, vcm,
1264                                 start_nodes, ARRAY_LEN (start_nodes),
1265                                 feature_nodes, ARRAY_LEN (feature_nodes));
1266             }
1267           else
1268             {
1269               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1270               static char * feature_nodes[] = {
1271                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1272                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup-multicast",
1273               };
1274
1275               vnet_config_init (vm, vcm,
1276                                 start_nodes, ARRAY_LEN (start_nodes),
1277                                 feature_nodes, ARRAY_LEN (feature_nodes));
1278             }
1279         }
1280
1281       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1282       ci = cm->config_index_by_sw_if_index[sw_if_index];
1283
1284       if (is_add)
1285         ci = vnet_config_add_feature (vm, vcm,
1286                                       ci,
1287                                       IP4_RX_FEATURE_LOOKUP,
1288                                       /* config data */ 0,
1289                                       /* # bytes of config data */ 0);
1290       else
1291         ci = vnet_config_del_feature (vm, vcm,
1292                                       ci,
1293                                       IP4_RX_FEATURE_LOOKUP,
1294                                       /* config data */ 0,
1295                                       /* # bytes of config data */ 0);
1296
1297       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1298     }
1299
1300   return /* no error */ 0;
1301 }
1302
1303 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1304
1305
/*
 * Registration of the "ip4-lookup" graph node: dispatched by ip4_lookup,
 * with one next node per IP lookup result (IP_LOOKUP_N_NEXT entries taken
 * from IP4_LOOKUP_NEXT_NODES).
 */
VLIB_REGISTER_NODE (ip4_lookup_node) = {
  .function = ip4_lookup,
  .name = "ip4-lookup",
  /* Frame vector elements are u32-sized. */
  .vector_size = sizeof (u32),

  .n_next_nodes = IP_LOOKUP_N_NEXT,
  .next_nodes = IP4_LOOKUP_NEXT_NODES,
};
1314
/* Global IP4 main: the single ip4_main_t instance used throughout this file. */
ip4_main_t ip4_main;
1317
1318 clib_error_t *
1319 ip4_lookup_init (vlib_main_t * vm)
1320 {
1321   ip4_main_t * im = &ip4_main;
1322   uword i;
1323
1324   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1325     {
1326       u32 m;
1327
1328       if (i < 32)
1329         m = pow2_mask (i) << (32 - i);
1330       else 
1331         m = ~0;
1332       im->fib_masks[i] = clib_host_to_net_u32 (m);
1333     }
1334
1335   /* Create FIB with index 0 and table id of 0. */
1336   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1337
1338   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1339
1340   {
1341     pg_node_t * pn;
1342     pn = pg_get_node (ip4_lookup_node.index);
1343     pn->unformat_edit = unformat_pg_ip4_header;
1344   }
1345
1346   {
1347     ethernet_arp_header_t h;
1348
1349     memset (&h, 0, sizeof (h));
1350
1351     /* Set target ethernet address to all zeros. */
1352     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1353
1354 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1355 #define _8(f,v) h.f = v;
1356     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1357     _16 (l3_type, ETHERNET_TYPE_IP4);
1358     _8 (n_l2_address_bytes, 6);
1359     _8 (n_l3_address_bytes, 4);
1360     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1361 #undef _16
1362 #undef _8
1363
1364     vlib_packet_template_init (vm,
1365                                &im->ip4_arp_request_packet_template,
1366                                /* data */ &h,
1367                                sizeof (h),
1368                                /* alloc chunk size */ 8,
1369                                "ip4 arp");
1370   }
1371
1372   return 0;
1373 }
1374
1375 VLIB_INIT_FUNCTION (ip4_lookup_init);
1376
/*
 * Per-packet trace record shared by the ip4 forwarding next nodes;
 * filled in by ip4_forward_next_trace and rendered by
 * format_ip4_forward_next_trace.
 */
typedef struct {
  /* Adjacency taken. */
  u32 adj_index;
  /* Flow hash copied from the buffer's ip.flow_hash. */
  u32 flow_hash;
  /* FIB index looked up from the buffer's RX software interface. */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. */
  /* NOTE(review): three u32 fields precede this array but only one
     sizeof(u32) is subtracted from 64, so the record is presumably larger
     than 64 bytes -- confirm whether that is intended. */
  u8 packet_data[64 - 1*sizeof(u32)];
} ip4_forward_next_trace_t;
1386
1387 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1388 {
1389   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1390   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1391   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1392   vnet_main_t * vnm = vnet_get_main();
1393   ip4_main_t * im = &ip4_main;
1394   ip_adjacency_t * adj;
1395   uword indent = format_get_indent (s);
1396
1397   adj = ip_get_adjacency (&im->lookup_main, t->adj_index);
1398   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1399               t->fib_index, t->adj_index, format_ip_adjacency,
1400               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1401   switch (adj->lookup_next_index)
1402     {
1403     case IP_LOOKUP_NEXT_REWRITE:
1404       s = format (s, "\n%U%U",
1405                   format_white_space, indent,
1406                   format_ip_adjacency_packet_data,
1407                   vnm, &im->lookup_main, t->adj_index,
1408                   t->packet_data, sizeof (t->packet_data));
1409       break;
1410
1411     default:
1412       break;
1413     }
1414
1415   return s;
1416 }
1417
1418 /* Common trace function for all ip4-forward next nodes. */
1419 void
1420 ip4_forward_next_trace (vlib_main_t * vm,
1421                         vlib_node_runtime_t * node,
1422                         vlib_frame_t * frame,
1423                         vlib_rx_or_tx_t which_adj_index)
1424 {
1425   u32 * from, n_left;
1426   ip4_main_t * im = &ip4_main;
1427
1428   n_left = frame->n_vectors;
1429   from = vlib_frame_vector_args (frame);
1430   
1431   while (n_left >= 4)
1432     {
1433       u32 bi0, bi1;
1434       vlib_buffer_t * b0, * b1;
1435       ip4_forward_next_trace_t * t0, * t1;
1436
1437       /* Prefetch next iteration. */
1438       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1439       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1440
1441       bi0 = from[0];
1442       bi1 = from[1];
1443
1444       b0 = vlib_get_buffer (vm, bi0);
1445       b1 = vlib_get_buffer (vm, bi1);
1446
1447       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1448         {
1449           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1450           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1451           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1452           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1453                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1454           clib_memcpy (t0->packet_data,
1455                   vlib_buffer_get_current (b0),
1456                   sizeof (t0->packet_data));
1457         }
1458       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1459         {
1460           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1461           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1462           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1463           t1->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1464                              vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1465           clib_memcpy (t1->packet_data,
1466                   vlib_buffer_get_current (b1),
1467                   sizeof (t1->packet_data));
1468         }
1469       from += 2;
1470       n_left -= 2;
1471     }
1472
1473   while (n_left >= 1)
1474     {
1475       u32 bi0;
1476       vlib_buffer_t * b0;
1477       ip4_forward_next_trace_t * t0;
1478
1479       bi0 = from[0];
1480
1481       b0 = vlib_get_buffer (vm, bi0);
1482
1483       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1484         {
1485           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1486           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1487           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1488           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1489                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1490           clib_memcpy (t0->packet_data,
1491                   vlib_buffer_get_current (b0),
1492                   sizeof (t0->packet_data));
1493         }
1494       from += 1;
1495       n_left -= 1;
1496     }
1497 }
1498
1499 static uword
1500 ip4_drop_or_punt (vlib_main_t * vm,
1501                   vlib_node_runtime_t * node,
1502                   vlib_frame_t * frame,
1503                   ip4_error_t error_code)
1504 {
1505   u32 * buffers = vlib_frame_vector_args (frame);
1506   uword n_packets = frame->n_vectors;
1507
1508   vlib_error_drop_buffers (vm, node,
1509                            buffers,
1510                            /* stride */ 1,
1511                            n_packets,
1512                            /* next */ 0,
1513                            ip4_input_node.index,
1514                            error_code);
1515
1516   if (node->flags & VLIB_NODE_FLAG_TRACE)
1517     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1518
1519   return n_packets;
1520 }
1521
1522 static uword
1523 ip4_drop (vlib_main_t * vm,
1524           vlib_node_runtime_t * node,
1525           vlib_frame_t * frame)
1526 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1527
1528 static uword
1529 ip4_punt (vlib_main_t * vm,
1530           vlib_node_runtime_t * node,
1531           vlib_frame_t * frame)
1532 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1533
1534 static uword
1535 ip4_miss (vlib_main_t * vm,
1536           vlib_node_runtime_t * node,
1537           vlib_frame_t * frame)
1538 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1539
/*
 * Registration of the "ip4-drop" graph node (dispatched by ip4_drop).
 * Its only next node is "error-drop"; traces are rendered with
 * format_ip4_forward_next_trace.
 */
VLIB_REGISTER_NODE (ip4_drop_node,static) = {
  .function = ip4_drop,
  .name = "ip4-drop",
  /* Frame vector elements are u32-sized. */
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,

  .n_next_nodes = 1,
  .next_nodes = {
    [0] = "error-drop",
  },
};
1552
/*
 * Registration of the "ip4-punt" graph node (dispatched by ip4_punt).
 * Its only next node is "error-punt"; traces are rendered with
 * format_ip4_forward_next_trace.
 */
VLIB_REGISTER_NODE (ip4_punt_node,static) = {
  .function = ip4_punt,
  .name = "ip4-punt",
  /* Frame vector elements are u32-sized. */
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,

  .n_next_nodes = 1,
  .next_nodes = {
    [0] = "error-punt",
  },
};
1565
/*
 * Registration of the "ip4-miss" graph node (dispatched by ip4_miss).
 * Its only next node is "error-drop"; traces are rendered with
 * format_ip4_forward_next_trace.
 */
VLIB_REGISTER_NODE (ip4_miss_node,static) = {
  .function = ip4_miss,
  .name = "ip4-miss",
  /* Frame vector elements are u32-sized. */
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,

  .n_next_nodes = 1,
  .next_nodes = {
    [0] = "error-drop",
  },
};
1578
1579 /* Compute TCP/UDP/ICMP4 checksum in software. */
1580 u16
1581 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1582                               ip4_header_t * ip0)
1583 {
1584   ip_csum_t sum0;
1585   u32 ip_header_length, payload_length_host_byte_order;
1586   u32 n_this_buffer, n_bytes_left;
1587   u16 sum16;
1588   void * data_this_buffer;
1589   
1590   /* Initialize checksum with ip header. */
1591   ip_header_length = ip4_header_bytes (ip0);
1592   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1593   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1594
1595   if (BITS (uword) == 32)
1596     {
1597       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1598       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1599     }
1600   else
1601     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1602
1603   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1604   data_this_buffer = (void *) ip0 + ip_header_length;
1605   if (n_this_buffer + ip_header_length > p0->current_length)
1606     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1607   while (1)
1608     {
1609       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1610       n_bytes_left -= n_this_buffer;
1611       if (n_bytes_left == 0)
1612         break;
1613
1614       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1615       p0 = vlib_get_buffer (vm, p0->next_buffer);
1616       data_this_buffer = vlib_buffer_get_current (p0);
1617       n_this_buffer = p0->current_length;
1618     }
1619
1620   sum16 = ~ ip_csum_fold (sum0);
1621
1622   return sum16;
1623 }
1624
1625 static u32
1626 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1627 {
1628   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1629   udp_header_t * udp0;
1630   u16 sum16;
1631
1632   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1633           || ip0->protocol == IP_PROTOCOL_UDP);
1634
1635   udp0 = (void *) (ip0 + 1);
1636   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1637     {
1638       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1639                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1640       return p0->flags;
1641     }
1642
1643   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1644
1645   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1646                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1647
1648   return p0->flags;
1649 }
1650
1651 static uword
1652 ip4_local (vlib_main_t * vm,
1653            vlib_node_runtime_t * node,
1654            vlib_frame_t * frame)
1655 {
1656   ip4_main_t * im = &ip4_main;
1657   ip_lookup_main_t * lm = &im->lookup_main;
1658   ip_local_next_t next_index;
1659   u32 * from, * to_next, n_left_from, n_left_to_next;
1660   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1661
1662   from = vlib_frame_vector_args (frame);
1663   n_left_from = frame->n_vectors;
1664   next_index = node->cached_next_index;
1665   
1666   if (node->flags & VLIB_NODE_FLAG_TRACE)
1667     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1668
1669   while (n_left_from > 0)
1670     {
1671       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1672
1673       while (n_left_from >= 4 && n_left_to_next >= 2)
1674         {
1675           vlib_buffer_t * p0, * p1;
1676           ip4_header_t * ip0, * ip1;
1677           udp_header_t * udp0, * udp1;
1678           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1679           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1680           ip_adjacency_t * adj0, * adj1;
1681           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
1682           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
1683           i32 len_diff0, len_diff1;
1684           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1685           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1686           u8 enqueue_code;
1687       
1688           pi0 = to_next[0] = from[0];
1689           pi1 = to_next[1] = from[1];
1690           from += 2;
1691           n_left_from -= 2;
1692           to_next += 2;
1693           n_left_to_next -= 2;
1694       
1695           p0 = vlib_get_buffer (vm, pi0);
1696           p1 = vlib_get_buffer (vm, pi1);
1697
1698           ip0 = vlib_buffer_get_current (p0);
1699           ip1 = vlib_buffer_get_current (p1);
1700
1701           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1702                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1703           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1704                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1705
1706           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1707           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
1708
1709           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1710
1711           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1712           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1713
1714           proto0 = ip0->protocol;
1715           proto1 = ip1->protocol;
1716           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1717           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1718           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1719           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1720
1721           flags0 = p0->flags;
1722           flags1 = p1->flags;
1723
1724           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1725           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1726
1727           udp0 = ip4_next_header (ip0);
1728           udp1 = ip4_next_header (ip1);
1729
1730           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1731           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1732           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1733
1734           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1735           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1736
1737           /* Verify UDP length. */
1738           ip_len0 = clib_net_to_host_u16 (ip0->length);
1739           ip_len1 = clib_net_to_host_u16 (ip1->length);
1740           udp_len0 = clib_net_to_host_u16 (udp0->length);
1741           udp_len1 = clib_net_to_host_u16 (udp1->length);
1742
1743           len_diff0 = ip_len0 - udp_len0;
1744           len_diff1 = ip_len1 - udp_len1;
1745
1746           len_diff0 = is_udp0 ? len_diff0 : 0;
1747           len_diff1 = is_udp1 ? len_diff1 : 0;
1748
1749           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1750                                 & good_tcp_udp0 & good_tcp_udp1)))
1751             {
1752               if (is_tcp_udp0)
1753                 {
1754                   if (is_tcp_udp0
1755                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1756                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1757                   good_tcp_udp0 =
1758                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1759                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1760                 }
1761               if (is_tcp_udp1)
1762                 {
1763                   if (is_tcp_udp1
1764                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1765                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1766                   good_tcp_udp1 =
1767                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1768                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1769                 }
1770             }
1771
1772           good_tcp_udp0 &= len_diff0 >= 0;
1773           good_tcp_udp1 &= len_diff1 >= 0;
1774
1775           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1776           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1777
1778           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1779
1780           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1781           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1782
1783           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1784           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1785                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1786                     : error0);
1787           error1 = (is_tcp_udp1 && ! good_tcp_udp1
1788                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1789                     : error1);
1790
1791           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1792           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1793
1794           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1795           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1796
1797           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1798           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
1799
1800           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1801                                                            &ip0->src_address,
1802                                                            /* no_default_route */ 1));
1803           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
1804                                                            &ip1->src_address,
1805                                                            /* no_default_route */ 1));
1806
1807           adj0 = ip_get_adjacency (lm, adj_index0);
1808           adj1 = ip_get_adjacency (lm, adj_index1);
1809
1810           /* 
1811            * Must have a route to source otherwise we drop the packet.
1812            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1813            */
1814           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1815                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1816                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
1817                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1818                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1819                     ? IP4_ERROR_SRC_LOOKUP_MISS
1820                     : error0);
1821           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
1822                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1823                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
1824                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1825                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1826                     ? IP4_ERROR_SRC_LOOKUP_MISS
1827                     : error1);
1828
1829           next0 = lm->local_next_by_ip_protocol[proto0];
1830           next1 = lm->local_next_by_ip_protocol[proto1];
1831
1832           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1833           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1834
1835           p0->error = error0 ? error_node->errors[error0] : 0;
1836           p1->error = error1 ? error_node->errors[error1] : 0;
1837
1838           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1839
1840           if (PREDICT_FALSE (enqueue_code != 0))
1841             {
1842               switch (enqueue_code)
1843                 {
1844                 case 1:
1845                   /* A B A */
1846                   to_next[-2] = pi1;
1847                   to_next -= 1;
1848                   n_left_to_next += 1;
1849                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1850                   break;
1851
1852                 case 2:
1853                   /* A A B */
1854                   to_next -= 1;
1855                   n_left_to_next += 1;
1856                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1857                   break;
1858
1859                 case 3:
1860                   /* A B B or A B C */
1861                   to_next -= 2;
1862                   n_left_to_next += 2;
1863                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1864                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1865                   if (next0 == next1)
1866                     {
1867                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1868                       next_index = next1;
1869                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1870                     }
1871                   break;
1872                 }
1873             }
1874         }
1875
1876       while (n_left_from > 0 && n_left_to_next > 0)
1877         {
1878           vlib_buffer_t * p0;
1879           ip4_header_t * ip0;
1880           udp_header_t * udp0;
1881           ip4_fib_mtrie_t * mtrie0;
1882           ip4_fib_mtrie_leaf_t leaf0;
1883           ip_adjacency_t * adj0;
1884           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
1885           i32 len_diff0;
1886           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1887       
1888           pi0 = to_next[0] = from[0];
1889           from += 1;
1890           n_left_from -= 1;
1891           to_next += 1;
1892           n_left_to_next -= 1;
1893       
1894           p0 = vlib_get_buffer (vm, pi0);
1895
1896           ip0 = vlib_buffer_get_current (p0);
1897
1898           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1899                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1900
1901           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1902
1903           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1904
1905           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1906
1907           proto0 = ip0->protocol;
1908           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1909           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1910
1911           flags0 = p0->flags;
1912
1913           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1914
1915           udp0 = ip4_next_header (ip0);
1916
1917           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1918           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1919
1920           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1921
1922           /* Verify UDP length. */
1923           ip_len0 = clib_net_to_host_u16 (ip0->length);
1924           udp_len0 = clib_net_to_host_u16 (udp0->length);
1925
1926           len_diff0 = ip_len0 - udp_len0;
1927
1928           len_diff0 = is_udp0 ? len_diff0 : 0;
1929
1930           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1931             {
1932               if (is_tcp_udp0)
1933                 {
1934                   if (is_tcp_udp0
1935                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1936                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1937                   good_tcp_udp0 =
1938                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1939                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1940                 }
1941             }
1942
1943           good_tcp_udp0 &= len_diff0 >= 0;
1944
1945           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1946
1947           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1948
1949           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1950
1951           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1952           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1953                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1954                     : error0);
1955
1956           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1957
1958           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1959           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1960
1961           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1962                                                            &ip0->src_address,
1963                                                            /* no_default_route */ 1));
1964
1965           adj0 = ip_get_adjacency (lm, adj_index0);
1966
1967           /* Must have a route to source otherwise we drop the packet. */
1968           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1969                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1970                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
1971                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1972                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1973                     ? IP4_ERROR_SRC_LOOKUP_MISS
1974                     : error0);
1975
1976           next0 = lm->local_next_by_ip_protocol[proto0];
1977
1978           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1979
1980           p0->error = error0? error_node->errors[error0] : 0;
1981
1982           if (PREDICT_FALSE (next0 != next_index))
1983             {
1984               n_left_to_next += 1;
1985               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1986
1987               next_index = next0;
1988               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1989               to_next[0] = pi0;
1990               to_next += 1;
1991               n_left_to_next -= 1;
1992             }
1993         }
1994   
1995       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1996     }
1997
1998   return frame->n_vectors;
1999 }
2000
2001 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2002   .function = ip4_local,
2003   .name = "ip4-local",
2004   .vector_size = sizeof (u32),
2005
2006   .format_trace = format_ip4_forward_next_trace,
2007
2008   .n_next_nodes = IP_LOCAL_N_NEXT,
2009   .next_nodes = {
2010     [IP_LOCAL_NEXT_DROP] = "error-drop",
2011     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2012     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2013     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2014   },
2015 };
2016
2017 void ip4_register_protocol (u32 protocol, u32 node_index)
2018 {
2019   vlib_main_t * vm = vlib_get_main();
2020   ip4_main_t * im = &ip4_main;
2021   ip_lookup_main_t * lm = &im->lookup_main;
2022
2023   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2024   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2025 }
2026
2027 static clib_error_t *
2028 show_ip_local_command_fn (vlib_main_t * vm,
2029                           unformat_input_t * input,
2030                          vlib_cli_command_t * cmd)
2031 {
2032   ip4_main_t * im = &ip4_main;
2033   ip_lookup_main_t * lm = &im->lookup_main;
2034   int i;
2035
2036   vlib_cli_output (vm, "Protocols handled by ip4_local");
2037   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2038     {
2039       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2040         vlib_cli_output (vm, "%d", i);
2041     }
2042   return 0;
2043 }
2044
2045
2046
2047 VLIB_CLI_COMMAND (show_ip_local, static) = {
2048   .path = "show ip local",
2049   .function = show_ip_local_command_fn,
2050   .short_help = "Show ip local protocol table",
2051 };
2052
2053 static uword
2054 ip4_arp (vlib_main_t * vm,
2055          vlib_node_runtime_t * node,
2056          vlib_frame_t * frame)
2057 {
2058   vnet_main_t * vnm = vnet_get_main();
2059   ip4_main_t * im = &ip4_main;
2060   ip_lookup_main_t * lm = &im->lookup_main;
2061   u32 * from, * to_next_drop;
2062   uword n_left_from, n_left_to_next_drop, next_index;
2063   static f64 time_last_seed_change = -1e100;
2064   static u32 hash_seeds[3];
2065   static uword hash_bitmap[256 / BITS (uword)]; 
2066   f64 time_now;
2067
2068   if (node->flags & VLIB_NODE_FLAG_TRACE)
2069     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2070
2071   time_now = vlib_time_now (vm);
2072   if (time_now - time_last_seed_change > 1e-3)
2073     {
2074       uword i;
2075       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2076                                              sizeof (hash_seeds));
2077       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2078         hash_seeds[i] = r[i];
2079
2080       /* Mark all hash keys as been no-seen before. */
2081       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2082         hash_bitmap[i] = 0;
2083
2084       time_last_seed_change = time_now;
2085     }
2086
2087   from = vlib_frame_vector_args (frame);
2088   n_left_from = frame->n_vectors;
2089   next_index = node->cached_next_index;
2090   if (next_index == IP4_ARP_NEXT_DROP)
2091     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2092
2093   while (n_left_from > 0)
2094     {
2095       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2096                            to_next_drop, n_left_to_next_drop);
2097
2098       while (n_left_from > 0 && n_left_to_next_drop > 0)
2099         {
2100           vlib_buffer_t * p0;
2101           ip4_header_t * ip0;
2102           ethernet_header_t * eh0;
2103           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2104           uword bm0;
2105           ip_adjacency_t * adj0;
2106
2107           pi0 = from[0];
2108
2109           p0 = vlib_get_buffer (vm, pi0);
2110
2111           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2112           adj0 = ip_get_adjacency (lm, adj_index0);
2113           ip0 = vlib_buffer_get_current (p0);
2114
2115           /* If packet destination is not local, send ARP to next hop */
2116           if (adj0->arp.next_hop.ip4.as_u32)
2117             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2118
2119           /* 
2120            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2121            * rewrite to this packet, we need to skip it here.
2122            * Note, to distinguish from src IP addr *.8.6.*, we
2123            * check for a bcast eth dest instead of IPv4 version.
2124            */
2125           eh0 = (ethernet_header_t*)ip0;
2126           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2127             {
2128               u32 vlan_num = 0;
2129               u16 * etype = &eh0->type;
2130               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2131                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2132                 {
2133                   vlan_num += 1;
2134                   etype += 2; //vlan tag also 16 bits, same as etype
2135                 }
2136               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2137                 {
2138                   vlib_buffer_advance (
2139                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2140                   ip0 = vlib_buffer_get_current (p0);
2141                 }
2142             }
2143
2144           a0 = hash_seeds[0];
2145           b0 = hash_seeds[1];
2146           c0 = hash_seeds[2];
2147
2148           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2149           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2150
2151           a0 ^= ip0->dst_address.data_u32;
2152           b0 ^= sw_if_index0;
2153
2154           hash_v3_finalize32 (a0, b0, c0);
2155
2156           c0 &= BITS (hash_bitmap) - 1;
2157           c0 = c0 / BITS (uword);
2158           m0 = (uword) 1 << (c0 % BITS (uword));
2159
2160           bm0 = hash_bitmap[c0];
2161           drop0 = (bm0 & m0) != 0;
2162
2163           /* Mark it as seen. */
2164           hash_bitmap[c0] = bm0 | m0;
2165
2166           from += 1;
2167           n_left_from -= 1;
2168           to_next_drop[0] = pi0;
2169           to_next_drop += 1;
2170           n_left_to_next_drop -= 1;
2171
2172           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2173
2174           if (drop0)
2175             continue;
2176
2177           /* 
2178            * Can happen if the control-plane is programming tables
2179            * with traffic flowing; at least that's today's lame excuse.
2180            */
2181           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2182             {
2183               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2184             }
2185           else
2186           /* Send ARP request. */
2187           {
2188             u32 bi0 = 0;
2189             vlib_buffer_t * b0;
2190             ethernet_arp_header_t * h0;
2191             vnet_hw_interface_t * hw_if0;
2192
2193             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2194
2195             /* Add rewrite/encap string for ARP packet. */
2196             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2197
2198             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2199
2200             /* Src ethernet address in ARP header. */
2201             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2202                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2203
2204             ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0);
2205
2206             /* Copy in destination address we are requesting. */
2207             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2208
2209             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2210             b0 = vlib_get_buffer (vm, bi0);
2211             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2212
2213             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2214
2215             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2216           }
2217         }
2218
2219       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2220     }
2221
2222   return frame->n_vectors;
2223 }
2224
2225 static char * ip4_arp_error_strings[] = {
2226   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2227   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2228   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2229   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2230   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2231 };
2232
2233 VLIB_REGISTER_NODE (ip4_arp_node) = {
2234   .function = ip4_arp,
2235   .name = "ip4-arp",
2236   .vector_size = sizeof (u32),
2237
2238   .format_trace = format_ip4_forward_next_trace,
2239
2240   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2241   .error_strings = ip4_arp_error_strings,
2242
2243   .n_next_nodes = IP4_ARP_N_NEXT,
2244   .next_nodes = {
2245     [IP4_ARP_NEXT_DROP] = "error-drop",
2246   },
2247 };
2248
2249 #define foreach_notrace_ip4_arp_error           \
2250 _(DROP)                                         \
2251 _(REQUEST_SENT)                                 \
2252 _(REPLICATE_DROP)                               \
2253 _(REPLICATE_FAIL)
2254
2255 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2256 {
2257   vlib_node_runtime_t *rt = 
2258     vlib_node_get_runtime (vm, ip4_arp_node.index);
2259
2260   /* don't trace ARP request packets */
2261 #define _(a)                                    \
2262     vnet_pcap_drop_trace_filter_add_del         \
2263         (rt->errors[IP4_ARP_ERROR_##a],         \
2264          1 /* is_add */);
2265     foreach_notrace_ip4_arp_error;
2266 #undef _
2267   return 0;
2268 }
2269
2270 VLIB_INIT_FUNCTION(arp_notrace_init);
2271
2272
2273 /* Send an ARP request to see if given destination is reachable on given interface. */
2274 clib_error_t *
2275 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2276 {
2277   vnet_main_t * vnm = vnet_get_main();
2278   ip4_main_t * im = &ip4_main;
2279   ethernet_arp_header_t * h;
2280   ip4_address_t * src;
2281   ip_interface_address_t * ia;
2282   ip_adjacency_t * adj;
2283   vnet_hw_interface_t * hi;
2284   vnet_sw_interface_t * si;
2285   vlib_buffer_t * b;
2286   u32 bi = 0;
2287
2288   si = vnet_get_sw_interface (vnm, sw_if_index);
2289
2290   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2291     {
2292       return clib_error_return (0, "%U: interface %U down",
2293                                 format_ip4_address, dst, 
2294                                 format_vnet_sw_if_index_name, vnm, 
2295                                 sw_if_index);
2296     }
2297
2298   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2299   if (! src)
2300     {
2301       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2302       return clib_error_return 
2303         (0, "no matching interface address for destination %U (interface %U)",
2304          format_ip4_address, dst,
2305          format_vnet_sw_if_index_name, vnm, sw_if_index);
2306     }
2307
2308   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2309
2310   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2311
2312   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2313
2314   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2315
2316   h->ip4_over_ethernet[0].ip4 = src[0];
2317   h->ip4_over_ethernet[1].ip4 = dst[0];
2318
2319   b = vlib_get_buffer (vm, bi);
2320   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2321
2322   /* Add encapsulation string for software interface (e.g. ethernet header). */
2323   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2324   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2325
2326   {
2327     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2328     u32 * to_next = vlib_frame_vector_args (f);
2329     to_next[0] = bi;
2330     f->n_vectors = 1;
2331     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2332   }
2333
2334   return /* no error */ 0;
2335 }
2336
/* Next-node indices named by the ip4 rewrite code. */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ARP = 1,
} ip4_rewrite_next_t;
2341
2342 always_inline uword
2343 ip4_rewrite_inline (vlib_main_t * vm,
2344                     vlib_node_runtime_t * node,
2345                     vlib_frame_t * frame,
2346                     int rewrite_for_locally_received_packets)
2347 {
2348   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2349   u32 * from = vlib_frame_vector_args (frame);
2350   u32 n_left_from, n_left_to_next, * to_next, next_index;
2351   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2352   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2353
2354   n_left_from = frame->n_vectors;
2355   next_index = node->cached_next_index;
2356   u32 cpu_index = os_get_cpu_number();
2357   
2358   while (n_left_from > 0)
2359     {
2360       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2361
2362       while (n_left_from >= 4 && n_left_to_next >= 2)
2363         {
2364           ip_adjacency_t * adj0, * adj1;
2365           vlib_buffer_t * p0, * p1;
2366           ip4_header_t * ip0, * ip1;
2367           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2368           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2369           u32 next0_override, next1_override;
2370       
2371           if (rewrite_for_locally_received_packets)
2372               next0_override = next1_override = 0;
2373
2374           /* Prefetch next iteration. */
2375           {
2376             vlib_buffer_t * p2, * p3;
2377
2378             p2 = vlib_get_buffer (vm, from[2]);
2379             p3 = vlib_get_buffer (vm, from[3]);
2380
2381             vlib_prefetch_buffer_header (p2, STORE);
2382             vlib_prefetch_buffer_header (p3, STORE);
2383
2384             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2385             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2386           }
2387
2388           pi0 = to_next[0] = from[0];
2389           pi1 = to_next[1] = from[1];
2390
2391           from += 2;
2392           n_left_from -= 2;
2393           to_next += 2;
2394           n_left_to_next -= 2;
2395       
2396           p0 = vlib_get_buffer (vm, pi0);
2397           p1 = vlib_get_buffer (vm, pi1);
2398
2399           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2400           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2401
2402           /* We should never rewrite a pkt using the MISS adjacency */
2403           ASSERT(adj_index0 && adj_index1);
2404
2405           ip0 = vlib_buffer_get_current (p0);
2406           ip1 = vlib_buffer_get_current (p1);
2407
2408           error0 = error1 = IP4_ERROR_NONE;
2409
2410           /* Decrement TTL & update checksum.
2411              Works either endian, so no need for byte swap. */
2412           if (! rewrite_for_locally_received_packets)
2413             {
2414               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2415
2416               /* Input node should have reject packets with ttl 0. */
2417               ASSERT (ip0->ttl > 0);
2418               ASSERT (ip1->ttl > 0);
2419
2420               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2421               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2422
2423               checksum0 += checksum0 >= 0xffff;
2424               checksum1 += checksum1 >= 0xffff;
2425
2426               ip0->checksum = checksum0;
2427               ip1->checksum = checksum1;
2428
2429               ttl0 -= 1;
2430               ttl1 -= 1;
2431
2432               ip0->ttl = ttl0;
2433               ip1->ttl = ttl1;
2434
2435               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2436               error1 = ttl1 <= 0 ? IP4_ERROR_TIME_EXPIRED : error1;
2437
2438               /* Verify checksum. */
2439               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2440               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2441             }
2442
2443           /* Rewrite packet header and updates lengths. */
2444           adj0 = ip_get_adjacency (lm, adj_index0);
2445           adj1 = ip_get_adjacency (lm, adj_index1);
2446       
2447           if (rewrite_for_locally_received_packets)
2448             {
2449               /*
2450                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2451                * we end up here with a local adjacency in hand
2452                * The local adj rewrite data is 0xfefe on purpose.
2453                * Bad engineer, no donut for you.
2454                */
2455               if (PREDICT_FALSE(adj0->lookup_next_index 
2456                                 == IP_LOOKUP_NEXT_LOCAL))
2457                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2458               if (PREDICT_FALSE(adj0->lookup_next_index
2459                                 == IP_LOOKUP_NEXT_ARP))
2460                 next0_override = IP4_REWRITE_NEXT_ARP;
2461               if (PREDICT_FALSE(adj1->lookup_next_index 
2462                                 == IP_LOOKUP_NEXT_LOCAL))
2463                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2464               if (PREDICT_FALSE(adj1->lookup_next_index
2465                                 == IP_LOOKUP_NEXT_ARP))
2466                 next1_override = IP4_REWRITE_NEXT_ARP;
2467             }
2468
2469           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2470           rw_len0 = adj0[0].rewrite_header.data_bytes;
2471           rw_len1 = adj1[0].rewrite_header.data_bytes;
2472           next0 = (error0 == IP4_ERROR_NONE) 
2473             ? adj0[0].rewrite_header.next_index : 0;
2474
2475           if (rewrite_for_locally_received_packets)
2476               next0 = next0 && next0_override ? next0_override : next0;
2477
2478           next1 = (error1 == IP4_ERROR_NONE)
2479             ? adj1[0].rewrite_header.next_index : 0;
2480
2481           if (rewrite_for_locally_received_packets)
2482               next1 = next1 && next1_override ? next1_override : next1;
2483
2484           /* 
2485            * We've already accounted for an ethernet_header_t elsewhere
2486            */
2487           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2488               vlib_increment_combined_counter 
2489                   (&lm->adjacency_counters,
2490                    cpu_index, adj_index0, 
2491                    /* packet increment */ 0,
2492                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2493
2494           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2495               vlib_increment_combined_counter 
2496                   (&lm->adjacency_counters,
2497                    cpu_index, adj_index1, 
2498                    /* packet increment */ 0,
2499                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2500
2501           /* Check MTU of outgoing interface. */
2502           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2503                     ? IP4_ERROR_MTU_EXCEEDED
2504                     : error0);
2505           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2506                     ? IP4_ERROR_MTU_EXCEEDED
2507                     : error1);
2508
2509           p0->current_data -= rw_len0;
2510           p1->current_data -= rw_len1;
2511
2512           p0->current_length += rw_len0;
2513           p1->current_length += rw_len1;
2514
2515           vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
2516           vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index;
2517       
2518           p0->error = error_node->errors[error0];
2519           p1->error = error_node->errors[error1];
2520
2521           /* Guess we are only writing on simple Ethernet header. */
2522           vnet_rewrite_two_headers (adj0[0], adj1[0],
2523                                     ip0, ip1,
2524                                     sizeof (ethernet_header_t));
2525       
2526           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2527                                            to_next, n_left_to_next,
2528                                            pi0, pi1, next0, next1);
2529         }
2530
2531       while (n_left_from > 0 && n_left_to_next > 0)
2532         {
2533           ip_adjacency_t * adj0;
2534           vlib_buffer_t * p0;
2535           ip4_header_t * ip0;
2536           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2537           u32 next0_override;
2538       
2539           if (rewrite_for_locally_received_packets)
2540               next0_override = 0;
2541
2542           pi0 = to_next[0] = from[0];
2543
2544           p0 = vlib_get_buffer (vm, pi0);
2545
2546           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2547
2548           /* We should never rewrite a pkt using the MISS adjacency */
2549           ASSERT(adj_index0);
2550
2551           adj0 = ip_get_adjacency (lm, adj_index0);
2552       
2553           ip0 = vlib_buffer_get_current (p0);
2554
2555           error0 = IP4_ERROR_NONE;
2556           next0 = 0;            /* drop on error */
2557
2558           /* Decrement TTL & update checksum. */
2559           if (! rewrite_for_locally_received_packets)
2560             {
2561               i32 ttl0 = ip0->ttl;
2562
2563               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2564
2565               checksum0 += checksum0 >= 0xffff;
2566
2567               ip0->checksum = checksum0;
2568
2569               ASSERT (ip0->ttl > 0);
2570
2571               ttl0 -= 1;
2572
2573               ip0->ttl = ttl0;
2574
2575               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2576
2577               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2578             }
2579
2580           if (rewrite_for_locally_received_packets)
2581             {
2582               /*
2583                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2584                * we end up here with a local adjacency in hand
2585                * The local adj rewrite data is 0xfefe on purpose.
2586                * Bad engineer, no donut for you.
2587                */
2588               if (PREDICT_FALSE(adj0->lookup_next_index 
2589                                 == IP_LOOKUP_NEXT_LOCAL))
2590                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2591               /* 
2592                * We have to override the next_index in ARP adjacencies,
2593                * because they're set up for ip4-arp, not this node...
2594                */
2595               if (PREDICT_FALSE(adj0->lookup_next_index
2596                                 == IP_LOOKUP_NEXT_ARP))
2597                 next0_override = IP4_REWRITE_NEXT_ARP;
2598             }
2599
2600           /* Guess we are only writing on simple Ethernet header. */
2601           vnet_rewrite_one_header (adj0[0], ip0, 
2602                                    sizeof (ethernet_header_t));
2603           
2604           /* Update packet buffer attributes/set output interface. */
2605           rw_len0 = adj0[0].rewrite_header.data_bytes;
2606           
2607           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2608               vlib_increment_combined_counter 
2609                   (&lm->adjacency_counters,
2610                    cpu_index, adj_index0, 
2611                    /* packet increment */ 0,
2612                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2613           
2614           /* Check MTU of outgoing interface. */
2615           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2616                     > adj0[0].rewrite_header.max_l3_packet_bytes
2617                     ? IP4_ERROR_MTU_EXCEEDED
2618                     : error0);
2619           
2620           p0->error = error_node->errors[error0];
2621           p0->current_data -= rw_len0;
2622           p0->current_length += rw_len0;
2623           vnet_buffer (p0)->sw_if_index[VLIB_TX] = 
2624             adj0[0].rewrite_header.sw_if_index;
2625           
2626           next0 = (error0 == IP4_ERROR_NONE)
2627             ? adj0[0].rewrite_header.next_index : 0;
2628
2629           if (rewrite_for_locally_received_packets)
2630               next0 = next0 && next0_override ? next0_override : next0;
2631
2632           from += 1;
2633           n_left_from -= 1;
2634           to_next += 1;
2635           n_left_to_next -= 1;
2636       
2637           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2638                                            to_next, n_left_to_next,
2639                                            pi0, next0);
2640         }
2641   
2642       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2643     }
2644
2645   /* Need to do trace after rewrites to pick up new packet data. */
2646   if (node->flags & VLIB_NODE_FLAG_TRACE)
2647     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2648
2649   return frame->n_vectors;
2650 }
2651
2652 static uword
2653 ip4_rewrite_transit (vlib_main_t * vm,
2654                      vlib_node_runtime_t * node,
2655                      vlib_frame_t * frame)
2656 {
2657   return ip4_rewrite_inline (vm, node, frame,
2658                              /* rewrite_for_locally_received_packets */ 0);
2659 }
2660
2661 static uword
2662 ip4_rewrite_local (vlib_main_t * vm,
2663                    vlib_node_runtime_t * node,
2664                    vlib_frame_t * frame)
2665 {
2666   return ip4_rewrite_inline (vm, node, frame,
2667                              /* rewrite_for_locally_received_packets */ 1);
2668 }
2669
2670 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2671   .function = ip4_rewrite_transit,
2672   .name = "ip4-rewrite-transit",
2673   .vector_size = sizeof (u32),
2674
2675   .format_trace = format_ip4_forward_next_trace,
2676
2677   .n_next_nodes = 2,
2678   .next_nodes = {
2679     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2680     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2681   },
2682 };
2683
2684 VLIB_REGISTER_NODE (ip4_rewrite_local_node,static) = {
2685   .function = ip4_rewrite_local,
2686   .name = "ip4-rewrite-local",
2687   .vector_size = sizeof (u32),
2688
2689   .sibling_of = "ip4-rewrite-transit",
2690
2691   .format_trace = format_ip4_forward_next_trace,
2692
2693   .n_next_nodes = 2,
2694   .next_nodes = {
2695     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2696     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2697   },
2698 };
2699
2700 static clib_error_t *
2701 add_del_interface_table (vlib_main_t * vm,
2702                          unformat_input_t * input,
2703                          vlib_cli_command_t * cmd)
2704 {
2705   vnet_main_t * vnm = vnet_get_main();
2706   clib_error_t * error = 0;
2707   u32 sw_if_index, table_id;
2708
2709   sw_if_index = ~0;
2710
2711   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2712     {
2713       error = clib_error_return (0, "unknown interface `%U'",
2714                                  format_unformat_error, input);
2715       goto done;
2716     }
2717
2718   if (unformat (input, "%d", &table_id))
2719     ;
2720   else
2721     {
2722       error = clib_error_return (0, "expected table id `%U'",
2723                                  format_unformat_error, input);
2724       goto done;
2725     }
2726
2727   {
2728     ip4_main_t * im = &ip4_main;
2729     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
2730
2731     if (fib) 
2732       {
2733         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2734         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
2735     }
2736   }
2737
2738  done:
2739   return error;
2740 }
2741
2742 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2743   .path = "set interface ip table",
2744   .function = add_del_interface_table,
2745   .short_help = "Add/delete FIB table id for interface",
2746 };
2747
2748
2749 static uword
2750 ip4_lookup_multicast (vlib_main_t * vm,
2751                       vlib_node_runtime_t * node,
2752                       vlib_frame_t * frame)
2753 {
2754   ip4_main_t * im = &ip4_main;
2755   ip_lookup_main_t * lm = &im->lookup_main;
2756   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
2757   u32 n_left_from, n_left_to_next, * from, * to_next;
2758   ip_lookup_next_t next;
2759   u32 cpu_index = os_get_cpu_number();
2760
2761   from = vlib_frame_vector_args (frame);
2762   n_left_from = frame->n_vectors;
2763   next = node->cached_next_index;
2764
2765   while (n_left_from > 0)
2766     {
2767       vlib_get_next_frame (vm, node, next,
2768                            to_next, n_left_to_next);
2769
2770       while (n_left_from >= 4 && n_left_to_next >= 2)
2771         {
2772           vlib_buffer_t * p0, * p1;
2773           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
2774           ip_lookup_next_t next0, next1;
2775           ip4_header_t * ip0, * ip1;
2776           ip_adjacency_t * adj0, * adj1;
2777           u32 fib_index0, fib_index1;
2778           u32 flow_hash_config0, flow_hash_config1;
2779
2780           /* Prefetch next iteration. */
2781           {
2782             vlib_buffer_t * p2, * p3;
2783
2784             p2 = vlib_get_buffer (vm, from[2]);
2785             p3 = vlib_get_buffer (vm, from[3]);
2786
2787             vlib_prefetch_buffer_header (p2, LOAD);
2788             vlib_prefetch_buffer_header (p3, LOAD);
2789
2790             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2791             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2792           }
2793
2794           pi0 = to_next[0] = from[0];
2795           pi1 = to_next[1] = from[1];
2796
2797           p0 = vlib_get_buffer (vm, pi0);
2798           p1 = vlib_get_buffer (vm, pi1);
2799
2800           ip0 = vlib_buffer_get_current (p0);
2801           ip1 = vlib_buffer_get_current (p1);
2802
2803           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2804           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2805           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2806             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2807           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2808             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2809
2810           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
2811                                               &ip0->dst_address, p0);
2812           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
2813                                               &ip1->dst_address, p1);
2814
2815           adj0 = ip_get_adjacency (lm, adj_index0);
2816           adj1 = ip_get_adjacency (lm, adj_index1);
2817
2818           next0 = adj0->lookup_next_index;
2819           next1 = adj1->lookup_next_index;
2820
2821           flow_hash_config0 = 
2822               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
2823
2824           flow_hash_config1 = 
2825               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
2826
2827           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
2828               (ip0, flow_hash_config0);
2829                                                                   
2830           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
2831               (ip1, flow_hash_config1);
2832
2833           ASSERT (adj0->n_adj > 0);
2834           ASSERT (adj1->n_adj > 0);
2835           ASSERT (is_pow2 (adj0->n_adj));
2836           ASSERT (is_pow2 (adj1->n_adj));
2837           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
2838           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
2839
2840           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2841           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2842
2843           if (1) /* $$$$$$ HACK FIXME */
2844           vlib_increment_combined_counter 
2845               (cm, cpu_index, adj_index0, 1,
2846                vlib_buffer_length_in_chain (vm, p0));
2847           if (1) /* $$$$$$ HACK FIXME */
2848           vlib_increment_combined_counter 
2849               (cm, cpu_index, adj_index1, 1,
2850                vlib_buffer_length_in_chain (vm, p1));
2851
2852           from += 2;
2853           to_next += 2;
2854           n_left_to_next -= 2;
2855           n_left_from -= 2;
2856
2857           wrong_next = (next0 != next) + 2*(next1 != next);
2858           if (PREDICT_FALSE (wrong_next != 0))
2859             {
2860               switch (wrong_next)
2861                 {
2862                 case 1:
2863                   /* A B A */
2864                   to_next[-2] = pi1;
2865                   to_next -= 1;
2866                   n_left_to_next += 1;
2867                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2868                   break;
2869
2870                 case 2:
2871                   /* A A B */
2872                   to_next -= 1;
2873                   n_left_to_next += 1;
2874                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2875                   break;
2876
2877                 case 3:
2878                   /* A B C */
2879                   to_next -= 2;
2880                   n_left_to_next += 2;
2881                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2882                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2883                   if (next0 == next1)
2884                     {
2885                       /* A B B */
2886                       vlib_put_next_frame (vm, node, next, n_left_to_next);
2887                       next = next1;
2888                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2889                     }
2890                 }
2891             }
2892         }
2893     
2894       while (n_left_from > 0 && n_left_to_next > 0)
2895         {
2896           vlib_buffer_t * p0;
2897           ip4_header_t * ip0;
2898           u32 pi0, adj_index0;
2899           ip_lookup_next_t next0;
2900           ip_adjacency_t * adj0;
2901           u32 fib_index0;
2902           u32 flow_hash_config0;
2903
2904           pi0 = from[0];
2905           to_next[0] = pi0;
2906
2907           p0 = vlib_get_buffer (vm, pi0);
2908
2909           ip0 = vlib_buffer_get_current (p0);
2910
2911           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2912                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2913           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2914               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2915           
2916           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
2917                                               &ip0->dst_address, p0);
2918
2919           adj0 = ip_get_adjacency (lm, adj_index0);
2920
2921           next0 = adj0->lookup_next_index;
2922
2923           flow_hash_config0 = 
2924               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
2925
2926           vnet_buffer (p0)->ip.flow_hash = 
2927             ip4_compute_flow_hash (ip0, flow_hash_config0);
2928
2929           ASSERT (adj0->n_adj > 0);
2930           ASSERT (is_pow2 (adj0->n_adj));
2931           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
2932
2933           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2934
2935           if (1) /* $$$$$$ HACK FIXME */
2936               vlib_increment_combined_counter 
2937                   (cm, cpu_index, adj_index0, 1,
2938                    vlib_buffer_length_in_chain (vm, p0));
2939
2940           from += 1;
2941           to_next += 1;
2942           n_left_to_next -= 1;
2943           n_left_from -= 1;
2944
2945           if (PREDICT_FALSE (next0 != next))
2946             {
2947               n_left_to_next += 1;
2948               vlib_put_next_frame (vm, node, next, n_left_to_next);
2949               next = next0;
2950               vlib_get_next_frame (vm, node, next,
2951                                    to_next, n_left_to_next);
2952               to_next[0] = pi0;
2953               to_next += 1;
2954               n_left_to_next -= 1;
2955             }
2956         }
2957
2958       vlib_put_next_frame (vm, node, next, n_left_to_next);
2959     }
2960
2961   return frame->n_vectors;
2962 }
2963
2964 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2965   .function = ip4_lookup_multicast,
2966   .name = "ip4-lookup-multicast",
2967   .vector_size = sizeof (u32),
2968
2969   .n_next_nodes = IP_LOOKUP_N_NEXT,
2970   .next_nodes = IP4_LOOKUP_NEXT_NODES,
2971 };
2972
2973 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
2974   .function = ip4_drop,
2975   .name = "ip4-multicast",
2976   .vector_size = sizeof (u32),
2977
2978   .format_trace = format_ip4_forward_next_trace,
2979
2980   .n_next_nodes = 1,
2981   .next_nodes = {
2982     [0] = "error-drop",
2983   },
2984 };
2985
2986 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
2987 {
2988   ip4_main_t * im = &ip4_main;
2989   ip4_fib_mtrie_t * mtrie0;
2990   ip4_fib_mtrie_leaf_t leaf0;
2991   u32 adj_index0;
2992     
2993   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2994
2995   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2996   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2997   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2998   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2999   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3000   
3001   /* Handle default route. */
3002   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3003   
3004   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3005   
3006   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3007                                                   a, 
3008                                                   /* no_default_route */ 0);
3009 }
3010  
3011 static clib_error_t *
3012 test_lookup_command_fn (vlib_main_t * vm,
3013                         unformat_input_t * input,
3014                         vlib_cli_command_t * cmd)
3015 {
3016   u32 table_id = 0;
3017   f64 count = 1;
3018   u32 n;
3019   int i;
3020   ip4_address_t ip4_base_address;
3021   u64 errors = 0;
3022
3023   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3024       if (unformat (input, "table %d", &table_id))
3025         ;
3026       else if (unformat (input, "count %f", &count))
3027         ;
3028
3029       else if (unformat (input, "%U",
3030                          unformat_ip4_address, &ip4_base_address))
3031         ;
3032       else
3033         return clib_error_return (0, "unknown input `%U'",
3034                                   format_unformat_error, input);
3035   }
3036
3037   n = count;
3038
3039   for (i = 0; i < n; i++)
3040     {
3041       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3042         errors++;
3043
3044       ip4_base_address.as_u32 = 
3045         clib_host_to_net_u32 (1 + 
3046                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3047     }
3048
3049   if (errors) 
3050     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3051   else
3052     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3053
3054   return 0;
3055 }
3056
3057 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3058     .path = "test lookup",
3059     .short_help = "test lookup",
3060     .function = test_lookup_command_fn,
3061 };
3062
3063 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3064 {
3065   ip4_main_t * im4 = &ip4_main;
3066   ip4_fib_t * fib;
3067   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3068
3069   if (p == 0)
3070     return VNET_API_ERROR_NO_SUCH_FIB;
3071
3072   fib = vec_elt_at_index (im4->fibs, p[0]);
3073
3074   fib->flow_hash_config = flow_hash_config;
3075   return 0;
3076 }
3077  
3078 static clib_error_t *
3079 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3080                              unformat_input_t * input,
3081                              vlib_cli_command_t * cmd)
3082 {
3083   int matched = 0;
3084   u32 table_id = 0;
3085   u32 flow_hash_config = 0;
3086   int rv;
3087
3088   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3089     if (unformat (input, "table %d", &table_id))
3090       matched = 1;
3091 #define _(a,v) \
3092     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3093     foreach_flow_hash_bit
3094 #undef _
3095     else break;
3096   }
3097   
3098   if (matched == 0)
3099     return clib_error_return (0, "unknown input `%U'",
3100                               format_unformat_error, input);
3101   
3102   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3103   switch (rv)
3104     {
3105     case 0:
3106       break;
3107       
3108     case VNET_API_ERROR_NO_SUCH_FIB:
3109       return clib_error_return (0, "no such FIB table %d", table_id);
3110       
3111     default:
3112       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3113       break;
3114     }
3115   
3116   return 0;
3117 }
3118  
3119 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3120   .path = "set ip flow-hash",
3121   .short_help = 
3122   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3123   .function = set_ip_flow_hash_command_fn,
3124 };
3125  
3126 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3127                                  u32 table_index)
3128 {
3129   vnet_main_t * vnm = vnet_get_main();
3130   vnet_interface_main_t * im = &vnm->interface_main;
3131   ip4_main_t * ipm = &ip4_main;
3132   ip_lookup_main_t * lm = &ipm->lookup_main;
3133   vnet_classify_main_t * cm = &vnet_classify_main;
3134
3135   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3136     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3137
3138   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3139     return VNET_API_ERROR_NO_SUCH_ENTRY;
3140
3141   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3142   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3143
3144   return 0;
3145 }
3146
3147 static clib_error_t *
3148 set_ip_classify_command_fn (vlib_main_t * vm,
3149                             unformat_input_t * input,
3150                             vlib_cli_command_t * cmd)
3151 {
3152   u32 table_index = ~0;
3153   int table_index_set = 0;
3154   u32 sw_if_index = ~0;
3155   int rv;
3156   
3157   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3158     if (unformat (input, "table-index %d", &table_index))
3159       table_index_set = 1;
3160     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3161                        vnet_get_main(), &sw_if_index))
3162       ;
3163     else
3164       break;
3165   }
3166       
3167   if (table_index_set == 0)
3168     return clib_error_return (0, "classify table-index must be specified");
3169
3170   if (sw_if_index == ~0)
3171     return clib_error_return (0, "interface / subif must be specified");
3172
3173   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3174
3175   switch (rv)
3176     {
3177     case 0:
3178       break;
3179
3180     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3181       return clib_error_return (0, "No such interface");
3182
3183     case VNET_API_ERROR_NO_SUCH_ENTRY:
3184       return clib_error_return (0, "No such classifier table");
3185     }
3186   return 0;
3187 }
3188
3189 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3190     .path = "set ip classify",
3191     .short_help = 
3192     "set ip classify intfc <int> table-index <index>",
3193     .function = set_ip_classify_command_fn,
3194 };
3195