Make adjacencies shareable
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47
48 /* This is really, really simple but stupid fib. */
49 u32
50 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
51                            ip4_address_t * dst,
52                            u32 disable_default_route)
53 {
54   ip_lookup_main_t * lm = &im->lookup_main;
55   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
56   uword * p, * hash, key;
57   i32 i, i_min, dst_address, ai;
58
59   i_min = disable_default_route ? 1 : 0;
60   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
61   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
62     {
63       hash = fib->adj_index_by_dst_address[i];
64       if (! hash)
65         continue;
66
67       key = dst_address & im->fib_masks[i];
68       if ((p = hash_get (hash, key)) != 0)
69         {
70           ai = p[0];
71           goto done;
72         }
73     }
74     
75   /* Nothing matches in table. */
76   ai = lm->miss_adj_index;
77
78  done:
79   return ai;
80 }
81
82 static ip4_fib_t *
83 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
84 {
85   ip4_fib_t * fib;
86   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
87   vec_add2 (im->fibs, fib, 1);
88   fib->table_id = table_id;
89   fib->index = fib - im->fibs;
90   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
91   fib->fwd_classify_table_index = ~0;
92   fib->rev_classify_table_index = ~0;
93   ip4_mtrie_init (&fib->mtrie);
94   return fib;
95 }
96
97 ip4_fib_t *
98 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
99                                    u32 table_index_or_id, u32 flags)
100 {
101   uword * p, fib_index;
102
103   fib_index = table_index_or_id;
104   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
105     {
106       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
107       if (! p)
108         return create_fib_with_table_id (im, table_index_or_id);
109       fib_index = p[0];
110     }
111   return vec_elt_at_index (im->fibs, fib_index);
112 }
113
114 static void
115 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
116                                        ip4_fib_t * fib,
117                                        u32 address_length)
118 {
119   hash_t * h;
120   uword max_index;
121
122   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
123   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
124
125   fib->adj_index_by_dst_address[address_length] =
126     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
127
128   hash_set_flags (fib->adj_index_by_dst_address[address_length],
129                   HASH_FLAG_NO_AUTO_SHRINK);
130
131   h = hash_header (fib->adj_index_by_dst_address[address_length]);
132   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
133
134   /* Initialize new/old hash value vectors. */
135   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
136   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
137 }
138
139 static void serialize_ip4_address (serialize_main_t * m, va_list * va)
140 {
141   ip4_address_t * a = va_arg (*va, ip4_address_t *);
142   u8 * p = serialize_get (m, sizeof (a->as_u8));
143   memcpy (p, a->as_u8, sizeof (a->as_u8));
144 }
145
146 static void unserialize_ip4_address (serialize_main_t * m, va_list * va)
147 {
148   ip4_address_t * a = va_arg (*va, ip4_address_t *);
149   u8 * p = unserialize_get (m, sizeof (a->as_u8));
150   memcpy (a->as_u8, p, sizeof (a->as_u8));
151 }
152
153 static void serialize_ip4_address_and_length (serialize_main_t * m, va_list * va)
154 {
155   ip4_address_t * a = va_arg (*va, ip4_address_t *);
156   u32 l = va_arg (*va, u32);
157   u32 n_bytes = (l / 8) + ((l % 8) != 0);
158   u8 * p = serialize_get (m, 1 + n_bytes);
159   ASSERT (l <= 32);
160   p[0] = l;
161   memcpy (p + 1, a->as_u8, n_bytes);
162 }
163
164 static void unserialize_ip4_address_and_length (serialize_main_t * m, va_list * va)
165 {
166   ip4_address_t * a = va_arg (*va, ip4_address_t *);
167   u32 * al = va_arg (*va, u32 *);
168   u8 * p = unserialize_get (m, 1);
169   u32 l, n_bytes;
170
171   al[0] = l = p[0];
172   ASSERT (l <= 32);
173   n_bytes = (l / 8) + ((l % 8) != 0);
174
175   if (n_bytes)
176     {
177       p = unserialize_get (m, n_bytes);
178       memcpy (a->as_u8, p, n_bytes);
179     }
180 }
181
182 static void serialize_ip4_add_del_route_msg (serialize_main_t * m, va_list * va)
183 {
184   ip4_add_del_route_args_t * a = va_arg (*va, ip4_add_del_route_args_t *);
185     
186   serialize_likely_small_unsigned_integer (m, a->table_index_or_table_id);
187   serialize_likely_small_unsigned_integer (m, a->flags);
188   serialize (m, serialize_ip4_address_and_length, &a->dst_address, a->dst_address_length);
189   serialize_likely_small_unsigned_integer (m, a->adj_index);
190   serialize_likely_small_unsigned_integer (m, a->n_add_adj);
191   if (a->n_add_adj > 0)
192     serialize (m, serialize_vec_ip_adjacency, a->add_adj, a->n_add_adj);
193 }
194
195 /* Serialized adjacencies for arp/rewrite do not send graph next_index
196    since graph hookup is not guaranteed to be the same for both sides
197    of serialize/unserialize. */
198 static void
199 unserialize_fixup_ip4_rewrite_adjacencies (vlib_main_t * vm,
200                                            ip_adjacency_t * adj,
201                                            u32 n_adj)
202 {
203   vnet_main_t * vnm = vnet_get_main();
204   u32 i, ni, sw_if_index, is_arp;
205   vnet_hw_interface_t * hw;
206
207   for (i = 0; i < n_adj; i++)
208     {
209       switch (adj[i].lookup_next_index)
210         {
211         case IP_LOOKUP_NEXT_REWRITE:
212         case IP_LOOKUP_NEXT_ARP:
213           is_arp = adj[i].lookup_next_index == IP_LOOKUP_NEXT_ARP;
214           sw_if_index = adj[i].rewrite_header.sw_if_index;
215           hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
216           ni = is_arp ? ip4_arp_node.index : ip4_rewrite_node.index;
217           adj[i].rewrite_header.node_index = ni;
218           adj[i].rewrite_header.next_index = vlib_node_add_next (vm, ni, hw->output_node_index);
219           if (is_arp)
220             vnet_rewrite_for_sw_interface
221               (vnm,
222                VNET_L3_PACKET_TYPE_ARP,
223                sw_if_index,
224                ni,
225                VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
226                &adj[i].rewrite_header,
227                sizeof (adj->rewrite_data));
228           break;
229
230         default:
231           break;
232         }
233     }
234 }
235
236 static void unserialize_ip4_add_del_route_msg (serialize_main_t * m, va_list * va)
237 {
238   ip4_main_t * i4m = &ip4_main;
239   ip4_add_del_route_args_t a;
240     
241   a.table_index_or_table_id = unserialize_likely_small_unsigned_integer (m);
242   a.flags = unserialize_likely_small_unsigned_integer (m);
243   unserialize (m, unserialize_ip4_address_and_length, &a.dst_address, &a.dst_address_length);
244   a.adj_index = unserialize_likely_small_unsigned_integer (m);
245   a.n_add_adj = unserialize_likely_small_unsigned_integer (m);
246   a.add_adj = 0;
247   if (a.n_add_adj > 0)
248     {
249       vec_resize (a.add_adj, a.n_add_adj);
250       unserialize (m, unserialize_vec_ip_adjacency, a.add_adj, a.n_add_adj);
251       unserialize_fixup_ip4_rewrite_adjacencies (vlib_get_main(), 
252                                                  a.add_adj, a.n_add_adj);
253     }
254
255   /* Prevent re-re-distribution. */
256   a.flags |= IP4_ROUTE_FLAG_NO_REDISTRIBUTE;
257
258   ip4_add_del_route (i4m, &a);
259
260   vec_free (a.add_adj);
261 }
262
263 MC_SERIALIZE_MSG (ip4_add_del_route_msg, static) = {
264   .name = "vnet_ip4_add_del_route",
265   .serialize = serialize_ip4_add_del_route_msg,
266   .unserialize = unserialize_ip4_add_del_route_msg,
267 };
268
269 static void
270 ip4_fib_set_adj_index (ip4_main_t * im,
271                        ip4_fib_t * fib,
272                        u32 flags,
273                        u32 dst_address_u32,
274                        u32 dst_address_length,
275                        u32 adj_index)
276 {
277   ip_lookup_main_t * lm = &im->lookup_main;
278   uword * hash;
279
280   if (vec_bytes(fib->old_hash_values))
281     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
282   if (vec_bytes(fib->new_hash_values))
283     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
284   fib->new_hash_values[0] = adj_index;
285
286   /* Make sure adj index is valid. */
287   if (CLIB_DEBUG > 0)
288     (void) ip_get_adjacency (lm, adj_index);
289
290   hash = fib->adj_index_by_dst_address[dst_address_length];
291
292   hash = _hash_set3 (hash, dst_address_u32,
293                      fib->new_hash_values,
294                      fib->old_hash_values);
295
296   fib->adj_index_by_dst_address[dst_address_length] = hash;
297
298   if (vec_len (im->add_del_route_callbacks) > 0)
299     {
300       ip4_add_del_route_callback_t * cb;
301       ip4_address_t d;
302       uword * p;
303
304       d.data_u32 = dst_address_u32;
305       vec_foreach (cb, im->add_del_route_callbacks)
306         if ((flags & cb->required_flags) == cb->required_flags)
307           cb->function (im, cb->function_opaque,
308                         fib, flags,
309                         &d, dst_address_length,
310                         fib->old_hash_values,
311                         fib->new_hash_values);
312
313       p = hash_get (hash, dst_address_u32);
314       memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
315     }
316 }
317
318 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
319 {
320   vlib_main_t * vm = vlib_get_main();
321   ip_lookup_main_t * lm = &im->lookup_main;
322   ip4_fib_t * fib;
323   u32 dst_address, dst_address_length, adj_index, old_adj_index;
324   uword * hash, is_del;
325   ip4_add_del_route_callback_t * cb;
326
327   if (vm->mc_main && ! (a->flags & IP4_ROUTE_FLAG_NO_REDISTRIBUTE))
328     {
329       u32 multiple_messages_per_vlib_buffer = (a->flags & IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP);
330       mc_serialize2 (vm->mc_main, multiple_messages_per_vlib_buffer,
331                      &ip4_add_del_route_msg, a);
332       return;
333     }
334
335   /* Either create new adjacency or use given one depending on arguments. */
336   if (a->n_add_adj > 0)
337     {
338       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
339       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
340     }
341   else
342     adj_index = a->adj_index;
343
344   dst_address = a->dst_address.data_u32;
345   dst_address_length = a->dst_address_length;
346   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
347
348   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
349   dst_address &= im->fib_masks[dst_address_length];
350
351   if (! fib->adj_index_by_dst_address[dst_address_length])
352     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
353
354   hash = fib->adj_index_by_dst_address[dst_address_length];
355
356   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
357
358   if (is_del)
359     {
360       fib->old_hash_values[0] = ~0;
361       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
362       fib->adj_index_by_dst_address[dst_address_length] = hash;
363
364       if (vec_len (im->add_del_route_callbacks) > 0
365           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
366         {
367           fib->new_hash_values[0] = ~0;
368           vec_foreach (cb, im->add_del_route_callbacks)
369             if ((a->flags & cb->required_flags) == cb->required_flags)
370               cb->function (im, cb->function_opaque,
371                             fib, a->flags,
372                             &a->dst_address, dst_address_length,
373                             fib->old_hash_values,
374                             fib->new_hash_values);
375         }
376     }
377   else
378     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
379                            adj_index);
380
381   old_adj_index = fib->old_hash_values[0];
382
383   /* Avoid spurious reference count increments */
384   if (old_adj_index == adj_index)
385     {
386       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
387       if (adj->share_count > 0)
388         adj->share_count --;
389     }
390
391   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
392                                is_del ? old_adj_index : adj_index,
393                                is_del);
394
395   /* Delete old adjacency index if present and changed. */
396   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
397       && old_adj_index != ~0
398       && old_adj_index != adj_index)
399     ip_del_adjacency (lm, old_adj_index);
400 }
401
402 static void serialize_ip4_add_del_route_next_hop_msg (serialize_main_t * m, va_list * va)
403 {
404   u32 flags = va_arg (*va, u32);
405   ip4_address_t * dst_address = va_arg (*va, ip4_address_t *);
406   u32 dst_address_length = va_arg (*va, u32);
407   ip4_address_t * next_hop_address = va_arg (*va, ip4_address_t *);
408   u32 next_hop_sw_if_index = va_arg (*va, u32);
409   u32 next_hop_weight = va_arg (*va, u32);
410
411   serialize_likely_small_unsigned_integer (m, flags);
412   serialize (m, serialize_ip4_address_and_length, dst_address, dst_address_length);
413   serialize (m, serialize_ip4_address, next_hop_address);
414   serialize_likely_small_unsigned_integer (m, next_hop_sw_if_index);
415   serialize_likely_small_unsigned_integer (m, next_hop_weight);
416 }
417
418 static void unserialize_ip4_add_del_route_next_hop_msg (serialize_main_t * m, va_list * va)
419 {
420   ip4_main_t * im = &ip4_main;
421   u32 flags, dst_address_length, next_hop_sw_if_index, next_hop_weight;
422   ip4_address_t dst_address, next_hop_address;
423
424   flags = unserialize_likely_small_unsigned_integer (m);
425   unserialize (m, unserialize_ip4_address_and_length, &dst_address, &dst_address_length);
426   unserialize (m, unserialize_ip4_address, &next_hop_address);
427   next_hop_sw_if_index = unserialize_likely_small_unsigned_integer (m);
428   next_hop_weight = unserialize_likely_small_unsigned_integer (m);
429
430   ip4_add_del_route_next_hop
431     (im,
432      flags | IP4_ROUTE_FLAG_NO_REDISTRIBUTE,
433      &dst_address,
434      dst_address_length,
435      &next_hop_address,
436      next_hop_sw_if_index,
437      next_hop_weight, (u32)~0, 
438      (u32)~0 /* explicit FIB index */);
439 }
440
441 MC_SERIALIZE_MSG (ip4_add_del_route_next_hop_msg, static) = {
442   .name = "vnet_ip4_add_del_route_next_hop",
443   .serialize = serialize_ip4_add_del_route_next_hop_msg,
444   .unserialize = unserialize_ip4_add_del_route_next_hop_msg,
445 };
446
447 void
448 ip4_add_del_route_next_hop (ip4_main_t * im,
449                             u32 flags,
450                             ip4_address_t * dst_address,
451                             u32 dst_address_length,
452                             ip4_address_t * next_hop,
453                             u32 next_hop_sw_if_index,
454                             u32 next_hop_weight, u32 adj_index, 
455                             u32 explicit_fib_index)
456 {
457   vnet_main_t * vnm = vnet_get_main();
458   vlib_main_t * vm = vlib_get_main();
459   ip_lookup_main_t * lm = &im->lookup_main;
460   u32 fib_index;
461   ip4_fib_t * fib;
462   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
463   u32 dst_adj_index, nh_adj_index;
464   uword * dst_hash, * dst_result;
465   uword * nh_hash, * nh_result;
466   ip_adjacency_t * dst_adj;
467   ip_multipath_adjacency_t * old_mp, * new_mp;
468   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
469   int is_interface_next_hop;
470   clib_error_t * error = 0;
471
472   if (vm->mc_main && ! (flags & IP4_ROUTE_FLAG_NO_REDISTRIBUTE))
473     {
474       u32 multiple_messages_per_vlib_buffer = (flags & IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP);
475       mc_serialize2 (vm->mc_main,
476                      multiple_messages_per_vlib_buffer,
477                      &ip4_add_del_route_next_hop_msg,
478                      flags,
479                      dst_address, dst_address_length,
480                      next_hop, next_hop_sw_if_index, next_hop_weight);
481       return;
482     }
483
484   if (explicit_fib_index == (u32)~0)
485       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
486   else
487       fib_index = explicit_fib_index;
488
489   fib = vec_elt_at_index (im->fibs, fib_index);
490   
491   /* Lookup next hop to be added or deleted. */
492   is_interface_next_hop = next_hop->data_u32 == 0;
493   if (adj_index == (u32)~0)
494     {
495       if (is_interface_next_hop)
496         {
497           nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
498           if (nh_result)
499             nh_adj_index = *nh_result;
500           else
501             {
502               ip_adjacency_t * adj;
503               adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
504                                       &nh_adj_index);
505               ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
506               ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
507               hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
508             }
509         }
510       else
511         {
512           nh_hash = fib->adj_index_by_dst_address[32];
513           nh_result = hash_get (nh_hash, next_hop->data_u32);
514           
515           /* Next hop must be known. */
516           if (! nh_result)
517             {
518               vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_IN_FIB;
519               error = clib_error_return (0, "next-hop %U/32 not in FIB",
520                                          format_ip4_address, next_hop);
521               goto done;
522             }
523           nh_adj_index = *nh_result;
524         }
525     }
526   else
527     {
528       nh_adj_index = adj_index;
529     }
530   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
531   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
532
533   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
534   dst_result = hash_get (dst_hash, dst_address_u32);
535   if (dst_result)
536     {
537       dst_adj_index = dst_result[0];
538       dst_adj = ip_get_adjacency (lm, dst_adj_index);
539     }
540   else
541     {
542       /* For deletes destination must be known. */
543       if (is_del)
544         {
545           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
546           error = clib_error_return (0, "unknown destination %U/%d",
547                                      format_ip4_address, dst_address,
548                                      dst_address_length);
549           goto done;
550         }
551
552       dst_adj_index = ~0;
553       dst_adj = 0;
554     }
555
556   /* Ignore adds of X/32 with next hop of X. */
557   if (! is_del
558       && dst_address_length == 32
559       && dst_address->data_u32 == next_hop->data_u32 
560       && adj_index != (u32)~0)
561     {
562       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
563       error = clib_error_return (0, "prefix matches next hop %U/%d",
564                                  format_ip4_address, dst_address,
565                                  dst_address_length);
566       goto done;
567     }
568
569   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
570
571   if (! ip_multipath_adjacency_add_del_next_hop
572       (lm, is_del,
573        old_mp_adj_index,
574        nh_adj_index,
575        next_hop_weight,
576        &new_mp_adj_index))
577     {
578       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
579       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
580                                  format_ip4_address, next_hop);
581       goto done;
582     }
583   
584   old_mp = new_mp = 0;
585   if (old_mp_adj_index != ~0)
586     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
587   if (new_mp_adj_index != ~0)
588     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
589
590   if (old_mp != new_mp)
591     {
592       ip4_add_del_route_args_t a;
593       a.table_index_or_table_id = fib_index;
594       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
595                  | IP4_ROUTE_FLAG_FIB_INDEX
596                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
597                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
598       a.dst_address = dst_address[0];
599       a.dst_address_length = dst_address_length;
600       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
601       a.add_adj = 0;
602       a.n_add_adj = 0;
603
604       ip4_add_del_route (im, &a);
605     }
606
607  done:
608   if (error)
609     clib_error_report (error);
610 }
611
612 void *
613 ip4_get_route (ip4_main_t * im,
614                u32 table_index_or_table_id,
615                u32 flags,
616                u8 * address,
617                u32 address_length)
618 {
619   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
620   u32 dst_address = * (u32 *) address;
621   uword * hash, * p;
622
623   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
624   dst_address &= im->fib_masks[address_length];
625
626   hash = fib->adj_index_by_dst_address[address_length];
627   p = hash_get (hash, dst_address);
628   return (void *) p;
629 }
630
631 void
632 ip4_foreach_matching_route (ip4_main_t * im,
633                             u32 table_index_or_table_id,
634                             u32 flags,
635                             ip4_address_t * address,
636                             u32 address_length,
637                             ip4_address_t ** results,
638                             u8 ** result_lengths)
639 {
640   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
641   u32 dst_address = address->data_u32;
642   u32 this_length = address_length;
643   
644   if (*results)
645     _vec_len (*results) = 0;
646   if (*result_lengths)
647     _vec_len (*result_lengths) = 0;
648
649   while (this_length <= 32 && vec_len (results) == 0)
650     {
651       uword k, v;
652       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
653         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
654           {
655             ip4_address_t a;
656             a.data_u32 = k;
657             vec_add1 (*results, a);
658             vec_add1 (*result_lengths, this_length);
659           }
660       }));
661
662       this_length++;
663     }
664 }
665
666 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
667                                   u32 table_index_or_table_id,
668                                   u32 flags)
669 {
670   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
671   ip_lookup_main_t * lm = &im->lookup_main;
672   u32 i, l;
673   ip4_address_t a;
674   ip4_add_del_route_callback_t * cb;
675   static ip4_address_t * to_delete;
676
677   if (lm->n_adjacency_remaps == 0)
678     return;
679
680   for (l = 0; l <= 32; l++)
681     {
682       hash_pair_t * p;
683       uword * hash = fib->adj_index_by_dst_address[l];
684
685       if (hash_elts (hash) == 0)
686         continue;
687
688       if (to_delete)
689         _vec_len (to_delete) = 0;
690
691       hash_foreach_pair (p, hash, ({
692         u32 adj_index = p->value[0];
693         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
694
695         if (m)
696           {
697             /* Record destination address from hash key. */
698             a.data_u32 = p->key;
699
700             /* New adjacency points to nothing: so delete prefix. */
701             if (m == ~0)
702               vec_add1 (to_delete, a);
703             else
704               {
705                 /* Remap to new adjacency. */
706                 memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
707
708                 /* Set new adjacency value. */
709                 fib->new_hash_values[0] = p->value[0] = m - 1;
710
711                 vec_foreach (cb, im->add_del_route_callbacks)
712                   if ((flags & cb->required_flags) == cb->required_flags)
713                     cb->function (im, cb->function_opaque,
714                                   fib, flags | IP4_ROUTE_FLAG_ADD,
715                                   &a, l,
716                                   fib->old_hash_values,
717                                   fib->new_hash_values);
718               }
719           }
720       }));
721
722       fib->new_hash_values[0] = ~0;
723       for (i = 0; i < vec_len (to_delete); i++)
724         {
725           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
726           vec_foreach (cb, im->add_del_route_callbacks)
727             if ((flags & cb->required_flags) == cb->required_flags)
728               cb->function (im, cb->function_opaque,
729                             fib, flags | IP4_ROUTE_FLAG_DEL,
730                             &a, l,
731                             fib->old_hash_values,
732                             fib->new_hash_values);
733         }
734     }
735
736   /* Also remap adjacencies in mtrie. */
737   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
738
739   /* Reset mapping table. */
740   vec_zero (lm->adjacency_remap_table);
741
742   /* All remaps have been performed. */
743   lm->n_adjacency_remaps = 0;
744 }
745
746 void ip4_delete_matching_routes (ip4_main_t * im,
747                                  u32 table_index_or_table_id,
748                                  u32 flags,
749                                  ip4_address_t * address,
750                                  u32 address_length)
751 {
752   static ip4_address_t * matching_addresses;
753   static u8 * matching_address_lengths;
754   u32 l, i;
755   ip4_add_del_route_args_t a;
756
757   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
758   a.table_index_or_table_id = table_index_or_table_id;
759   a.adj_index = ~0;
760   a.add_adj = 0;
761   a.n_add_adj = 0;
762
763   for (l = address_length + 1; l <= 32; l++)
764     {
765       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
766                                   address,
767                                   l,
768                                   &matching_addresses,
769                                   &matching_address_lengths);
770       for (i = 0; i < vec_len (matching_addresses); i++)
771         {
772           a.dst_address = matching_addresses[i];
773           a.dst_address_length = matching_address_lengths[i];
774           ip4_add_del_route (im, &a);
775         }
776     }
777
778   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
779 }
780
781 always_inline uword
782 ip4_lookup_inline (vlib_main_t * vm,
783                    vlib_node_runtime_t * node,
784                    vlib_frame_t * frame,
785                    int lookup_for_responses_to_locally_received_packets)
786 {
787   ip4_main_t * im = &ip4_main;
788   ip_lookup_main_t * lm = &im->lookup_main;
789   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
790   u32 n_left_from, n_left_to_next, * from, * to_next;
791   ip_lookup_next_t next;
792   u32 cpu_index = os_get_cpu_number();
793
794   from = vlib_frame_vector_args (frame);
795   n_left_from = frame->n_vectors;
796   next = node->cached_next_index;
797
798   while (n_left_from > 0)
799     {
800       vlib_get_next_frame (vm, node, next,
801                            to_next, n_left_to_next);
802
803       while (n_left_from >= 4 && n_left_to_next >= 2)
804         {
805           vlib_buffer_t * p0, * p1;
806           ip4_header_t * ip0, * ip1;
807           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
808           ip_lookup_next_t next0, next1;
809           ip_adjacency_t * adj0, * adj1;
810           ip4_fib_mtrie_t * mtrie0, * mtrie1;
811           ip4_fib_mtrie_leaf_t leaf0, leaf1;
812           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
813           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
814           u32 flow_hash_config0, flow_hash_config1;
815           u32 hash_c0, hash_c1;
816           u32 wrong_next;
817
818           /* Prefetch next iteration. */
819           {
820             vlib_buffer_t * p2, * p3;
821
822             p2 = vlib_get_buffer (vm, from[2]);
823             p3 = vlib_get_buffer (vm, from[3]);
824
825             vlib_prefetch_buffer_header (p2, LOAD);
826             vlib_prefetch_buffer_header (p3, LOAD);
827
828             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
829             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
830           }
831
832           pi0 = to_next[0] = from[0];
833           pi1 = to_next[1] = from[1];
834
835           p0 = vlib_get_buffer (vm, pi0);
836           p1 = vlib_get_buffer (vm, pi1);
837
838           ip0 = vlib_buffer_get_current (p0);
839           ip1 = vlib_buffer_get_current (p1);
840
841           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
842           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
843           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
844             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
845           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
846             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
847
848
849           if (! lookup_for_responses_to_locally_received_packets)
850             {
851               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
852               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
853
854               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
855
856               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0);
857               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 0);
858             }
859
860           tcp0 = (void *) (ip0 + 1);
861           tcp1 = (void *) (ip1 + 1);
862
863           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
864                          || ip0->protocol == IP_PROTOCOL_UDP);
865           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
866                          || ip1->protocol == IP_PROTOCOL_UDP);
867
868           if (! lookup_for_responses_to_locally_received_packets)
869             {
870               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1);
871               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 1);
872             }
873
874           if (! lookup_for_responses_to_locally_received_packets)
875             {
876               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2);
877               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 2);
878             }
879
880           if (! lookup_for_responses_to_locally_received_packets)
881             {
882               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3);
883               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 3);
884             }
885
886           if (lookup_for_responses_to_locally_received_packets)
887             {
888               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
889               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
890             }
891           else
892             {
893               /* Handle default route. */
894               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
895               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
896
897               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
898               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
899             }
900
901           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
902                                                            &ip0->dst_address,
903                                                            /* no_default_route */ 0));
904           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
905                                                            &ip1->dst_address,
906                                                            /* no_default_route */ 0));
907           adj0 = ip_get_adjacency (lm, adj_index0);
908           adj1 = ip_get_adjacency (lm, adj_index1);
909
910           next0 = adj0->lookup_next_index;
911           next1 = adj1->lookup_next_index;
912
913           /* Use flow hash to compute multipath adjacency. */
914           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
915           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
916           if (PREDICT_FALSE (adj0->n_adj > 1))
917             {
918               flow_hash_config0 = 
919                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
920               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
921                 ip4_compute_flow_hash (ip0, flow_hash_config0);
922             }
923           if (PREDICT_FALSE(adj1->n_adj > 1))
924             {
925               flow_hash_config1 = 
926                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
927               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
928                 ip4_compute_flow_hash (ip1, flow_hash_config1);
929             }
930
931           ASSERT (adj0->n_adj > 0);
932           ASSERT (adj1->n_adj > 0);
933           ASSERT (is_pow2 (adj0->n_adj));
934           ASSERT (is_pow2 (adj1->n_adj));
935           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
936           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
937
938           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
939           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
940
941           vlib_increment_combined_counter 
942               (cm, cpu_index, adj_index0, 1,
943                vlib_buffer_length_in_chain (vm, p0) 
944                + sizeof(ethernet_header_t));
945           vlib_increment_combined_counter 
946               (cm, cpu_index, adj_index1, 1,
947                vlib_buffer_length_in_chain (vm, p1)
948                + sizeof(ethernet_header_t));
949
950           from += 2;
951           to_next += 2;
952           n_left_to_next -= 2;
953           n_left_from -= 2;
954
955           wrong_next = (next0 != next) + 2*(next1 != next);
956           if (PREDICT_FALSE (wrong_next != 0))
957             {
958               switch (wrong_next)
959                 {
960                 case 1:
961                   /* A B A */
962                   to_next[-2] = pi1;
963                   to_next -= 1;
964                   n_left_to_next += 1;
965                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
966                   break;
967
968                 case 2:
969                   /* A A B */
970                   to_next -= 1;
971                   n_left_to_next += 1;
972                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
973                   break;
974
975                 case 3:
976                   /* A B C */
977                   to_next -= 2;
978                   n_left_to_next += 2;
979                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
980                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
981                   if (next0 == next1)
982                     {
983                       /* A B B */
984                       vlib_put_next_frame (vm, node, next, n_left_to_next);
985                       next = next1;
986                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
987                     }
988                 }
989             }
990         }
991     
992       while (n_left_from > 0 && n_left_to_next > 0)
993         {
994           vlib_buffer_t * p0;
995           ip4_header_t * ip0;
996           __attribute__((unused)) tcp_header_t * tcp0;
997           ip_lookup_next_t next0;
998           ip_adjacency_t * adj0;
999           ip4_fib_mtrie_t * mtrie0;
1000           ip4_fib_mtrie_leaf_t leaf0;
1001           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
1002           u32 flow_hash_config0, hash_c0;
1003
1004           pi0 = from[0];
1005           to_next[0] = pi0;
1006
1007           p0 = vlib_get_buffer (vm, pi0);
1008
1009           ip0 = vlib_buffer_get_current (p0);
1010
1011           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
1012           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
1013             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
1014
1015           if (! lookup_for_responses_to_locally_received_packets)
1016             {
1017               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1018
1019               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1020
1021               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0);
1022             }
1023
1024           tcp0 = (void *) (ip0 + 1);
1025
1026           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
1027                          || ip0->protocol == IP_PROTOCOL_UDP);
1028
1029           if (! lookup_for_responses_to_locally_received_packets)
1030             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1);
1031
1032           if (! lookup_for_responses_to_locally_received_packets)
1033             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2);
1034
1035           if (! lookup_for_responses_to_locally_received_packets)
1036             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3);
1037
1038           if (lookup_for_responses_to_locally_received_packets)
1039             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
1040           else
1041             {
1042               /* Handle default route. */
1043               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1044               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1045             }
1046
1047           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1048                                                            &ip0->dst_address,
1049                                                            /* no_default_route */ 0));
1050
1051           adj0 = ip_get_adjacency (lm, adj_index0);
1052
1053           next0 = adj0->lookup_next_index;
1054
1055           /* Use flow hash to compute multipath adjacency. */
1056           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
1057           if (PREDICT_FALSE(adj0->n_adj > 1))
1058             {
1059               flow_hash_config0 = 
1060                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
1061
1062               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
1063                 ip4_compute_flow_hash (ip0, flow_hash_config0);
1064             }
1065
1066           ASSERT (adj0->n_adj > 0);
1067           ASSERT (is_pow2 (adj0->n_adj));
1068           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
1069
1070           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1071
1072           vlib_increment_combined_counter 
1073               (cm, cpu_index, adj_index0, 1,
1074                vlib_buffer_length_in_chain (vm, p0)
1075                + sizeof(ethernet_header_t));
1076
1077           from += 1;
1078           to_next += 1;
1079           n_left_to_next -= 1;
1080           n_left_from -= 1;
1081
1082           if (PREDICT_FALSE (next0 != next))
1083             {
1084               n_left_to_next += 1;
1085               vlib_put_next_frame (vm, node, next, n_left_to_next);
1086               next = next0;
1087               vlib_get_next_frame (vm, node, next,
1088                                    to_next, n_left_to_next);
1089               to_next[0] = pi0;
1090               to_next += 1;
1091               n_left_to_next -= 1;
1092             }
1093         }
1094
1095       vlib_put_next_frame (vm, node, next, n_left_to_next);
1096     }
1097
1098   return frame->n_vectors;
1099 }
1100
1101 static uword
1102 ip4_lookup (vlib_main_t * vm,
1103             vlib_node_runtime_t * node,
1104             vlib_frame_t * frame)
1105 {
1106   return ip4_lookup_inline (vm, node, frame, /* lookup_for_responses_to_locally_received_packets */ 0);
1107
1108 }
1109
1110 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
1111                                         ip_adjacency_t * adj,
1112                                         u32 sw_if_index,
1113                                         u32 if_address_index)
1114 {
1115   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1116   ip_lookup_next_t n;
1117   vnet_l3_packet_type_t packet_type;
1118   u32 node_index;
1119
1120   if (hw->hw_class_index == ethernet_hw_interface_class.index
1121       || hw->hw_class_index == srp_hw_interface_class.index)
1122     {
1123       /* 
1124        * We have a bit of a problem in this case. ip4-arp uses
1125        * the rewrite_header.next_index to hand pkts to the
1126        * indicated inteface output node. We can end up in
1127        * ip4_rewrite_local, too, which also pays attention to 
1128        * rewrite_header.next index. Net result: a hack in
1129        * ip4_rewrite_local...
1130        */
1131       n = IP_LOOKUP_NEXT_ARP;
1132       node_index = ip4_arp_node.index;
1133       adj->if_address_index = if_address_index;
1134       packet_type = VNET_L3_PACKET_TYPE_ARP;
1135     }
1136   else
1137     {
1138       n = IP_LOOKUP_NEXT_REWRITE;
1139       node_index = ip4_rewrite_node.index;
1140       packet_type = VNET_L3_PACKET_TYPE_IP4;
1141     }
1142
1143   adj->lookup_next_index = n;
1144   vnet_rewrite_for_sw_interface
1145     (vnm,
1146      packet_type,
1147      sw_if_index,
1148      node_index,
1149      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1150      &adj->rewrite_header,
1151      sizeof (adj->rewrite_data));
1152 }
1153
1154 static void
1155 ip4_add_interface_routes (u32 sw_if_index,
1156                           ip4_main_t * im, u32 fib_index,
1157                           ip_interface_address_t * a)
1158 {
1159   vnet_main_t * vnm = vnet_get_main();
1160   ip_lookup_main_t * lm = &im->lookup_main;
1161   ip_adjacency_t * adj;
1162   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1163   ip4_add_del_route_args_t x;
1164   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1165   u32 classify_table_index;
1166
1167   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1168   x.table_index_or_table_id = fib_index;
1169   x.flags = (IP4_ROUTE_FLAG_ADD
1170              | IP4_ROUTE_FLAG_FIB_INDEX
1171              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1172   x.dst_address = address[0];
1173   x.dst_address_length = a->address_length;
1174   x.n_add_adj = 0;
1175   x.add_adj = 0;
1176
1177   a->neighbor_probe_adj_index = ~0;
1178   if (a->address_length < 32)
1179     {
1180       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1181                               &x.adj_index);
1182       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1183       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1184       ip4_add_del_route (im, &x);
1185       a->neighbor_probe_adj_index = x.adj_index;
1186     }
1187   
1188   /* Add e.g. 1.1.1.1/32 as local to this host. */
1189   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1190                           &x.adj_index);
1191   
1192   classify_table_index = ~0;
1193   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1194     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1195   if (classify_table_index != (u32) ~0)
1196     {
1197       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1198       adj->classify_table_index = classify_table_index;
1199     }
1200   else
1201     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1202   
1203   adj->if_address_index = a - lm->if_address_pool;
1204   adj->rewrite_header.sw_if_index = sw_if_index;
1205   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1206   /* 
1207    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1208    * fail an RPF-ish check, but still go thru the rewrite code...
1209    */
1210   adj->rewrite_header.data_bytes = 0;
1211
1212   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1213   x.dst_address_length = 32;
1214   ip4_add_del_route (im, &x);
1215 }
1216
1217 static void
1218 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1219 {
1220   ip4_add_del_route_args_t x;
1221
1222   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1223   x.table_index_or_table_id = fib_index;
1224   x.flags = (IP4_ROUTE_FLAG_DEL
1225              | IP4_ROUTE_FLAG_FIB_INDEX
1226              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1227   x.dst_address = address[0];
1228   x.dst_address_length = address_length;
1229   x.adj_index = ~0;
1230   x.n_add_adj = 0;
1231   x.add_adj = 0;
1232
1233   if (address_length < 32)
1234     ip4_add_del_route (im, &x);
1235
1236   x.dst_address_length = 32;
1237   ip4_add_del_route (im, &x);
1238
1239   ip4_delete_matching_routes (im,
1240                               fib_index,
1241                               IP4_ROUTE_FLAG_FIB_INDEX,
1242                               address,
1243                               address_length);
1244 }
1245
1246 typedef struct {
1247     u32 sw_if_index;
1248     ip4_address_t address;
1249     u32 length;
1250 } ip4_interface_address_t;
1251
1252 static void serialize_vec_ip4_set_interface_address (serialize_main_t * m, va_list * va)
1253 {
1254     ip4_interface_address_t * a = va_arg (*va, ip4_interface_address_t *);
1255     u32 n = va_arg (*va, u32);
1256     u32 i;
1257     for (i = 0; i < n; i++) {
1258         serialize_integer (m, a[i].sw_if_index, sizeof (a[i].sw_if_index));
1259         serialize (m, serialize_ip4_address, &a[i].address);
1260         serialize_integer (m, a[i].length, sizeof (a[i].length));
1261     }
1262 }
1263
1264 static void unserialize_vec_ip4_set_interface_address (serialize_main_t * m, va_list * va)
1265 {
1266     ip4_interface_address_t * a = va_arg (*va, ip4_interface_address_t *);
1267     u32 n = va_arg (*va, u32);
1268     u32 i;
1269     for (i = 0; i < n; i++) {
1270         unserialize_integer (m, &a[i].sw_if_index, sizeof (a[i].sw_if_index));
1271         unserialize (m, unserialize_ip4_address, &a[i].address);
1272         unserialize_integer (m, &a[i].length, sizeof (a[i].length));
1273     }
1274 }
1275
1276 static void serialize_ip4_set_interface_address_msg (serialize_main_t * m, va_list * va)
1277 {
1278   ip4_interface_address_t * a = va_arg (*va, ip4_interface_address_t *);
1279   int is_del = va_arg (*va, int);
1280   serialize (m, serialize_vec_ip4_set_interface_address, a, 1);
1281   serialize_integer (m, is_del, sizeof (is_del));
1282 }
1283
1284 static clib_error_t *
1285 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1286                                         u32 sw_if_index,
1287                                         ip4_address_t * new_address,
1288                                         u32 new_length,
1289                                         u32 redistribute,
1290                                         u32 insert_routes,
1291                                         u32 is_del);
1292
1293 static void unserialize_ip4_set_interface_address_msg (serialize_main_t * m, va_list * va)
1294 {
1295   mc_main_t * mcm = va_arg (*va, mc_main_t *);
1296   vlib_main_t * vm = mcm->vlib_main;
1297   ip4_interface_address_t a;
1298   clib_error_t * error;
1299   int is_del;
1300
1301   unserialize (m, unserialize_vec_ip4_set_interface_address, &a, 1);
1302   unserialize_integer (m, &is_del, sizeof (is_del));
1303   error = ip4_add_del_interface_address_internal
1304     (vm, a.sw_if_index, &a.address, a.length,
1305      /* redistribute */ 0,
1306      /* insert_routes */ 1,
1307      is_del);
1308   if (error)
1309     clib_error_report (error);
1310 }
1311
1312 MC_SERIALIZE_MSG (ip4_set_interface_address_msg, static) = {
1313   .name = "vnet_ip4_set_interface_address",
1314   .serialize = serialize_ip4_set_interface_address_msg,
1315   .unserialize = unserialize_ip4_set_interface_address_msg,
1316 };
1317
1318 static clib_error_t *
1319 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1320                                         u32 sw_if_index,
1321                                         ip4_address_t * address,
1322                                         u32 address_length,
1323                                         u32 redistribute,
1324                                         u32 insert_routes,
1325                                         u32 is_del)
1326 {
1327   vnet_main_t * vnm = vnet_get_main();
1328   ip4_main_t * im = &ip4_main;
1329   ip_lookup_main_t * lm = &im->lookup_main;
1330   clib_error_t * error = 0;
1331   u32 if_address_index, elts_before;
1332   ip4_address_fib_t ip4_af, * addr_fib = 0;
1333
1334   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1335   ip4_addr_fib_init (&ip4_af, address,
1336                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1337   vec_add1 (addr_fib, ip4_af);
1338
1339   /* When adding an address check that it does not conflict with an existing address. */
1340   if (! is_del)
1341     {
1342       ip_interface_address_t * ia;
1343       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1344                                     0 /* honor unnumbered */,
1345       ({
1346         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1347
1348         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1349             || ip4_destination_matches_route (im, x, address, address_length))
1350           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1351                                     format_ip4_address_and_length, address, address_length,
1352                                     format_ip4_address_and_length, x, ia->address_length,
1353                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1354       }));
1355     }
1356
1357   if (vm->mc_main && redistribute)
1358     {
1359       ip4_interface_address_t a;
1360       a.sw_if_index = sw_if_index;
1361       a.address = address[0];
1362       a.length = address_length;
1363       mc_serialize (vm->mc_main, &ip4_set_interface_address_msg, 
1364                     &a, (int)is_del);
1365       goto done;
1366     }
1367     
1368   elts_before = pool_elts (lm->if_address_pool);
1369
1370   error = ip_interface_address_add_del
1371     (lm,
1372      sw_if_index,
1373      addr_fib,
1374      address_length,
1375      is_del,
1376      &if_address_index);
1377   if (error)
1378     goto done;
1379   
1380   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1381     {
1382       if (is_del)
1383         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1384                                   address_length);
1385       
1386       else
1387           ip4_add_interface_routes (sw_if_index,
1388                                     im, ip4_af.fib_index,
1389                                     pool_elt_at_index 
1390                                     (lm->if_address_pool, if_address_index));
1391     }
1392
1393   /* If pool did not grow/shrink: add duplicate address. */
1394   if (elts_before != pool_elts (lm->if_address_pool))
1395     {
1396       ip4_add_del_interface_address_callback_t * cb;
1397       vec_foreach (cb, im->add_del_interface_address_callbacks)
1398         cb->function (im, cb->function_opaque, sw_if_index,
1399                       address, address_length,
1400                       if_address_index,
1401                       is_del);
1402     }
1403
1404  done:
1405   vec_free (addr_fib);
1406   return error;
1407 }
1408
1409 clib_error_t *
1410 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1411                                ip4_address_t * address, u32 address_length,
1412                                u32 is_del)
1413 {
1414   return ip4_add_del_interface_address_internal
1415     (vm, sw_if_index, address, address_length,
1416      /* redistribute */ 1,
1417      /* insert_routes */ 1,
1418      is_del);
1419 }
1420
1421 static void serialize_ip4_fib (serialize_main_t * m, va_list * va)
1422 {
1423   ip4_fib_t * f = va_arg (*va, ip4_fib_t *);
1424   u32 l, dst, adj_index;
1425
1426   serialize_integer (m, f->table_id, sizeof (f->table_id));
1427   for (l = 0; l < ARRAY_LEN (f->adj_index_by_dst_address); l++)
1428     {
1429       u32 n_elts = hash_elts (f->adj_index_by_dst_address[l]);
1430
1431       serialize_integer (m, n_elts, sizeof (n_elts));
1432       hash_foreach (dst, adj_index, f->adj_index_by_dst_address[l], ({
1433         ip4_address_t tmp;
1434         tmp.as_u32 = dst;
1435         serialize (m, serialize_ip4_address, &tmp);
1436         serialize_integer (m, adj_index, sizeof (adj_index));
1437       }));
1438     }
1439 }
1440
1441 static void unserialize_ip4_fib (serialize_main_t * m, va_list * va)
1442 {
1443   ip4_add_del_route_args_t a;
1444   u32 i;
1445
1446   a.flags = (IP4_ROUTE_FLAG_ADD
1447              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE
1448              | IP4_ROUTE_FLAG_TABLE_ID);
1449   a.n_add_adj = 0;
1450   a.add_adj = 0;
1451
1452   unserialize_integer (m, &a.table_index_or_table_id,
1453                        sizeof (a.table_index_or_table_id));
1454
1455   for (i = 0; i < STRUCT_ARRAY_LEN (ip4_fib_t, adj_index_by_dst_address); i++)
1456     {
1457       u32 n_elts;
1458       unserialize_integer (m, &n_elts, sizeof (u32));
1459       a.dst_address_length = i;
1460       while (n_elts > 0)
1461         {
1462           unserialize (m, unserialize_ip4_address, &a.dst_address);
1463           unserialize_integer (m, &a.adj_index, sizeof (a.adj_index));
1464           ip4_add_del_route (&ip4_main, &a);
1465           n_elts--;
1466         }
1467     }
1468 }
1469
1470 void serialize_vnet_ip4_main (serialize_main_t * m, va_list * va)
1471 {
1472   vnet_main_t * vnm = va_arg (*va, vnet_main_t *);
1473   vnet_interface_main_t * vim = &vnm->interface_main;
1474   vnet_sw_interface_t * si;
1475   ip4_main_t * i4m = &ip4_main;
1476   ip4_interface_address_t * as = 0, * a;
1477
1478   /* Download adjacency tables & multipath stuff. */
1479   serialize (m, serialize_ip_lookup_main, &i4m->lookup_main);
1480
1481   /* FIBs. */
1482   {
1483     ip4_fib_t * f;
1484     u32 n_fibs = vec_len (i4m->fibs);
1485     serialize_integer (m, n_fibs, sizeof (n_fibs));
1486     vec_foreach (f, i4m->fibs)
1487       serialize (m, serialize_ip4_fib, f);
1488   }
1489
1490   /* FIB interface config. */
1491   vec_serialize (m, i4m->fib_index_by_sw_if_index, serialize_vec_32);
1492
1493   /* Interface ip4 addresses. */
1494   pool_foreach (si, vim->sw_interfaces, ({
1495     u32 sw_if_index = si->sw_if_index;
1496     ip_interface_address_t * ia;
1497     foreach_ip_interface_address (&i4m->lookup_main, ia, sw_if_index, 
1498                                   0 /* honor unnumbered */,
1499     ({
1500       ip4_address_t * x = ip_interface_address_get_address (&i4m->lookup_main, ia);
1501       vec_add2 (as, a, 1);
1502       a->address = x[0];
1503       a->length = ia->address_length;
1504       a->sw_if_index = sw_if_index;
1505     }));
1506   }));
1507   vec_serialize (m, as, serialize_vec_ip4_set_interface_address);
1508   vec_free (as);
1509 }
1510
1511 void unserialize_vnet_ip4_main (serialize_main_t * m, va_list * va)
1512 {
1513   vlib_main_t * vm = va_arg (*va, vlib_main_t *);
1514   ip4_main_t * i4m = &ip4_main;
1515   ip4_interface_address_t * as = 0, * a;
1516
1517   unserialize (m, unserialize_ip_lookup_main, &i4m->lookup_main);
1518
1519   {
1520     ip_adjacency_t * adj, * adj_heap;
1521     u32 n_adj;
1522     adj_heap = i4m->lookup_main.adjacency_heap;
1523     heap_foreach (adj, n_adj, adj_heap, ({
1524       unserialize_fixup_ip4_rewrite_adjacencies (vm, adj, n_adj);
1525       ip_call_add_del_adjacency_callbacks (&i4m->lookup_main, adj - adj_heap, /* is_del */ 0);
1526     }));
1527   }
1528
1529   /* FIBs */
1530   {
1531     u32 i, n_fibs;
1532     unserialize_integer (m, &n_fibs, sizeof (n_fibs));
1533     for (i = 0; i < n_fibs; i++)
1534       unserialize (m, unserialize_ip4_fib);
1535   }
1536
1537   vec_unserialize (m, &i4m->fib_index_by_sw_if_index, unserialize_vec_32);
1538
1539   vec_unserialize (m, &as, unserialize_vec_ip4_set_interface_address);
1540   vec_foreach (a, as) {
1541     ip4_add_del_interface_address_internal
1542       (vm, a->sw_if_index, &a->address, a->length,
1543        /* redistribute */ 0,
1544        /* insert_routes */ 0,
1545        /* is_del */ 0);
1546   }
1547   vec_free (as);
1548 }
1549
1550 static clib_error_t *
1551 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1552                                 u32 sw_if_index,
1553                                 u32 flags)
1554 {
1555   ip4_main_t * im = &ip4_main;
1556   ip_interface_address_t * ia;
1557   ip4_address_t * a;
1558   u32 is_admin_up, fib_index;
1559   
1560   /* Fill in lookup tables with default table (0). */
1561   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1562   
1563   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1564   
1565   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1566   
1567   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1568
1569   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1570                                 0 /* honor unnumbered */,
1571   ({
1572     a = ip_interface_address_get_address (&im->lookup_main, ia);
1573     if (is_admin_up)
1574       ip4_add_interface_routes (sw_if_index,
1575                                 im, fib_index,
1576                                 ia);
1577     else
1578       ip4_del_interface_routes (im, fib_index,
1579                                 a, ia->address_length);
1580   }));
1581
1582   return 0;
1583 }
1584  
1585 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1586
1587 static clib_error_t *
1588 ip4_sw_interface_add_del (vnet_main_t * vnm,
1589                           u32 sw_if_index,
1590                           u32 is_add)
1591 {
1592   vlib_main_t * vm = vnm->vlib_main;
1593   ip4_main_t * im = &ip4_main;
1594   ip_lookup_main_t * lm = &im->lookup_main;
1595   u32 ci, cast;
1596
1597   for (cast = 0; cast < VNET_N_CAST; cast++)
1598     {
1599       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1600       vnet_config_main_t * vcm = &cm->config_main;
1601
1602       if (! vcm->node_index_by_feature_index)
1603         {
1604           if (cast == VNET_UNICAST)
1605             {
1606               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1607               static char * feature_nodes[] = {
1608                 [IP4_RX_FEATURE_CHECK_ACCESS] = "ip4-inacl",
1609                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX] = "ip4-source-check-via-rx",
1610                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY] = "ip4-source-check-via-any",
1611                 [IP4_RX_FEATURE_IPSEC] = "ipsec-input-ip4",
1612                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1613                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup",
1614               };
1615
1616               vnet_config_init (vm, vcm,
1617                                 start_nodes, ARRAY_LEN (start_nodes),
1618                                 feature_nodes, ARRAY_LEN (feature_nodes));
1619             }
1620           else
1621             {
1622               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1623               static char * feature_nodes[] = {
1624                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1625                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup-multicast",
1626               };
1627
1628               vnet_config_init (vm, vcm,
1629                                 start_nodes, ARRAY_LEN (start_nodes),
1630                                 feature_nodes, ARRAY_LEN (feature_nodes));
1631             }
1632         }
1633
1634       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1635       ci = cm->config_index_by_sw_if_index[sw_if_index];
1636
1637       if (is_add)
1638         ci = vnet_config_add_feature (vm, vcm,
1639                                       ci,
1640                                       IP4_RX_FEATURE_LOOKUP,
1641                                       /* config data */ 0,
1642                                       /* # bytes of config data */ 0);
1643       else
1644         ci = vnet_config_del_feature (vm, vcm,
1645                                       ci,
1646                                       IP4_RX_FEATURE_LOOKUP,
1647                                       /* config data */ 0,
1648                                       /* # bytes of config data */ 0);
1649
1650       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1651     }
1652
1653   return /* no error */ 0;
1654 }
1655
1656 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1657
1658 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1659   .function = ip4_lookup,
1660   .name = "ip4-lookup",
1661   .vector_size = sizeof (u32),
1662
1663   .n_next_nodes = IP_LOOKUP_N_NEXT,
1664   .next_nodes = {
1665     [IP_LOOKUP_NEXT_MISS] = "ip4-miss",
1666     [IP_LOOKUP_NEXT_DROP] = "ip4-drop",
1667     [IP_LOOKUP_NEXT_PUNT] = "ip4-punt",
1668     [IP_LOOKUP_NEXT_LOCAL] = "ip4-local",
1669     [IP_LOOKUP_NEXT_ARP] = "ip4-arp",
1670     [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit",
1671     [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify",
1672     [IP_LOOKUP_NEXT_MAP] = "ip4-map",
1673     [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t",
1674     [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd",
1675     [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop",
1676     [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop", 
1677     [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop", 
1678   },
1679 };
1680
/* Global IP4 main: the single ip4_main_t instance.  Functions in this file
   reach the IPv4 state (FIBs, lookup main, ARP packet template, ...) via
   &ip4_main. */
ip4_main_t ip4_main;
1683
1684 clib_error_t *
1685 ip4_lookup_init (vlib_main_t * vm)
1686 {
1687   ip4_main_t * im = &ip4_main;
1688   uword i;
1689
1690   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1691     {
1692       u32 m;
1693
1694       if (i < 32)
1695         m = pow2_mask (i) << (32 - i);
1696       else 
1697         m = ~0;
1698       im->fib_masks[i] = clib_host_to_net_u32 (m);
1699     }
1700
1701   /* Create FIB with index 0 and table id of 0. */
1702   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1703
1704   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1705
1706   {
1707     pg_node_t * pn;
1708     pn = pg_get_node (ip4_lookup_node.index);
1709     pn->unformat_edit = unformat_pg_ip4_header;
1710   }
1711
1712   {
1713     ethernet_arp_header_t h;
1714
1715     memset (&h, 0, sizeof (h));
1716
1717     /* Set target ethernet address to all zeros. */
1718     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1719
1720 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1721 #define _8(f,v) h.f = v;
1722     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1723     _16 (l3_type, ETHERNET_TYPE_IP4);
1724     _8 (n_l2_address_bytes, 6);
1725     _8 (n_l3_address_bytes, 4);
1726     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1727 #undef _16
1728 #undef _8
1729
1730     vlib_packet_template_init (vm,
1731                                &im->ip4_arp_request_packet_template,
1732                                /* data */ &h,
1733                                sizeof (h),
1734                                /* alloc chunk size */ 8,
1735                                "ip4 arp");
1736   }
1737
1738   return 0;
1739 }
1740
1741 VLIB_INIT_FUNCTION (ip4_lookup_init);
1742
1743 typedef struct {
1744   /* Adjacency taken. */
1745   u32 adj_index;
1746   u32 flow_hash;
1747   u32 fib_index;
1748
1749   /* Packet data, possibly *after* rewrite. */
1750   u8 packet_data[64 - 1*sizeof(u32)];
1751 } ip4_forward_next_trace_t;
1752
1753 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1754 {
1755   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1756   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1757   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1758   vnet_main_t * vnm = vnet_get_main();
1759   ip4_main_t * im = &ip4_main;
1760   ip_adjacency_t * adj;
1761   uword indent = format_get_indent (s);
1762
1763   adj = ip_get_adjacency (&im->lookup_main, t->adj_index);
1764   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1765               t->fib_index, t->adj_index, format_ip_adjacency,
1766               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1767   switch (adj->lookup_next_index)
1768     {
1769     case IP_LOOKUP_NEXT_REWRITE:
1770       s = format (s, "\n%U%U",
1771                   format_white_space, indent,
1772                   format_ip_adjacency_packet_data,
1773                   vnm, &im->lookup_main, t->adj_index,
1774                   t->packet_data, sizeof (t->packet_data));
1775       break;
1776
1777     default:
1778       break;
1779     }
1780
1781   return s;
1782 }
1783
1784 /* Common trace function for all ip4-forward next nodes. */
1785 void
1786 ip4_forward_next_trace (vlib_main_t * vm,
1787                         vlib_node_runtime_t * node,
1788                         vlib_frame_t * frame,
1789                         vlib_rx_or_tx_t which_adj_index)
1790 {
1791   u32 * from, n_left;
1792   ip4_main_t * im = &ip4_main;
1793
1794   n_left = frame->n_vectors;
1795   from = vlib_frame_vector_args (frame);
1796   
1797   while (n_left >= 4)
1798     {
1799       u32 bi0, bi1;
1800       vlib_buffer_t * b0, * b1;
1801       ip4_forward_next_trace_t * t0, * t1;
1802
1803       /* Prefetch next iteration. */
1804       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1805       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1806
1807       bi0 = from[0];
1808       bi1 = from[1];
1809
1810       b0 = vlib_get_buffer (vm, bi0);
1811       b1 = vlib_get_buffer (vm, bi1);
1812
1813       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1814         {
1815           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1816           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1817           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1818           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1819                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1820           memcpy (t0->packet_data,
1821                   vlib_buffer_get_current (b0),
1822                   sizeof (t0->packet_data));
1823         }
1824       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1825         {
1826           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1827           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1828           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1829           t1->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1830                              vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1831           memcpy (t1->packet_data,
1832                   vlib_buffer_get_current (b1),
1833                   sizeof (t1->packet_data));
1834         }
1835       from += 2;
1836       n_left -= 2;
1837     }
1838
1839   while (n_left >= 1)
1840     {
1841       u32 bi0;
1842       vlib_buffer_t * b0;
1843       ip4_forward_next_trace_t * t0;
1844
1845       bi0 = from[0];
1846
1847       b0 = vlib_get_buffer (vm, bi0);
1848
1849       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1850         {
1851           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1852           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1853           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1854           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1855                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1856           memcpy (t0->packet_data,
1857                   vlib_buffer_get_current (b0),
1858                   sizeof (t0->packet_data));
1859         }
1860       from += 1;
1861       n_left -= 1;
1862     }
1863 }
1864
1865 static uword
1866 ip4_drop_or_punt (vlib_main_t * vm,
1867                   vlib_node_runtime_t * node,
1868                   vlib_frame_t * frame,
1869                   ip4_error_t error_code)
1870 {
1871   u32 * buffers = vlib_frame_vector_args (frame);
1872   uword n_packets = frame->n_vectors;
1873
1874   vlib_error_drop_buffers (vm, node,
1875                            buffers,
1876                            /* stride */ 1,
1877                            n_packets,
1878                            /* next */ 0,
1879                            ip4_input_node.index,
1880                            error_code);
1881
1882   if (node->flags & VLIB_NODE_FLAG_TRACE)
1883     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1884
1885   return n_packets;
1886 }
1887
1888 static uword
1889 ip4_drop (vlib_main_t * vm,
1890           vlib_node_runtime_t * node,
1891           vlib_frame_t * frame)
1892 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1893
1894 static uword
1895 ip4_punt (vlib_main_t * vm,
1896           vlib_node_runtime_t * node,
1897           vlib_frame_t * frame)
1898 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1899
1900 static uword
1901 ip4_miss (vlib_main_t * vm,
1902           vlib_node_runtime_t * node,
1903           vlib_frame_t * frame)
1904 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1905
1906 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1907   .function = ip4_drop,
1908   .name = "ip4-drop",
1909   .vector_size = sizeof (u32),
1910
1911   .format_trace = format_ip4_forward_next_trace,
1912
1913   .n_next_nodes = 1,
1914   .next_nodes = {
1915     [0] = "error-drop",
1916   },
1917 };
1918
1919 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1920   .function = ip4_punt,
1921   .name = "ip4-punt",
1922   .vector_size = sizeof (u32),
1923
1924   .format_trace = format_ip4_forward_next_trace,
1925
1926   .n_next_nodes = 1,
1927   .next_nodes = {
1928     [0] = "error-punt",
1929   },
1930 };
1931
1932 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1933   .function = ip4_miss,
1934   .name = "ip4-miss",
1935   .vector_size = sizeof (u32),
1936
1937   .format_trace = format_ip4_forward_next_trace,
1938
1939   .n_next_nodes = 1,
1940   .next_nodes = {
1941     [0] = "error-drop",
1942   },
1943 };
1944
1945 /* Compute TCP/UDP/ICMP4 checksum in software. */
1946 u16
1947 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1948                               ip4_header_t * ip0)
1949 {
1950   ip_csum_t sum0;
1951   u32 ip_header_length, payload_length_host_byte_order;
1952   u32 n_this_buffer, n_bytes_left;
1953   u16 sum16;
1954   void * data_this_buffer;
1955   
1956   /* Initialize checksum with ip header. */
1957   ip_header_length = ip4_header_bytes (ip0);
1958   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1959   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1960
1961   if (BITS (uword) == 32)
1962     {
1963       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1964       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1965     }
1966   else
1967     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1968
1969   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1970   data_this_buffer = (void *) ip0 + ip_header_length;
1971   if (n_this_buffer + ip_header_length > p0->current_length)
1972     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1973   while (1)
1974     {
1975       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1976       n_bytes_left -= n_this_buffer;
1977       if (n_bytes_left == 0)
1978         break;
1979
1980       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1981       p0 = vlib_get_buffer (vm, p0->next_buffer);
1982       data_this_buffer = vlib_buffer_get_current (p0);
1983       n_this_buffer = p0->current_length;
1984     }
1985
1986   sum16 = ~ ip_csum_fold (sum0);
1987
1988   return sum16;
1989 }
1990
1991 static u32
1992 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1993 {
1994   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1995   udp_header_t * udp0;
1996   u16 sum16;
1997
1998   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1999           || ip0->protocol == IP_PROTOCOL_UDP);
2000
2001   udp0 = (void *) (ip0 + 1);
2002   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
2003     {
2004       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
2005                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
2006       return p0->flags;
2007     }
2008
2009   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
2010
2011   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
2012                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
2013
2014   return p0->flags;
2015 }
2016
2017 static uword
2018 ip4_local (vlib_main_t * vm,
2019            vlib_node_runtime_t * node,
2020            vlib_frame_t * frame)
2021 {
2022   ip4_main_t * im = &ip4_main;
2023   ip_lookup_main_t * lm = &im->lookup_main;
2024   ip_local_next_t next_index;
2025   u32 * from, * to_next, n_left_from, n_left_to_next;
2026   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2027
2028   from = vlib_frame_vector_args (frame);
2029   n_left_from = frame->n_vectors;
2030   next_index = node->cached_next_index;
2031   
2032   if (node->flags & VLIB_NODE_FLAG_TRACE)
2033     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2034
2035   while (n_left_from > 0)
2036     {
2037       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2038
2039       while (n_left_from >= 4 && n_left_to_next >= 2)
2040         {
2041           vlib_buffer_t * p0, * p1;
2042           ip4_header_t * ip0, * ip1;
2043           udp_header_t * udp0, * udp1;
2044           ip4_fib_mtrie_t * mtrie0, * mtrie1;
2045           ip4_fib_mtrie_leaf_t leaf0, leaf1;
2046           ip_adjacency_t * adj0, * adj1;
2047           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
2048           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
2049           i32 len_diff0, len_diff1;
2050           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2051           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
2052           u8 enqueue_code;
2053       
2054           pi0 = to_next[0] = from[0];
2055           pi1 = to_next[1] = from[1];
2056           from += 2;
2057           n_left_from -= 2;
2058           to_next += 2;
2059           n_left_to_next -= 2;
2060       
2061           p0 = vlib_get_buffer (vm, pi0);
2062           p1 = vlib_get_buffer (vm, pi1);
2063
2064           ip0 = vlib_buffer_get_current (p0);
2065           ip1 = vlib_buffer_get_current (p1);
2066
2067           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2068                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2069           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
2070                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
2071
2072           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2073           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
2074
2075           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
2076
2077           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2078           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
2079
2080           proto0 = ip0->protocol;
2081           proto1 = ip1->protocol;
2082           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2083           is_udp1 = proto1 == IP_PROTOCOL_UDP;
2084           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2085           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
2086
2087           flags0 = p0->flags;
2088           flags1 = p1->flags;
2089
2090           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2091           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2092
2093           udp0 = ip4_next_header (ip0);
2094           udp1 = ip4_next_header (ip1);
2095
2096           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2097           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2098           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2099
2100           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2101           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
2102
2103           /* Verify UDP length. */
2104           ip_len0 = clib_net_to_host_u16 (ip0->length);
2105           ip_len1 = clib_net_to_host_u16 (ip1->length);
2106           udp_len0 = clib_net_to_host_u16 (udp0->length);
2107           udp_len1 = clib_net_to_host_u16 (udp1->length);
2108
2109           len_diff0 = ip_len0 - udp_len0;
2110           len_diff1 = ip_len1 - udp_len1;
2111
2112           len_diff0 = is_udp0 ? len_diff0 : 0;
2113           len_diff1 = is_udp1 ? len_diff1 : 0;
2114
2115           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
2116                                 & good_tcp_udp0 & good_tcp_udp1)))
2117             {
2118               if (is_tcp_udp0)
2119                 {
2120                   if (is_tcp_udp0
2121                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2122                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2123                   good_tcp_udp0 =
2124                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2125                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2126                 }
2127               if (is_tcp_udp1)
2128                 {
2129                   if (is_tcp_udp1
2130                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2131                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
2132                   good_tcp_udp1 =
2133                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2134                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2135                 }
2136             }
2137
2138           good_tcp_udp0 &= len_diff0 >= 0;
2139           good_tcp_udp1 &= len_diff1 >= 0;
2140
2141           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2142           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
2143
2144           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
2145
2146           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2147           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
2148
2149           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2150           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2151                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2152                     : error0);
2153           error1 = (is_tcp_udp1 && ! good_tcp_udp1
2154                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
2155                     : error1);
2156
2157           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2158           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
2159
2160           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2161           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2162
2163           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
2164           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2165
2166           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2167                                                            &ip0->src_address,
2168                                                            /* no_default_route */ 1));
2169           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
2170                                                            &ip1->src_address,
2171                                                            /* no_default_route */ 1));
2172
2173           adj0 = ip_get_adjacency (lm, adj_index0);
2174           adj1 = ip_get_adjacency (lm, adj_index1);
2175
2176           /* 
2177            * Must have a route to source otherwise we drop the packet.
2178            * ip4 broadcasts are accepted, e.g. to make dhcp client work
2179            */
2180           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2181                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2182                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2183                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2184                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2185                     ? IP4_ERROR_SRC_LOOKUP_MISS
2186                     : error0);
2187           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
2188                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2189                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
2190                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2191                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2192                     ? IP4_ERROR_SRC_LOOKUP_MISS
2193                     : error1);
2194
2195           next0 = lm->local_next_by_ip_protocol[proto0];
2196           next1 = lm->local_next_by_ip_protocol[proto1];
2197
2198           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2199           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
2200
2201           p0->error = error0 ? error_node->errors[error0] : 0;
2202           p1->error = error1 ? error_node->errors[error1] : 0;
2203
2204           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
2205
2206           if (PREDICT_FALSE (enqueue_code != 0))
2207             {
2208               switch (enqueue_code)
2209                 {
2210                 case 1:
2211                   /* A B A */
2212                   to_next[-2] = pi1;
2213                   to_next -= 1;
2214                   n_left_to_next += 1;
2215                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2216                   break;
2217
2218                 case 2:
2219                   /* A A B */
2220                   to_next -= 1;
2221                   n_left_to_next += 1;
2222                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2223                   break;
2224
2225                 case 3:
2226                   /* A B B or A B C */
2227                   to_next -= 2;
2228                   n_left_to_next += 2;
2229                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2230                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2231                   if (next0 == next1)
2232                     {
2233                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2234                       next_index = next1;
2235                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2236                     }
2237                   break;
2238                 }
2239             }
2240         }
2241
2242       while (n_left_from > 0 && n_left_to_next > 0)
2243         {
2244           vlib_buffer_t * p0;
2245           ip4_header_t * ip0;
2246           udp_header_t * udp0;
2247           ip4_fib_mtrie_t * mtrie0;
2248           ip4_fib_mtrie_leaf_t leaf0;
2249           ip_adjacency_t * adj0;
2250           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
2251           i32 len_diff0;
2252           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2253       
2254           pi0 = to_next[0] = from[0];
2255           from += 1;
2256           n_left_from -= 1;
2257           to_next += 1;
2258           n_left_to_next -= 1;
2259       
2260           p0 = vlib_get_buffer (vm, pi0);
2261
2262           ip0 = vlib_buffer_get_current (p0);
2263
2264           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2265                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2266
2267           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2268
2269           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2270
2271           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2272
2273           proto0 = ip0->protocol;
2274           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2275           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2276
2277           flags0 = p0->flags;
2278
2279           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2280
2281           udp0 = ip4_next_header (ip0);
2282
2283           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2284           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2285
2286           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2287
2288           /* Verify UDP length. */
2289           ip_len0 = clib_net_to_host_u16 (ip0->length);
2290           udp_len0 = clib_net_to_host_u16 (udp0->length);
2291
2292           len_diff0 = ip_len0 - udp_len0;
2293
2294           len_diff0 = is_udp0 ? len_diff0 : 0;
2295
2296           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
2297             {
2298               if (is_tcp_udp0)
2299                 {
2300                   if (is_tcp_udp0
2301                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2302                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2303                   good_tcp_udp0 =
2304                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2305                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2306                 }
2307             }
2308
2309           good_tcp_udp0 &= len_diff0 >= 0;
2310
2311           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2312
2313           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2314
2315           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2316
2317           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2318           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2319                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2320                     : error0);
2321
2322           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2323
2324           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2325           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2326
2327           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2328                                                            &ip0->src_address,
2329                                                            /* no_default_route */ 1));
2330
2331           adj0 = ip_get_adjacency (lm, adj_index0);
2332
2333           /* Must have a route to source otherwise we drop the packet. */
2334           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2335                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2336                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2337                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2338                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2339                     ? IP4_ERROR_SRC_LOOKUP_MISS
2340                     : error0);
2341
2342           next0 = lm->local_next_by_ip_protocol[proto0];
2343
2344           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2345
2346           p0->error = error0? error_node->errors[error0] : 0;
2347
2348           if (PREDICT_FALSE (next0 != next_index))
2349             {
2350               n_left_to_next += 1;
2351               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2352
2353               next_index = next0;
2354               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2355               to_next[0] = pi0;
2356               to_next += 1;
2357               n_left_to_next -= 1;
2358             }
2359         }
2360   
2361       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2362     }
2363
2364   return frame->n_vectors;
2365 }
2366
2367 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2368   .function = ip4_local,
2369   .name = "ip4-local",
2370   .vector_size = sizeof (u32),
2371
2372   .format_trace = format_ip4_forward_next_trace,
2373
2374   .n_next_nodes = IP_LOCAL_N_NEXT,
2375   .next_nodes = {
2376     [IP_LOCAL_NEXT_DROP] = "error-drop",
2377     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2378     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2379     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2380   },
2381 };
2382
2383 void ip4_register_protocol (u32 protocol, u32 node_index)
2384 {
2385   vlib_main_t * vm = vlib_get_main();
2386   ip4_main_t * im = &ip4_main;
2387   ip_lookup_main_t * lm = &im->lookup_main;
2388
2389   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2390   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2391 }
2392
2393 static clib_error_t *
2394 show_ip_local_command_fn (vlib_main_t * vm,
2395                           unformat_input_t * input,
2396                          vlib_cli_command_t * cmd)
2397 {
2398   ip4_main_t * im = &ip4_main;
2399   ip_lookup_main_t * lm = &im->lookup_main;
2400   int i;
2401
2402   vlib_cli_output (vm, "Protocols handled by ip4_local");
2403   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2404     {
2405       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2406         vlib_cli_output (vm, "%d", i);
2407     }
2408   return 0;
2409 }
2410
2411
2412
2413 VLIB_CLI_COMMAND (show_ip_local, static) = {
2414   .path = "show ip local",
2415   .function = show_ip_local_command_fn,
2416   .short_help = "Show ip local protocol table",
2417 };
2418
/*
 * ip4-arp node function.
 *
 * Every buffer in the frame is enqueued to IP4_ARP_NEXT_DROP, tagged with
 * one of the node's error counters.  For each buffer a seeded hash of
 * (IPv4 destination address, TX sw_if_index) selects one bit in a static
 * 256-bit bitmap:
 *   - bit already set: the buffer is counted as IP4_ARP_ERROR_DROP and
 *     nothing else happens;
 *   - bit clear: the bit is set and, if the buffer's adjacency is an
 *     IP_LOOKUP_NEXT_ARP adjacency, an ARP request is built from
 *     im->ip4_arp_request_packet_template and handed to
 *     adj0->rewrite_header.next_index.
 * The hash seeds are re-drawn and the bitmap cleared whenever more than
 * 1e-3 (in vlib_time_now units) has passed since the previous re-seed.
 *
 * vm    - vlib main.
 * node  - runtime of this node (error counters, trace flag).
 * frame - frame of buffer indices to process.
 *
 * Returns frame->n_vectors.
 */
2419 static uword
2420 ip4_arp (vlib_main_t * vm,
2421          vlib_node_runtime_t * node,
2422          vlib_frame_t * frame)
2423 {
2424   vnet_main_t * vnm = vnet_get_main();
2425   ip4_main_t * im = &ip4_main;
2426   ip_lookup_main_t * lm = &im->lookup_main;
2427   u32 * from, * to_next_drop;
2428   uword n_left_from, n_left_to_next_drop, next_index;
       /* Throttling state; function-static, so it is shared by every call. */
2429   static f64 time_last_seed_change = -1e100;
2430   static u32 hash_seeds[3];
2431   static uword hash_bitmap[256 / BITS (uword)]; 
2432   f64 time_now;
2433
2434   if (node->flags & VLIB_NODE_FLAG_TRACE)
2435     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2436
2437   time_now = vlib_time_now (vm);
2438   if (time_now - time_last_seed_change > 1e-3)
2439     {
2440       uword i;
2441       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2442                                              sizeof (hash_seeds));
2443       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2444         hash_seeds[i] = r[i];
2445
2446       /* Mark all hash keys as not seen before. */
2447       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2448         hash_bitmap[i] = 0;
2449
2450       time_last_seed_change = time_now;
2451     }
2452
2453   from = vlib_frame_vector_args (frame);
2454   n_left_from = frame->n_vectors;
       /* NOTE(review): next_index is assigned here but not read again below. */
2455   next_index = node->cached_next_index;
2456   if (next_index == IP4_ARP_NEXT_DROP)
2457     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2458
2459   while (n_left_from > 0)
2460     {
2461       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2462                            to_next_drop, n_left_to_next_drop);
2463
2464       while (n_left_from > 0 && n_left_to_next_drop > 0)
2465         {
2466           vlib_buffer_t * p0;
2467           ip4_header_t * ip0;
2468           ethernet_header_t * eh0;
2469           u32 pi0, adj_index0, a0, b0, c0, sw_if_index0, drop0;
               /* m0 is a mask into one uword of hash_bitmap, so it must be uword-wide. */
2470           uword bm0, m0;
2471           ip_adjacency_t * adj0;
2472
2473           pi0 = from[0];
2474
2475           p0 = vlib_get_buffer (vm, pi0);
2476
2477           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2478           adj0 = ip_get_adjacency (lm, adj_index0);
2479           ip0 = vlib_buffer_get_current (p0);
2480
2481           /* 
2482            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2483            * rewrite to this packet, we need to skip it here.
2484            * The test is on the IPv4 version nibble of the first byte:
2485            * if it is not 4, the data is parsed as an ethernet header.
2486            */
2487           eh0 = (ethernet_header_t*)ip0;
2488           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2489             {
2490               u32 vlan_num = 0;
2491               u16 * etype = &eh0->type;
2492               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2493                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2494                 {
2495                   vlan_num += 1;
2496                   etype += 2; //step over one 4-byte tag (two u16) to the next ethertype
2497                 }
2498               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2499                 {
2500                   vlib_buffer_advance (
2501                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2502                   ip0 = vlib_buffer_get_current (p0);
2503                 }
2504             }
2505
2506           a0 = hash_seeds[0];
2507           b0 = hash_seeds[1];
2508           c0 = hash_seeds[2];
2509
2510           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2511           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2512
2513           a0 ^= ip0->dst_address.data_u32;
2514           b0 ^= sw_if_index0;
2515
2516           hash_v3_finalize32 (a0, b0, c0);
2517
               /*
                * Reduce the hash to a bit number in [0, BITS (hash_bitmap)),
                * then split it into (bit within word, word index).  The bit
                * mask has to be derived before c0 is overwritten with the
                * word index; otherwise the mask would be computed from the
                * word index and only a handful of bitmap slots would be used.
                */
2518           c0 &= BITS (hash_bitmap) - 1;
2519           m0 = (uword) 1 << (c0 % BITS (uword));
2520           c0 = c0 / BITS (uword);
2521
2522           bm0 = hash_bitmap[c0];
2523           drop0 = (bm0 & m0) != 0;
2524
2525           /* Mark it as seen. */
2526           hash_bitmap[c0] = bm0 | m0;
2527
               /* The original buffer always goes to the drop next. */
2528           from += 1;
2529           n_left_from -= 1;
2530           to_next_drop[0] = pi0;
2531           to_next_drop += 1;
2532           n_left_to_next_drop -= 1;
2533
2534           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2535
2536           if (drop0)
2537             continue;
2538
2539           /* 
2540            * Can happen if the control-plane is programming tables
2541            * with traffic flowing; at least that's today's lame excuse.
2542            */
2543           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2544             {
2545               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2546             }
2547           else
2548           /* Send ARP request. */
2549           {
2550             u32 bi0 = 0;
                 /* Note: this b0 (the request buffer) shadows the u32 hash word b0 above. */
2551             vlib_buffer_t * b0;
2552             ethernet_arp_header_t * h0;
2553             vnet_hw_interface_t * hw_if0;
2554
2555             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2556
2557             /* Add rewrite/encap string for ARP packet. */
2558             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2559
2560             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2561
2562             /* Src ethernet address in ARP header. */
2563             memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2564                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2565
2566             ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0);
2567
2568             /* Copy in destination address we are requesting. */
2569             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2570
2571             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2572             b0 = vlib_get_buffer (vm, bi0);
2573             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2574
                 /* Expose the rewrite bytes written in front of the ARP header. */
2575             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2576
2577             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2578           }
2579         }
2580
2581       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2582     }
2583
2584   return frame->n_vectors;
2585 }
2586
/* Counter names for the ip4-arp node, indexed by the IP4_ARP_ERROR_* values. */
2587 static char * ip4_arp_error_strings[] = {
2588   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2589   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2590   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2591   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2592   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2593 };
2594
/*
 * Graph node registration for "ip4-arp": dispatches to ip4_arp, uses
 * ip4_arp_error_strings for its counters and declares "error-drop" as its
 * IP4_ARP_NEXT_DROP next node.
 */
2595 VLIB_REGISTER_NODE (ip4_arp_node) = {
2596   .function = ip4_arp,
2597   .name = "ip4-arp",
2598   .vector_size = sizeof (u32),
2599
2600   .format_trace = format_ip4_forward_next_trace,
2601
2602   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2603   .error_strings = ip4_arp_error_strings,
2604
2605   .n_next_nodes = IP4_ARP_N_NEXT,
2606   .next_nodes = {
2607     [IP4_ARP_NEXT_DROP] = "error-drop",
2608   },
2609 };
2610
/*
 * X-macro list of the ip4-arp error suffixes that arp_notrace_init passes
 * to the pcap drop-trace filter.  NON_ARP_ADJ is not in the list.
 */
2611 #define foreach_notrace_ip4_arp_error           \
2612 _(DROP)                                         \
2613 _(REQUEST_SENT)                                 \
2614 _(REPLICATE_DROP)                               \
2615 _(REPLICATE_FAIL)
2616
/*
 * Init function: for every entry of foreach_notrace_ip4_arp_error, calls
 * vnet_pcap_drop_trace_filter_add_del (is_add = 1) with the matching
 * ip4-arp node error.  Always returns 0 (no error).
 */
2617 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2618 {
2619   vlib_node_runtime_t *rt = 
2620     vlib_node_get_runtime (vm, ip4_arp_node.index);
2621
2622   /* don't trace ARP request packets */
2623 #define _(a)                                    \
2624     vnet_pcap_drop_trace_filter_add_del         \
2625         (rt->errors[IP4_ARP_ERROR_##a],         \
2626          1 /* is_add */);
2627     foreach_notrace_ip4_arp_error;
2628 #undef _
2629   return 0;
2630 }
2631
2632 VLIB_INIT_FUNCTION(arp_notrace_init);
2633
2634
2635 /* Send an ARP request to see if given destination is reachable on given interface. */
/*
 * vm          - vlib main.
 * dst         - IPv4 address to probe; copied into the ARP target field.
 * sw_if_index - software interface to send the request on.
 *
 * Returns 0 on success.  Returns a clib error if the interface is not
 * admin-up, or if no address on the interface matches dst (in which case
 * vnm->api_errno is also set to VNET_API_ERROR_NO_MATCHING_INTERFACE).
 */
2636 clib_error_t *
2637 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2638 {
2639   vnet_main_t * vnm = vnet_get_main();
2640   ip4_main_t * im = &ip4_main;
2641   ethernet_arp_header_t * h;
2642   ip4_address_t * src;
2643   ip_interface_address_t * ia;
2644   ip_adjacency_t * adj;
2645   vnet_hw_interface_t * hi;
2646   vnet_sw_interface_t * si;
2647   vlib_buffer_t * b;
2648   u32 bi = 0;
2649
2650   si = vnet_get_sw_interface (vnm, sw_if_index);
2651
2652   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2653     {
2654       return clib_error_return (0, "%U: interface %U down",
2655                                 format_ip4_address, dst, 
2656                                 format_vnet_sw_if_index_name, vnm, 
2657                                 sw_if_index);
2658     }
2659
       /* src becomes the ARP sender address; ia supplies the probe adjacency. */
2660   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2661   if (! src)
2662     {
2663       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2664       return clib_error_return 
2665         (0, "no matching interface address for destination %U (interface %U)",
2666          format_ip4_address, dst,
2667          format_vnet_sw_if_index_name, vnm, sw_if_index);
2668     }
2669
2670   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2671
2672   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2673
2674   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2675
       /* Sender hardware address comes from the supporting hw interface. */
2676   memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2677
2678   h->ip4_over_ethernet[0].ip4 = src[0];
2679   h->ip4_over_ethernet[1].ip4 = dst[0];
2680
2681   b = vlib_get_buffer (vm, bi);
2682   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2683
2684   /* Add encapsulation string for software interface (e.g. ethernet header). */
2685   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2686   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2687
       /* Hand the single buffer directly to the hw interface's output node. */
2688   {
2689     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2690     u32 * to_next = vlib_frame_vector_args (f);
2691     to_next[0] = bi;
2692     f->n_vectors = 1;
2693     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2694   }
2695
2696   return /* no error */ 0;
2697 }
2698
/*
 * Next-node slots named by the ip4 rewrite nodes ("error-drop" and
 * "ip4-arp" in their registrations).  DROP is slot 0, which the rewrite
 * code also writes as a literal 0.
 */
2699 typedef enum {
2700   IP4_REWRITE_NEXT_DROP,
2701   IP4_REWRITE_NEXT_ARP,
2702 } ip4_rewrite_next_t;
2703
/*
 * Shared body of the ip4-rewrite-transit and ip4-rewrite-local node
 * functions.
 *
 * For every buffer in the frame this reads the adjacency index stored in
 * the buffer (VLIB_RX slot when rewrite_for_locally_received_packets is
 * non-zero, VLIB_TX slot otherwise), prepends the adjacency's rewrite
 * string, sets the buffer's TX sw_if_index and enqueues it to the
 * adjacency's rewrite_header.next_index.  If an error was recorded for the
 * packet (TTL expired, MTU exceeded, spoofed local packet) it goes to next
 * 0 (drop) instead, with p0->error taken from the ip4-input node's error
 * table.
 *
 * Transit packets (flag == 0) additionally have their TTL decremented and
 * the header checksum incrementally updated.  Local packets (flag != 0)
 * are marked IP4_ERROR_SPOOFED_LOCAL_PACKETS when the adjacency is a LOCAL
 * one, and are redirected to IP4_REWRITE_NEXT_ARP when it is an ARP one.
 *
 * The MTU check is evaluated before the next node is chosen in both the
 * dual-packet and the single-packet loop, so an over-MTU packet is dropped
 * regardless of its position in the frame.
 *
 * Returns frame->n_vectors.
 */
2704 always_inline uword
2705 ip4_rewrite_inline (vlib_main_t * vm,
2706                     vlib_node_runtime_t * node,
2707                     vlib_frame_t * frame,
2708                     int rewrite_for_locally_received_packets)
2709 {
2710   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2711   u32 * from = vlib_frame_vector_args (frame);
2712   u32 n_left_from, n_left_to_next, * to_next, next_index;
       /* Error codes are IP4_ERROR_*, so buffers borrow ip4-input's error table. */
2713   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2714   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2715
2716   n_left_from = frame->n_vectors;
2717   next_index = node->cached_next_index;
2718   u32 cpu_index = os_get_cpu_number();
2719   
2720   while (n_left_from > 0)
2721     {
2722       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2723
           /* Dual loop: two packets per iteration, prefetching the next two. */
2724       while (n_left_from >= 4 && n_left_to_next >= 2)
2725         {
2726           ip_adjacency_t * adj0, * adj1;
2727           vlib_buffer_t * p0, * p1;
2728           ip4_header_t * ip0, * ip1;
2729           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2730           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2731           u32 next0_override, next1_override;
2732       
               /* The overrides are only written and read on the local path. */
2733           if (rewrite_for_locally_received_packets)
2734               next0_override = next1_override = 0;
2735
2736           /* Prefetch next iteration. */
2737           {
2738             vlib_buffer_t * p2, * p3;
2739
2740             p2 = vlib_get_buffer (vm, from[2]);
2741             p3 = vlib_get_buffer (vm, from[3]);
2742
2743             vlib_prefetch_buffer_header (p2, STORE);
2744             vlib_prefetch_buffer_header (p3, STORE);
2745
2746             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2747             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2748           }
2749
2750           pi0 = to_next[0] = from[0];
2751           pi1 = to_next[1] = from[1];
2752
2753           from += 2;
2754           n_left_from -= 2;
2755           to_next += 2;
2756           n_left_to_next -= 2;
2757       
2758           p0 = vlib_get_buffer (vm, pi0);
2759           p1 = vlib_get_buffer (vm, pi1);
2760
2761           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2762           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2763
2764           /* We should never rewrite a pkt using the MISS adjacency */
2765           ASSERT(adj_index0 && adj_index1);
2766
2767           ip0 = vlib_buffer_get_current (p0);
2768           ip1 = vlib_buffer_get_current (p1);
2769
2770           error0 = error1 = IP4_ERROR_NONE;
2771
2772           /* Decrement TTL & update checksum.
2773              Works either endian, so no need for byte swap. */
2774           if (! rewrite_for_locally_received_packets)
2775             {
2776               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2777
2778               /* Input node should have reject packets with ttl 0. */
2779               ASSERT (ip0->ttl > 0);
2780               ASSERT (ip1->ttl > 0);
2781
                   /* Incremental update: add 0x0100 (network order) for the TTL decrement. */
2782               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2783               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2784
                   /* Fold the carry of the 16-bit one's-complement addition. */
2785               checksum0 += checksum0 >= 0xffff;
2786               checksum1 += checksum1 >= 0xffff;
2787
2788               ip0->checksum = checksum0;
2789               ip1->checksum = checksum1;
2790
2791               ttl0 -= 1;
2792               ttl1 -= 1;
2793
2794               ip0->ttl = ttl0;
2795               ip1->ttl = ttl1;
2796
2797               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2798               error1 = ttl1 <= 0 ? IP4_ERROR_TIME_EXPIRED : error1;
2799
2800               /* Verify checksum. */
2801               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2802               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2803             }
2804
2805           /* Rewrite packet header and updates lengths. */
2806           adj0 = ip_get_adjacency (lm, adj_index0);
2807           adj1 = ip_get_adjacency (lm, adj_index1);
2808       
2809           if (rewrite_for_locally_received_packets)
2810             {
2811               /*
2812                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2813                * we end up here with a local adjacency in hand
2814                * The local adj rewrite data is 0xfefe on purpose.
2815                * Bad engineer, no donut for you.
2816                */
2817               if (PREDICT_FALSE(adj0->lookup_next_index 
2818                                 == IP_LOOKUP_NEXT_LOCAL))
2819                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2820               if (PREDICT_FALSE(adj0->lookup_next_index
2821                                 == IP_LOOKUP_NEXT_ARP))
2822                 next0_override = IP4_REWRITE_NEXT_ARP;
2823               if (PREDICT_FALSE(adj1->lookup_next_index 
2824                                 == IP_LOOKUP_NEXT_LOCAL))
2825                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2826               if (PREDICT_FALSE(adj1->lookup_next_index
2827                                 == IP_LOOKUP_NEXT_ARP))
2828                 next1_override = IP4_REWRITE_NEXT_ARP;
2829             }
2830
2831           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2832           rw_len0 = adj0[0].rewrite_header.data_bytes;
2833           rw_len1 = adj1[0].rewrite_header.data_bytes;
               /*
                * The MTU check must come before next0/next1 are chosen so
                * that an over-MTU packet is sent to the drop next, exactly
                * as the single-packet loop below does.
                */
2834           /* Check MTU of outgoing interface. */
2835           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2836                     ? IP4_ERROR_MTU_EXCEEDED
2837                     : error0);
2838           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2839                     ? IP4_ERROR_MTU_EXCEEDED
2840                     : error1);
2841
2842           next0 = (error0 == IP4_ERROR_NONE) 
2843             ? adj0[0].rewrite_header.next_index : 0;
2844
               /* next0 == 0 means drop; never override a drop decision. */
2845           if (rewrite_for_locally_received_packets)
2846               next0 = next0 && next0_override ? next0_override : next0;
2847
2848           next1 = (error1 == IP4_ERROR_NONE)
2849             ? adj1[0].rewrite_header.next_index : 0;
2850
2851           if (rewrite_for_locally_received_packets)
2852               next1 = next1 && next1_override ? next1_override : next1;
2853
2854           /* 
2855            * We've already accounted for an ethernet_header_t elsewhere
2856            */
2857           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2858               vlib_increment_combined_counter 
2859                   (&lm->adjacency_counters,
2860                    cpu_index, adj_index0, 
2861                    /* packet increment */ 0,
2862                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2863
2864           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2865               vlib_increment_combined_counter 
2866                   (&lm->adjacency_counters,
2867                    cpu_index, adj_index1, 
2868                    /* packet increment */ 0,
2869                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2870
               /* Grow the buffer backwards to cover the rewrite bytes. */
2871           p0->current_data -= rw_len0;
2872           p1->current_data -= rw_len1;
2873
2874           p0->current_length += rw_len0;
2875           p1->current_length += rw_len1;
2876
2877           vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
2878           vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index;
2879       
2880           p0->error = error_node->errors[error0];
2881           p1->error = error_node->errors[error1];
2882
2883           /* Guess we are only writing on simple Ethernet header. */
2884           vnet_rewrite_two_headers (adj0[0], adj1[0],
2885                                     ip0, ip1,
2886                                     sizeof (ethernet_header_t));
2887       
2888           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2889                                            to_next, n_left_to_next,
2890                                            pi0, pi1, next0, next1);
2891         }
2892
           /* Single loop: remaining packets, one at a time. */
2893       while (n_left_from > 0 && n_left_to_next > 0)
2894         {
2895           ip_adjacency_t * adj0;
2896           vlib_buffer_t * p0;
2897           ip4_header_t * ip0;
2898           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2899           u32 next0_override;
2900       
2901           if (rewrite_for_locally_received_packets)
2902               next0_override = 0;
2903
2904           pi0 = to_next[0] = from[0];
2905
2906           p0 = vlib_get_buffer (vm, pi0);
2907
2908           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2909
2910           /* We should never rewrite a pkt using the MISS adjacency */
2911           ASSERT(adj_index0);
2912
2913           adj0 = ip_get_adjacency (lm, adj_index0);
2914       
2915           ip0 = vlib_buffer_get_current (p0);
2916
2917           error0 = IP4_ERROR_NONE;
2918           next0 = 0;            /* drop on error */
2919
2920           /* Decrement TTL & update checksum. */
2921           if (! rewrite_for_locally_received_packets)
2922             {
2923               i32 ttl0 = ip0->ttl;
2924
2925               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2926
2927               checksum0 += checksum0 >= 0xffff;
2928
2929               ip0->checksum = checksum0;
2930
2931               ASSERT (ip0->ttl > 0);
2932
2933               ttl0 -= 1;
2934
2935               ip0->ttl = ttl0;
2936
2937               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2938
2939               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2940             }
2941
2942           if (rewrite_for_locally_received_packets)
2943             {
2944               /*
2945                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2946                * we end up here with a local adjacency in hand
2947                * The local adj rewrite data is 0xfefe on purpose.
2948                * Bad engineer, no donut for you.
2949                */
2950               if (PREDICT_FALSE(adj0->lookup_next_index 
2951                                 == IP_LOOKUP_NEXT_LOCAL))
2952                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2953               /* 
2954                * We have to override the next_index in ARP adjacencies,
2955                * because they're set up for ip4-arp, not this node...
2956                */
2957               if (PREDICT_FALSE(adj0->lookup_next_index
2958                                 == IP_LOOKUP_NEXT_ARP))
2959                 next0_override = IP4_REWRITE_NEXT_ARP;
2960             }
2961
2962           /* Guess we are only writing on simple Ethernet header. */
2963           vnet_rewrite_one_header (adj0[0], ip0, 
2964                                    sizeof (ethernet_header_t));
2965           
2966           /* Update packet buffer attributes/set output interface. */
2967           rw_len0 = adj0[0].rewrite_header.data_bytes;
2968           
2969           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2970               vlib_increment_combined_counter 
2971                   (&lm->adjacency_counters,
2972                    cpu_index, adj_index0, 
2973                    /* packet increment */ 0,
2974                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2975           
2976           /* Check MTU of outgoing interface. */
2977           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2978                     > adj0[0].rewrite_header.max_l3_packet_bytes
2979                     ? IP4_ERROR_MTU_EXCEEDED
2980                     : error0);
2981           
2982           p0->error = error_node->errors[error0];
2983           p0->current_data -= rw_len0;
2984           p0->current_length += rw_len0;
2985           vnet_buffer (p0)->sw_if_index[VLIB_TX] = 
2986             adj0[0].rewrite_header.sw_if_index;
2987           
2988           next0 = (error0 == IP4_ERROR_NONE)
2989             ? adj0[0].rewrite_header.next_index : 0;
2990
2991           if (rewrite_for_locally_received_packets)
2992               next0 = next0 && next0_override ? next0_override : next0;
2993
2994           from += 1;
2995           n_left_from -= 1;
2996           to_next += 1;
2997           n_left_to_next -= 1;
2998       
2999           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3000                                            to_next, n_left_to_next,
3001                                            pi0, next0);
3002         }
3003   
3004       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3005     }
3006
3007   /* Need to do trace after rewrites to pick up new packet data. */
3008   if (node->flags & VLIB_NODE_FLAG_TRACE)
3009     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
3010
3011   return frame->n_vectors;
3012 }
3013
/*
 * Node function for "ip4-rewrite-transit": runs ip4_rewrite_inline with
 * rewrite_for_locally_received_packets = 0 and returns its result.
 */
3014 static uword
3015 ip4_rewrite_transit (vlib_main_t * vm,
3016                      vlib_node_runtime_t * node,
3017                      vlib_frame_t * frame)
3018 {
3019   return ip4_rewrite_inline (vm, node, frame,
3020                              /* rewrite_for_locally_received_packets */ 0);
3021 }
3022
/*
 * Node function for "ip4-rewrite-local": runs ip4_rewrite_inline with
 * rewrite_for_locally_received_packets = 1 and returns its result.
 */
3023 static uword
3024 ip4_rewrite_local (vlib_main_t * vm,
3025                    vlib_node_runtime_t * node,
3026                    vlib_frame_t * frame)
3027 {
3028   return ip4_rewrite_inline (vm, node, frame,
3029                              /* rewrite_for_locally_received_packets */ 1);
3030 }
3031
/*
 * Graph node registration for "ip4-rewrite-transit" (function
 * ip4_rewrite_transit).  Declares two next nodes, indexed by
 * ip4_rewrite_next_t: "error-drop" and "ip4-arp".
 */
3032 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
3033   .function = ip4_rewrite_transit,
3034   .name = "ip4-rewrite-transit",
3035   .vector_size = sizeof (u32),
3036
3037   .format_trace = format_ip4_forward_next_trace,
3038
3039   .n_next_nodes = 2,
3040   .next_nodes = {
3041     [IP4_REWRITE_NEXT_DROP] = "error-drop",
3042     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
3043   },
3044 };
3045
/*
 * Graph node registration for "ip4-rewrite-local" (function
 * ip4_rewrite_local).  Registered as a sibling of "ip4-rewrite-transit"
 * and lists the same two next nodes in the same slots.
 */
3046 VLIB_REGISTER_NODE (ip4_rewrite_local_node,static) = {
3047   .function = ip4_rewrite_local,
3048   .name = "ip4-rewrite-local",
3049   .vector_size = sizeof (u32),
3050
3051   .sibling_of = "ip4-rewrite-transit",
3052
3053   .format_trace = format_ip4_forward_next_trace,
3054
3055   .n_next_nodes = 2,
3056   .next_nodes = {
3057     [IP4_REWRITE_NEXT_DROP] = "error-drop",
3058     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
3059   },
3060 };
3061
/*
 * CLI handler for "set interface ip table".
 *
 * Parses an interface followed by a numeric table id, looks up the FIB
 * for that id (IP4_ROUTE_FLAG_TABLE_ID lookup) and, if one is returned,
 * records its index in im->fib_index_by_sw_if_index for the interface.
 *
 * Returns 0 on success, or a clib error when the interface or the table
 * id cannot be parsed.  When the lookup returns no FIB the function
 * returns 0 without changing anything.
 */
3062 static clib_error_t *
3063 add_del_interface_table (vlib_main_t * vm,
3064                          unformat_input_t * input,
3065                          vlib_cli_command_t * cmd)
3066 {
3067   vnet_main_t * vnm = vnet_get_main();
3068   clib_error_t * error = 0;
3069   u32 sw_if_index, table_id;
3070
3071   sw_if_index = ~0;
3072
3073   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
3074     {
3075       error = clib_error_return (0, "unknown interface `%U'",
3076                                  format_unformat_error, input);
3077       goto done;
3078     }
3079
3080   if (unformat (input, "%d", &table_id))
3081     ;
3082   else
3083     {
3084       error = clib_error_return (0, "expected table id `%U'",
3085                                  format_unformat_error, input);
3086       goto done;
3087     }
3088
3089   {
3090     ip4_main_t * im = &ip4_main;
3091     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
3092
3093     if (fib) 
3094       {
             /* Grow the per-interface vector if needed, then bind the FIB. */
3095         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
3096         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
3097     }
3098   }
3099
3100  done:
3101   return error;
3102 }
3103
/* CLI registration: binds "set interface ip table" to add_del_interface_table. */
3104 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
3105   .path = "set interface ip table",
3106   .function = add_del_interface_table,
3107   .short_help = "Add/delete FIB table id for interface",
3108 };
3109
3110
/*
 * Node function for "ip4-lookup-multicast".
 *
 * For each buffer: picks a FIB index (from the RX interface, unless the
 * buffer's sw_if_index[VLIB_TX] is not ~0, in which case that value is
 * used as the FIB index), looks up the destination address, computes the
 * flow hash with the FIB's flow_hash_config, selects one adjacency out of
 * the n_adj block via the hash, stores it in ip.adj_index[VLIB_TX],
 * bumps the adjacency's packet/byte counter and enqueues the buffer to the
 * next node given by the looked-up adjacency's lookup_next_index.
 *
 * Buffers are written speculatively into the frame for the current next
 * node; when a buffer's next differs, the enqueue is undone and the buffer
 * is sent to its own next node.
 *
 * Returns frame->n_vectors.
 */
3111 static uword
3112 ip4_lookup_multicast (vlib_main_t * vm,
3113                       vlib_node_runtime_t * node,
3114                       vlib_frame_t * frame)
3115 {
3116   ip4_main_t * im = &ip4_main;
3117   ip_lookup_main_t * lm = &im->lookup_main;
3118   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
3119   u32 n_left_from, n_left_to_next, * from, * to_next;
3120   ip_lookup_next_t next;
3121   u32 cpu_index = os_get_cpu_number();
3122
3123   from = vlib_frame_vector_args (frame);
3124   n_left_from = frame->n_vectors;
3125   next = node->cached_next_index;
3126
3127   while (n_left_from > 0)
3128     {
3129       vlib_get_next_frame (vm, node, next,
3130                            to_next, n_left_to_next);
3131
           /* Dual loop: two packets per iteration, prefetching the next two. */
3132       while (n_left_from >= 4 && n_left_to_next >= 2)
3133         {
3134           vlib_buffer_t * p0, * p1;
3135           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
3136           ip_lookup_next_t next0, next1;
3137           ip4_header_t * ip0, * ip1;
3138           ip_adjacency_t * adj0, * adj1;
3139           u32 fib_index0, fib_index1;
3140           u32 flow_hash_config0, flow_hash_config1;
3141
3142           /* Prefetch next iteration. */
3143           {
3144             vlib_buffer_t * p2, * p3;
3145
3146             p2 = vlib_get_buffer (vm, from[2]);
3147             p3 = vlib_get_buffer (vm, from[3]);
3148
3149             vlib_prefetch_buffer_header (p2, LOAD);
3150             vlib_prefetch_buffer_header (p3, LOAD);
3151
3152             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
3153             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
3154           }
3155
               /* Speculatively enqueue both buffers to the current next. */
3156           pi0 = to_next[0] = from[0];
3157           pi1 = to_next[1] = from[1];
3158
3159           p0 = vlib_get_buffer (vm, pi0);
3160           p1 = vlib_get_buffer (vm, pi1);
3161
3162           ip0 = vlib_buffer_get_current (p0);
3163           ip1 = vlib_buffer_get_current (p1);
3164
               /* FIB of the RX interface, unless sw_if_index[VLIB_TX] carries an override. */
3165           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3166           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
3167           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3168             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3169           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
3170             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
3171
3172           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3173                                               &ip0->dst_address, p0);
3174           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
3175                                               &ip1->dst_address, p1);
3176
3177           adj0 = ip_get_adjacency (lm, adj_index0);
3178           adj1 = ip_get_adjacency (lm, adj_index1);
3179
3180           next0 = adj0->lookup_next_index;
3181           next1 = adj1->lookup_next_index;
3182
3183           flow_hash_config0 = 
3184               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3185
3186           flow_hash_config1 = 
3187               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
3188
3189           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
3190               (ip0, flow_hash_config0);
3191                                                                   
3192           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
3193               (ip1, flow_hash_config1);
3194
               /* n_adj is asserted to be a power of two so the hash can be masked. */
3195           ASSERT (adj0->n_adj > 0);
3196           ASSERT (adj1->n_adj > 0);
3197           ASSERT (is_pow2 (adj0->n_adj));
3198           ASSERT (is_pow2 (adj1->n_adj));
3199           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3200           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
3201
3202           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3203           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
3204
3205           if (1) /* $$$$$$ HACK FIXME */
3206           vlib_increment_combined_counter 
3207               (cm, cpu_index, adj_index0, 1,
3208                vlib_buffer_length_in_chain (vm, p0));
3209           if (1) /* $$$$$$ HACK FIXME */
3210           vlib_increment_combined_counter 
3211               (cm, cpu_index, adj_index1, 1,
3212                vlib_buffer_length_in_chain (vm, p1));
3213
3214           from += 2;
3215           to_next += 2;
3216           n_left_to_next -= 2;
3217           n_left_from -= 2;
3218
               /* Bit 0: p0 mis-speculated; bit 1: p1 mis-speculated. */
3219           wrong_next = (next0 != next) + 2*(next1 != next);
3220           if (PREDICT_FALSE (wrong_next != 0))
3221             {
3222               switch (wrong_next)
3223                 {
3224                 case 1:
3225                   /* A B A */
                       /* Only p0 is wrong: slide p1 into p0's slot, send p0 separately. */
3226                   to_next[-2] = pi1;
3227                   to_next -= 1;
3228                   n_left_to_next += 1;
3229                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3230                   break;
3231
3232                 case 2:
3233                   /* A A B */
                       /* Only p1 is wrong: drop its slot, send it separately. */
3234                   to_next -= 1;
3235                   n_left_to_next += 1;
3236                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3237                   break;
3238
3239                 case 3:
3240                   /* A B C */
                       /* Both wrong: undo both slots and send each separately. */
3241                   to_next -= 2;
3242                   n_left_to_next += 2;
3243                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3244                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3245                   if (next0 == next1)
3246                     {
3247                       /* A B B */
                           /* Both agree on a new next: make it the speculated one. */
3248                       vlib_put_next_frame (vm, node, next, n_left_to_next);
3249                       next = next1;
3250                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
3251                     }
3252                 }
3253             }
3254         }
3255     
           /* Single loop: remaining packets, one at a time. */
3256       while (n_left_from > 0 && n_left_to_next > 0)
3257         {
3258           vlib_buffer_t * p0;
3259           ip4_header_t * ip0;
3260           u32 pi0, adj_index0;
3261           ip_lookup_next_t next0;
3262           ip_adjacency_t * adj0;
3263           u32 fib_index0;
3264           u32 flow_hash_config0;
3265
3266           pi0 = from[0];
3267           to_next[0] = pi0;
3268
3269           p0 = vlib_get_buffer (vm, pi0);
3270
3271           ip0 = vlib_buffer_get_current (p0);
3272
3273           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
3274                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3275           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3276               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3277           
3278           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3279                                               &ip0->dst_address, p0);
3280
3281           adj0 = ip_get_adjacency (lm, adj_index0);
3282
3283           next0 = adj0->lookup_next_index;
3284
3285           flow_hash_config0 = 
3286               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3287
3288           vnet_buffer (p0)->ip.flow_hash = 
3289             ip4_compute_flow_hash (ip0, flow_hash_config0);
3290
3291           ASSERT (adj0->n_adj > 0);
3292           ASSERT (is_pow2 (adj0->n_adj));
3293           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3294
3295           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3296
3297           if (1) /* $$$$$$ HACK FIXME */
3298               vlib_increment_combined_counter 
3299                   (cm, cpu_index, adj_index0, 1,
3300                    vlib_buffer_length_in_chain (vm, p0));
3301
3302           from += 1;
3303           to_next += 1;
3304           n_left_to_next -= 1;
3305           n_left_from -= 1;
3306
               /* Mis-speculated: give the slot back and switch frames to next0. */
3307           if (PREDICT_FALSE (next0 != next))
3308             {
3309               n_left_to_next += 1;
3310               vlib_put_next_frame (vm, node, next, n_left_to_next);
3311               next = next0;
3312               vlib_get_next_frame (vm, node, next,
3313                                    to_next, n_left_to_next);
3314               to_next[0] = pi0;
3315               to_next += 1;
3316               n_left_to_next -= 1;
3317             }
3318         }
3319
3320       vlib_put_next_frame (vm, node, next, n_left_to_next);
3321     }
3322
3323   return frame->n_vectors;
3324 }
3325
3326 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3327   .function = ip4_lookup_multicast,
3328   .name = "ip4-lookup-multicast",
3329   .vector_size = sizeof (u32),
3330
3331   .n_next_nodes = IP_LOOKUP_N_NEXT,
3332   .next_nodes = {
3333     [IP_LOOKUP_NEXT_MISS] = "ip4-miss",
3334     [IP_LOOKUP_NEXT_DROP] = "ip4-drop",
3335     [IP_LOOKUP_NEXT_PUNT] = "ip4-punt",
3336     [IP_LOOKUP_NEXT_LOCAL] = "ip4-local",
3337     [IP_LOOKUP_NEXT_ARP] = "ip4-arp",
3338     [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit",
3339     [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify",
3340     [IP_LOOKUP_NEXT_MAP] = "ip4-map",
3341     [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t",
3342     [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd",
3343     [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop",
3344     [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop", 
3345     [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop", 
3346   },
3347 };
3348
3349 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3350   .function = ip4_drop,
3351   .name = "ip4-multicast",
3352   .vector_size = sizeof (u32),
3353
3354   .format_trace = format_ip4_forward_next_trace,
3355
3356   .n_next_nodes = 1,
3357   .next_nodes = {
3358     [0] = "error-drop",
3359   },
3360 };
3361
3362 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3363 {
3364   ip4_main_t * im = &ip4_main;
3365   ip4_fib_mtrie_t * mtrie0;
3366   ip4_fib_mtrie_leaf_t leaf0;
3367   u32 adj_index0;
3368     
3369   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3370
3371   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3372   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3373   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3374   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3375   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3376   
3377   /* Handle default route. */
3378   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3379   
3380   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3381   
3382   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3383                                                   a, 
3384                                                   /* no_default_route */ 0);
3385 }
3386  
3387 static clib_error_t *
3388 test_lookup_command_fn (vlib_main_t * vm,
3389                         unformat_input_t * input,
3390                         vlib_cli_command_t * cmd)
3391 {
3392   u32 table_id = 0;
3393   f64 count = 1;
3394   u32 n;
3395   int i;
3396   ip4_address_t ip4_base_address;
3397   u64 errors = 0;
3398
3399   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3400       if (unformat (input, "table %d", &table_id))
3401         ;
3402       else if (unformat (input, "count %f", &count))
3403         ;
3404
3405       else if (unformat (input, "%U",
3406                          unformat_ip4_address, &ip4_base_address))
3407         ;
3408       else
3409         return clib_error_return (0, "unknown input `%U'",
3410                                   format_unformat_error, input);
3411   }
3412
3413   n = count;
3414
3415   for (i = 0; i < n; i++)
3416     {
3417       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3418         errors++;
3419
3420       ip4_base_address.as_u32 = 
3421         clib_host_to_net_u32 (1 + 
3422                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3423     }
3424
3425   if (errors) 
3426     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3427   else
3428     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3429
3430   return 0;
3431 }
3432
3433 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3434     .path = "test lookup",
3435     .short_help = "test lookup",
3436     .function = test_lookup_command_fn,
3437 };
3438
3439 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3440 {
3441   ip4_main_t * im4 = &ip4_main;
3442   ip4_fib_t * fib;
3443   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3444
3445   if (p == 0)
3446     return VNET_API_ERROR_NO_SUCH_FIB;
3447
3448   fib = vec_elt_at_index (im4->fibs, p[0]);
3449
3450   fib->flow_hash_config = flow_hash_config;
3451   return 0;
3452 }
3453  
3454 static clib_error_t *
3455 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3456                              unformat_input_t * input,
3457                              vlib_cli_command_t * cmd)
3458 {
3459   int matched = 0;
3460   u32 table_id = 0;
3461   u32 flow_hash_config = 0;
3462   int rv;
3463
3464   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3465     if (unformat (input, "table %d", &table_id))
3466       matched = 1;
3467 #define _(a,v) \
3468     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3469     foreach_flow_hash_bit
3470 #undef _
3471     else break;
3472   }
3473   
3474   if (matched == 0)
3475     return clib_error_return (0, "unknown input `%U'",
3476                               format_unformat_error, input);
3477   
3478   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3479   switch (rv)
3480     {
3481     case 0:
3482       break;
3483       
3484     case VNET_API_ERROR_NO_SUCH_FIB:
3485       return clib_error_return (0, "no such FIB table %d", table_id);
3486       
3487     default:
3488       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3489       break;
3490     }
3491   
3492   return 0;
3493 }
3494  
3495 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3496   .path = "set ip flow-hash",
3497   .short_help = 
3498   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3499   .function = set_ip_flow_hash_command_fn,
3500 };
3501  
3502 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3503                                  u32 table_index)
3504 {
3505   vnet_main_t * vnm = vnet_get_main();
3506   vnet_interface_main_t * im = &vnm->interface_main;
3507   ip4_main_t * ipm = &ip4_main;
3508   ip_lookup_main_t * lm = &ipm->lookup_main;
3509   vnet_classify_main_t * cm = &vnet_classify_main;
3510
3511   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3512     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3513
3514   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3515     return VNET_API_ERROR_NO_SUCH_ENTRY;
3516
3517   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3518   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3519
3520   return 0;
3521 }
3522
3523 static clib_error_t *
3524 set_ip_classify_command_fn (vlib_main_t * vm,
3525                             unformat_input_t * input,
3526                             vlib_cli_command_t * cmd)
3527 {
3528   u32 table_index = ~0;
3529   int table_index_set = 0;
3530   u32 sw_if_index = ~0;
3531   int rv;
3532   
3533   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3534     if (unformat (input, "table-index %d", &table_index))
3535       table_index_set = 1;
3536     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3537                        vnet_get_main(), &sw_if_index))
3538       ;
3539     else
3540       break;
3541   }
3542       
3543   if (table_index_set == 0)
3544     return clib_error_return (0, "classify table-index must be specified");
3545
3546   if (sw_if_index == ~0)
3547     return clib_error_return (0, "interface / subif must be specified");
3548
3549   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3550
3551   switch (rv)
3552     {
3553     case 0:
3554       break;
3555
3556     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3557       return clib_error_return (0, "No such interface");
3558
3559     case VNET_API_ERROR_NO_SUCH_ENTRY:
3560       return clib_error_return (0, "No such classifier table");
3561     }
3562   return 0;
3563 }
3564
3565 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3566     .path = "set ip classify",
3567     .short_help = 
3568     "set ip classify intfc <int> table-index <index>",
3569     .function = set_ip_classify_command_fn,
3570 };
3571