45bc22e7550d85d6f3d989bfd067fb1392b4dcf0
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47
48 /* This is really, really simple but stupid fib. */
49 u32
50 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
51                            ip4_address_t * dst,
52                            u32 disable_default_route)
53 {
54   ip_lookup_main_t * lm = &im->lookup_main;
55   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
56   uword * p, * hash, key;
57   i32 i, i_min, dst_address, ai;
58
59   i_min = disable_default_route ? 1 : 0;
60   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
61   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
62     {
63       hash = fib->adj_index_by_dst_address[i];
64       if (! hash)
65         continue;
66
67       key = dst_address & im->fib_masks[i];
68       if ((p = hash_get (hash, key)) != 0)
69         {
70           ai = p[0];
71           goto done;
72         }
73     }
74     
75   /* Nothing matches in table. */
76   ai = lm->miss_adj_index;
77
78  done:
79   return ai;
80 }
81
82 static ip4_fib_t *
83 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
84 {
85   ip4_fib_t * fib;
86   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
87   vec_add2 (im->fibs, fib, 1);
88   fib->table_id = table_id;
89   fib->index = fib - im->fibs;
90   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
91   fib->fwd_classify_table_index = ~0;
92   fib->rev_classify_table_index = ~0;
93   ip4_mtrie_init (&fib->mtrie);
94   return fib;
95 }
96
97 ip4_fib_t *
98 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
99                                    u32 table_index_or_id, u32 flags)
100 {
101   uword * p, fib_index;
102
103   fib_index = table_index_or_id;
104   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
105     {
106       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
107       if (! p)
108         return create_fib_with_table_id (im, table_index_or_id);
109       fib_index = p[0];
110     }
111   return vec_elt_at_index (im->fibs, fib_index);
112 }
113
114 static void
115 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
116                                        ip4_fib_t * fib,
117                                        u32 address_length)
118 {
119   hash_t * h;
120   uword max_index;
121
122   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
123   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
124
125   fib->adj_index_by_dst_address[address_length] =
126     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
127
128   hash_set_flags (fib->adj_index_by_dst_address[address_length],
129                   HASH_FLAG_NO_AUTO_SHRINK);
130
131   h = hash_header (fib->adj_index_by_dst_address[address_length]);
132   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
133
134   /* Initialize new/old hash value vectors. */
135   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
136   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
137 }
138
139 static void serialize_ip4_address (serialize_main_t * m, va_list * va)
140 {
141   ip4_address_t * a = va_arg (*va, ip4_address_t *);
142   u8 * p = serialize_get (m, sizeof (a->as_u8));
143   memcpy (p, a->as_u8, sizeof (a->as_u8));
144 }
145
146 static void unserialize_ip4_address (serialize_main_t * m, va_list * va)
147 {
148   ip4_address_t * a = va_arg (*va, ip4_address_t *);
149   u8 * p = unserialize_get (m, sizeof (a->as_u8));
150   memcpy (a->as_u8, p, sizeof (a->as_u8));
151 }
152
153 static void serialize_ip4_address_and_length (serialize_main_t * m, va_list * va)
154 {
155   ip4_address_t * a = va_arg (*va, ip4_address_t *);
156   u32 l = va_arg (*va, u32);
157   u32 n_bytes = (l / 8) + ((l % 8) != 0);
158   u8 * p = serialize_get (m, 1 + n_bytes);
159   ASSERT (l <= 32);
160   p[0] = l;
161   memcpy (p + 1, a->as_u8, n_bytes);
162 }
163
164 static void unserialize_ip4_address_and_length (serialize_main_t * m, va_list * va)
165 {
166   ip4_address_t * a = va_arg (*va, ip4_address_t *);
167   u32 * al = va_arg (*va, u32 *);
168   u8 * p = unserialize_get (m, 1);
169   u32 l, n_bytes;
170
171   al[0] = l = p[0];
172   ASSERT (l <= 32);
173   n_bytes = (l / 8) + ((l % 8) != 0);
174
175   if (n_bytes)
176     {
177       p = unserialize_get (m, n_bytes);
178       memcpy (a->as_u8, p, n_bytes);
179     }
180 }
181
182 static void serialize_ip4_add_del_route_msg (serialize_main_t * m, va_list * va)
183 {
184   ip4_add_del_route_args_t * a = va_arg (*va, ip4_add_del_route_args_t *);
185     
186   serialize_likely_small_unsigned_integer (m, a->table_index_or_table_id);
187   serialize_likely_small_unsigned_integer (m, a->flags);
188   serialize (m, serialize_ip4_address_and_length, &a->dst_address, a->dst_address_length);
189   serialize_likely_small_unsigned_integer (m, a->adj_index);
190   serialize_likely_small_unsigned_integer (m, a->n_add_adj);
191   if (a->n_add_adj > 0)
192     serialize (m, serialize_vec_ip_adjacency, a->add_adj, a->n_add_adj);
193 }
194
195 /* Serialized adjacencies for arp/rewrite do not send graph next_index
196    since graph hookup is not guaranteed to be the same for both sides
197    of serialize/unserialize. */
198 static void
199 unserialize_fixup_ip4_rewrite_adjacencies (vlib_main_t * vm,
200                                            ip_adjacency_t * adj,
201                                            u32 n_adj)
202 {
203   vnet_main_t * vnm = vnet_get_main();
204   u32 i, ni, sw_if_index, is_arp;
205   vnet_hw_interface_t * hw;
206
207   for (i = 0; i < n_adj; i++)
208     {
209       switch (adj[i].lookup_next_index)
210         {
211         case IP_LOOKUP_NEXT_REWRITE:
212         case IP_LOOKUP_NEXT_ARP:
213           is_arp = adj[i].lookup_next_index == IP_LOOKUP_NEXT_ARP;
214           sw_if_index = adj[i].rewrite_header.sw_if_index;
215           hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
216           ni = is_arp ? ip4_arp_node.index : ip4_rewrite_node.index;
217           adj[i].rewrite_header.node_index = ni;
218           adj[i].rewrite_header.next_index = vlib_node_add_next (vm, ni, hw->output_node_index);
219           if (is_arp)
220             vnet_rewrite_for_sw_interface
221               (vnm,
222                VNET_L3_PACKET_TYPE_ARP,
223                sw_if_index,
224                ni,
225                VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
226                &adj[i].rewrite_header,
227                sizeof (adj->rewrite_data));
228           break;
229
230         default:
231           break;
232         }
233     }
234 }
235
236 static void unserialize_ip4_add_del_route_msg (serialize_main_t * m, va_list * va)
237 {
238   ip4_main_t * i4m = &ip4_main;
239   ip4_add_del_route_args_t a;
240     
241   a.table_index_or_table_id = unserialize_likely_small_unsigned_integer (m);
242   a.flags = unserialize_likely_small_unsigned_integer (m);
243   unserialize (m, unserialize_ip4_address_and_length, &a.dst_address, &a.dst_address_length);
244   a.adj_index = unserialize_likely_small_unsigned_integer (m);
245   a.n_add_adj = unserialize_likely_small_unsigned_integer (m);
246   a.add_adj = 0;
247   if (a.n_add_adj > 0)
248     {
249       vec_resize (a.add_adj, a.n_add_adj);
250       unserialize (m, unserialize_vec_ip_adjacency, a.add_adj, a.n_add_adj);
251       unserialize_fixup_ip4_rewrite_adjacencies (vlib_get_main(), 
252                                                  a.add_adj, a.n_add_adj);
253     }
254
255   /* Prevent re-re-distribution. */
256   a.flags |= IP4_ROUTE_FLAG_NO_REDISTRIBUTE;
257
258   ip4_add_del_route (i4m, &a);
259
260   vec_free (a.add_adj);
261 }
262
/* Message descriptor handed to mc_serialize2 by ip4_add_del_route:
   names the message and binds its serialize/unserialize handlers. */
MC_SERIALIZE_MSG (ip4_add_del_route_msg, static) = {
  .name = "vnet_ip4_add_del_route",
  .serialize = serialize_ip4_add_del_route_msg,
  .unserialize = unserialize_ip4_add_del_route_msg,
};
268
269 static void
270 ip4_fib_set_adj_index (ip4_main_t * im,
271                        ip4_fib_t * fib,
272                        u32 flags,
273                        u32 dst_address_u32,
274                        u32 dst_address_length,
275                        u32 adj_index)
276 {
277   ip_lookup_main_t * lm = &im->lookup_main;
278   uword * hash;
279
280   if (vec_bytes(fib->old_hash_values))
281     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
282   if (vec_bytes(fib->new_hash_values))
283     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
284   fib->new_hash_values[0] = adj_index;
285
286   /* Make sure adj index is valid. */
287   if (CLIB_DEBUG > 0)
288     (void) ip_get_adjacency (lm, adj_index);
289
290   hash = fib->adj_index_by_dst_address[dst_address_length];
291
292   hash = _hash_set3 (hash, dst_address_u32,
293                      fib->new_hash_values,
294                      fib->old_hash_values);
295
296   fib->adj_index_by_dst_address[dst_address_length] = hash;
297
298   if (vec_len (im->add_del_route_callbacks) > 0)
299     {
300       ip4_add_del_route_callback_t * cb;
301       ip4_address_t d;
302       uword * p;
303
304       d.data_u32 = dst_address_u32;
305       vec_foreach (cb, im->add_del_route_callbacks)
306         if ((flags & cb->required_flags) == cb->required_flags)
307           cb->function (im, cb->function_opaque,
308                         fib, flags,
309                         &d, dst_address_length,
310                         fib->old_hash_values,
311                         fib->new_hash_values);
312
313       p = hash_get (hash, dst_address_u32);
314       memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
315     }
316 }
317
318 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
319 {
320   vlib_main_t * vm = vlib_get_main();
321   ip_lookup_main_t * lm = &im->lookup_main;
322   ip4_fib_t * fib;
323   u32 dst_address, dst_address_length, adj_index, old_adj_index;
324   uword * hash, is_del;
325   ip4_add_del_route_callback_t * cb;
326
327   if (vm->mc_main && ! (a->flags & IP4_ROUTE_FLAG_NO_REDISTRIBUTE))
328     {
329       u32 multiple_messages_per_vlib_buffer = (a->flags & IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP);
330       mc_serialize2 (vm->mc_main, multiple_messages_per_vlib_buffer,
331                      &ip4_add_del_route_msg, a);
332       return;
333     }
334
335   /* Either create new adjacency or use given one depending on arguments. */
336   if (a->n_add_adj > 0)
337     {
338       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
339       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
340     }
341   else
342     adj_index = a->adj_index;
343
344   dst_address = a->dst_address.data_u32;
345   dst_address_length = a->dst_address_length;
346   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
347
348   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
349   dst_address &= im->fib_masks[dst_address_length];
350
351   if (! fib->adj_index_by_dst_address[dst_address_length])
352     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
353
354   hash = fib->adj_index_by_dst_address[dst_address_length];
355
356   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
357
358   if (is_del)
359     {
360       fib->old_hash_values[0] = ~0;
361       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
362       fib->adj_index_by_dst_address[dst_address_length] = hash;
363
364       if (vec_len (im->add_del_route_callbacks) > 0
365           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
366         {
367           fib->new_hash_values[0] = ~0;
368           vec_foreach (cb, im->add_del_route_callbacks)
369             if ((a->flags & cb->required_flags) == cb->required_flags)
370               cb->function (im, cb->function_opaque,
371                             fib, a->flags,
372                             &a->dst_address, dst_address_length,
373                             fib->old_hash_values,
374                             fib->new_hash_values);
375         }
376     }
377   else
378     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
379                            adj_index);
380
381   old_adj_index = fib->old_hash_values[0];
382
383   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
384                                is_del ? old_adj_index : adj_index,
385                                is_del);
386
387   /* Delete old adjacency index if present and changed. */
388   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
389       && old_adj_index != ~0
390       && old_adj_index != adj_index)
391     ip_del_adjacency (lm, old_adj_index);
392 }
393
394 static void serialize_ip4_add_del_route_next_hop_msg (serialize_main_t * m, va_list * va)
395 {
396   u32 flags = va_arg (*va, u32);
397   ip4_address_t * dst_address = va_arg (*va, ip4_address_t *);
398   u32 dst_address_length = va_arg (*va, u32);
399   ip4_address_t * next_hop_address = va_arg (*va, ip4_address_t *);
400   u32 next_hop_sw_if_index = va_arg (*va, u32);
401   u32 next_hop_weight = va_arg (*va, u32);
402
403   serialize_likely_small_unsigned_integer (m, flags);
404   serialize (m, serialize_ip4_address_and_length, dst_address, dst_address_length);
405   serialize (m, serialize_ip4_address, next_hop_address);
406   serialize_likely_small_unsigned_integer (m, next_hop_sw_if_index);
407   serialize_likely_small_unsigned_integer (m, next_hop_weight);
408 }
409
410 static void unserialize_ip4_add_del_route_next_hop_msg (serialize_main_t * m, va_list * va)
411 {
412   ip4_main_t * im = &ip4_main;
413   u32 flags, dst_address_length, next_hop_sw_if_index, next_hop_weight;
414   ip4_address_t dst_address, next_hop_address;
415
416   flags = unserialize_likely_small_unsigned_integer (m);
417   unserialize (m, unserialize_ip4_address_and_length, &dst_address, &dst_address_length);
418   unserialize (m, unserialize_ip4_address, &next_hop_address);
419   next_hop_sw_if_index = unserialize_likely_small_unsigned_integer (m);
420   next_hop_weight = unserialize_likely_small_unsigned_integer (m);
421
422   ip4_add_del_route_next_hop
423     (im,
424      flags | IP4_ROUTE_FLAG_NO_REDISTRIBUTE,
425      &dst_address,
426      dst_address_length,
427      &next_hop_address,
428      next_hop_sw_if_index,
429      next_hop_weight, (u32)~0, 
430      (u32)~0 /* explicit FIB index */);
431 }
432
/* Message descriptor handed to mc_serialize2 by
   ip4_add_del_route_next_hop: names the message and binds its
   serialize/unserialize handlers. */
MC_SERIALIZE_MSG (ip4_add_del_route_next_hop_msg, static) = {
  .name = "vnet_ip4_add_del_route_next_hop",
  .serialize = serialize_ip4_add_del_route_next_hop_msg,
  .unserialize = unserialize_ip4_add_del_route_next_hop_msg,
};
438
/* Add or delete (IP4_ROUTE_FLAG_DEL in FLAGS) one weighted next hop of
   the route DST_ADDRESS/DST_ADDRESS_LENGTH.

   - When vm->mc_main is set and IP4_ROUTE_FLAG_NO_REDISTRIBUTE is not,
     the request is only serialized through mc_serialize2; note that
     ADJ_INDEX and EXPLICIT_FIB_INDEX are not part of that message.
   - EXPLICIT_FIB_INDEX == ~0 selects the FIB bound to
     NEXT_HOP_SW_IF_INDEX; any other value is used as the FIB index.
   - ADJ_INDEX == ~0 means the next-hop adjacency is looked up here:
     an all-zero NEXT_HOP address denotes an interface route, otherwise
     the next hop must already be present as a /32 in the FIB.
     Any other ADJ_INDEX is used as the next-hop adjacency as is.

   Failures set vnm->api_errno and are reported through
   clib_error_report; the function itself returns nothing. */
void
ip4_add_del_route_next_hop (ip4_main_t * im,
                            u32 flags,
                            ip4_address_t * dst_address,
                            u32 dst_address_length,
                            ip4_address_t * next_hop,
                            u32 next_hop_sw_if_index,
                            u32 next_hop_weight, u32 adj_index, 
                            u32 explicit_fib_index)
{
  vnet_main_t * vnm = vnet_get_main();
  vlib_main_t * vm = vlib_get_main();
  ip_lookup_main_t * lm = &im->lookup_main;
  u32 fib_index;
  ip4_fib_t * fib;
  u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
  u32 dst_adj_index, nh_adj_index;
  uword * dst_hash, * dst_result;
  uword * nh_hash, * nh_result;
  ip_adjacency_t * dst_adj;
  ip_multipath_adjacency_t * old_mp, * new_mp;
  int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
  int is_interface_next_hop;
  clib_error_t * error = 0;

  /* Hand the request to the mc layer instead of applying it here. */
  if (vm->mc_main && ! (flags & IP4_ROUTE_FLAG_NO_REDISTRIBUTE))
    {
      u32 multiple_messages_per_vlib_buffer = (flags & IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP);
      mc_serialize2 (vm->mc_main,
                     multiple_messages_per_vlib_buffer,
                     &ip4_add_del_route_next_hop_msg,
                     flags,
                     dst_address, dst_address_length,
                     next_hop, next_hop_sw_if_index, next_hop_weight);
      return;
    }

  /* Pick the FIB: the one bound to the next-hop interface unless the
     caller named one explicitly. */
  if (explicit_fib_index == (u32)~0)
      fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
  else
      fib_index = explicit_fib_index;

  fib = vec_elt_at_index (im->fibs, fib_index);
  
  /* Lookup next hop to be added or deleted. */
  is_interface_next_hop = next_hop->data_u32 == 0;
  if (adj_index == (u32)~0)
    {
      if (is_interface_next_hop)
        {
          /* Interface routes share one adjacency per sw_if_index; create
             and remember it on first use. */
          nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
          if (nh_result)
            nh_adj_index = *nh_result;
          else
            {
              ip_adjacency_t * adj;
              adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
                                      &nh_adj_index);
              ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
              ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
              hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
            }
        }
      else
        {
          /* Resolve the next hop through its /32 entry in the same FIB. */
          nh_hash = fib->adj_index_by_dst_address[32];
          nh_result = hash_get (nh_hash, next_hop->data_u32);
          
          /* Next hop must be known. */
          if (! nh_result)
            {
              vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_IN_FIB;
              error = clib_error_return (0, "next-hop %U/32 not in FIB",
                                         format_ip4_address, next_hop);
              goto done;
            }
          nh_adj_index = *nh_result;
        }
    }
  else
    {
      /* Caller supplied the next-hop adjacency. */
      nh_adj_index = adj_index;
    }
  ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
  dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];

  /* Find the adjacency currently installed for the destination, if any. */
  dst_hash = fib->adj_index_by_dst_address[dst_address_length];
  dst_result = hash_get (dst_hash, dst_address_u32);
  if (dst_result)
    {
      dst_adj_index = dst_result[0];
      dst_adj = ip_get_adjacency (lm, dst_adj_index);
    }
  else
    {
      /* For deletes destination must be known. */
      if (is_del)
        {
          vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
          error = clib_error_return (0, "unknown destination %U/%d",
                                     format_ip4_address, dst_address,
                                     dst_address_length);
          goto done;
        }

      dst_adj_index = ~0;
      dst_adj = 0;
    }

  /* Reject adds of X/32 with next hop of X.  This is only checked when
     the caller supplied an explicit adj_index. */
  if (! is_del
      && dst_address_length == 32
      && dst_address->data_u32 == next_hop->data_u32 
      && adj_index != (u32)~0)
    {
      vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
      error = clib_error_return (0, "prefix matches next hop %U/%d",
                                 format_ip4_address, dst_address,
                                 dst_address_length);
      goto done;
    }

  /* ~0 when the destination has no adjacency yet. */
  old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;

  if (! ip_multipath_adjacency_add_del_next_hop
      (lm, is_del,
       old_mp_adj_index,
       nh_adj_index,
       next_hop_weight,
       &new_mp_adj_index))
    {
      vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
      error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
                                 format_ip4_address, next_hop);
      goto done;
    }
  
  old_mp = new_mp = 0;
  if (old_mp_adj_index != ~0)
    old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
  if (new_mp_adj_index != ~0)
    new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);

  /* Re-install the route only when the multipath adjacency changed. */
  if (old_mp != new_mp)
    {
      ip4_add_del_route_args_t a;
      a.table_index_or_table_id = fib_index;
      /* The route itself is deleted only when a delete left no
         multipath adjacency behind; otherwise it is (re)added. */
      a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
                 | IP4_ROUTE_FLAG_FIB_INDEX
                 | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
                 | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
      a.dst_address = dst_address[0];
      a.dst_address_length = dst_address_length;
      a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
      a.add_adj = 0;
      a.n_add_adj = 0;

      ip4_add_del_route (im, &a);
    }

 done:
  if (error)
    clib_error_report (error);
}
603
604 void *
605 ip4_get_route (ip4_main_t * im,
606                u32 table_index_or_table_id,
607                u32 flags,
608                u8 * address,
609                u32 address_length)
610 {
611   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
612   u32 dst_address = * (u32 *) address;
613   uword * hash, * p;
614
615   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
616   dst_address &= im->fib_masks[address_length];
617
618   hash = fib->adj_index_by_dst_address[address_length];
619   p = hash_get (hash, dst_address);
620   return (void *) p;
621 }
622
623 void
624 ip4_foreach_matching_route (ip4_main_t * im,
625                             u32 table_index_or_table_id,
626                             u32 flags,
627                             ip4_address_t * address,
628                             u32 address_length,
629                             ip4_address_t ** results,
630                             u8 ** result_lengths)
631 {
632   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
633   u32 dst_address = address->data_u32;
634   u32 this_length = address_length;
635   
636   if (*results)
637     _vec_len (*results) = 0;
638   if (*result_lengths)
639     _vec_len (*result_lengths) = 0;
640
641   while (this_length <= 32 && vec_len (results) == 0)
642     {
643       uword k, v;
644       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
645         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
646           {
647             ip4_address_t a;
648             a.data_u32 = k;
649             vec_add1 (*results, a);
650             vec_add1 (*result_lengths, this_length);
651           }
652       }));
653
654       this_length++;
655     }
656 }
657
658 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
659                                   u32 table_index_or_table_id,
660                                   u32 flags)
661 {
662   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
663   ip_lookup_main_t * lm = &im->lookup_main;
664   u32 i, l;
665   ip4_address_t a;
666   ip4_add_del_route_callback_t * cb;
667   static ip4_address_t * to_delete;
668
669   if (lm->n_adjacency_remaps == 0)
670     return;
671
672   for (l = 0; l <= 32; l++)
673     {
674       hash_pair_t * p;
675       uword * hash = fib->adj_index_by_dst_address[l];
676
677       if (hash_elts (hash) == 0)
678         continue;
679
680       if (to_delete)
681         _vec_len (to_delete) = 0;
682
683       hash_foreach_pair (p, hash, ({
684         u32 adj_index = p->value[0];
685         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
686
687         if (m)
688           {
689             /* Record destination address from hash key. */
690             a.data_u32 = p->key;
691
692             /* New adjacency points to nothing: so delete prefix. */
693             if (m == ~0)
694               vec_add1 (to_delete, a);
695             else
696               {
697                 /* Remap to new adjacency. */
698                 memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
699
700                 /* Set new adjacency value. */
701                 fib->new_hash_values[0] = p->value[0] = m - 1;
702
703                 vec_foreach (cb, im->add_del_route_callbacks)
704                   if ((flags & cb->required_flags) == cb->required_flags)
705                     cb->function (im, cb->function_opaque,
706                                   fib, flags | IP4_ROUTE_FLAG_ADD,
707                                   &a, l,
708                                   fib->old_hash_values,
709                                   fib->new_hash_values);
710               }
711           }
712       }));
713
714       fib->new_hash_values[0] = ~0;
715       for (i = 0; i < vec_len (to_delete); i++)
716         {
717           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
718           vec_foreach (cb, im->add_del_route_callbacks)
719             if ((flags & cb->required_flags) == cb->required_flags)
720               cb->function (im, cb->function_opaque,
721                             fib, flags | IP4_ROUTE_FLAG_DEL,
722                             &a, l,
723                             fib->old_hash_values,
724                             fib->new_hash_values);
725         }
726     }
727
728   /* Also remap adjacencies in mtrie. */
729   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
730
731   /* Reset mapping table. */
732   vec_zero (lm->adjacency_remap_table);
733
734   /* All remaps have been performed. */
735   lm->n_adjacency_remaps = 0;
736 }
737
738 void ip4_delete_matching_routes (ip4_main_t * im,
739                                  u32 table_index_or_table_id,
740                                  u32 flags,
741                                  ip4_address_t * address,
742                                  u32 address_length)
743 {
744   static ip4_address_t * matching_addresses;
745   static u8 * matching_address_lengths;
746   u32 l, i;
747   ip4_add_del_route_args_t a;
748
749   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
750   a.table_index_or_table_id = table_index_or_table_id;
751   a.adj_index = ~0;
752   a.add_adj = 0;
753   a.n_add_adj = 0;
754
755   for (l = address_length + 1; l <= 32; l++)
756     {
757       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
758                                   address,
759                                   l,
760                                   &matching_addresses,
761                                   &matching_address_lengths);
762       for (i = 0; i < vec_len (matching_addresses); i++)
763         {
764           a.dst_address = matching_addresses[i];
765           a.dst_address_length = matching_address_lengths[i];
766           ip4_add_del_route (im, &a);
767         }
768     }
769
770   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
771 }
772
773 always_inline uword
774 ip4_lookup_inline (vlib_main_t * vm,
775                    vlib_node_runtime_t * node,
776                    vlib_frame_t * frame,
777                    int lookup_for_responses_to_locally_received_packets)
778 {
779   ip4_main_t * im = &ip4_main;
780   ip_lookup_main_t * lm = &im->lookup_main;
781   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
782   u32 n_left_from, n_left_to_next, * from, * to_next;
783   ip_lookup_next_t next;
784   u32 cpu_index = os_get_cpu_number();
785
786   from = vlib_frame_vector_args (frame);
787   n_left_from = frame->n_vectors;
788   next = node->cached_next_index;
789
790   while (n_left_from > 0)
791     {
792       vlib_get_next_frame (vm, node, next,
793                            to_next, n_left_to_next);
794
795       while (n_left_from >= 4 && n_left_to_next >= 2)
796         {
797           vlib_buffer_t * p0, * p1;
798           ip4_header_t * ip0, * ip1;
799           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
800           ip_lookup_next_t next0, next1;
801           ip_adjacency_t * adj0, * adj1;
802           ip4_fib_mtrie_t * mtrie0, * mtrie1;
803           ip4_fib_mtrie_leaf_t leaf0, leaf1;
804           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
805           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
806           u32 flow_hash_config0, flow_hash_config1;
807           u32 hash_c0, hash_c1;
808           u32 wrong_next;
809
810           /* Prefetch next iteration. */
811           {
812             vlib_buffer_t * p2, * p3;
813
814             p2 = vlib_get_buffer (vm, from[2]);
815             p3 = vlib_get_buffer (vm, from[3]);
816
817             vlib_prefetch_buffer_header (p2, LOAD);
818             vlib_prefetch_buffer_header (p3, LOAD);
819
820             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
821             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
822           }
823
824           pi0 = to_next[0] = from[0];
825           pi1 = to_next[1] = from[1];
826
827           p0 = vlib_get_buffer (vm, pi0);
828           p1 = vlib_get_buffer (vm, pi1);
829
830           ip0 = vlib_buffer_get_current (p0);
831           ip1 = vlib_buffer_get_current (p1);
832
833           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
834           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
835           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
836             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
837           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
838             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
839
840
841           if (! lookup_for_responses_to_locally_received_packets)
842             {
843               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
844               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
845
846               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
847
848               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0);
849               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 0);
850             }
851
852           tcp0 = (void *) (ip0 + 1);
853           tcp1 = (void *) (ip1 + 1);
854
855           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
856                          || ip0->protocol == IP_PROTOCOL_UDP);
857           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
858                          || ip1->protocol == IP_PROTOCOL_UDP);
859
860           if (! lookup_for_responses_to_locally_received_packets)
861             {
862               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1);
863               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 1);
864             }
865
866           if (! lookup_for_responses_to_locally_received_packets)
867             {
868               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2);
869               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 2);
870             }
871
872           if (! lookup_for_responses_to_locally_received_packets)
873             {
874               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3);
875               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 3);
876             }
877
878           if (lookup_for_responses_to_locally_received_packets)
879             {
880               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
881               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
882             }
883           else
884             {
885               /* Handle default route. */
886               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
887               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
888
889               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
890               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
891             }
892
893           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
894                                                            &ip0->dst_address,
895                                                            /* no_default_route */ 0));
896           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
897                                                            &ip1->dst_address,
898                                                            /* no_default_route */ 0));
899           adj0 = ip_get_adjacency (lm, adj_index0);
900           adj1 = ip_get_adjacency (lm, adj_index1);
901
902           next0 = adj0->lookup_next_index;
903           next1 = adj1->lookup_next_index;
904
905           /* Use flow hash to compute multipath adjacency. */
906           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
907           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
908           if (PREDICT_FALSE (adj0->n_adj > 1))
909             {
910               flow_hash_config0 = 
911                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
912               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
913                 ip4_compute_flow_hash (ip0, flow_hash_config0);
914             }
915           if (PREDICT_FALSE(adj1->n_adj > 1))
916             {
917               flow_hash_config1 = 
918                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
919               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
920                 ip4_compute_flow_hash (ip1, flow_hash_config1);
921             }
922
923           ASSERT (adj0->n_adj > 0);
924           ASSERT (adj1->n_adj > 0);
925           ASSERT (is_pow2 (adj0->n_adj));
926           ASSERT (is_pow2 (adj1->n_adj));
927           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
928           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
929
930           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
931           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
932
933           vlib_increment_combined_counter 
934               (cm, cpu_index, adj_index0, 1,
935                vlib_buffer_length_in_chain (vm, p0) 
936                + sizeof(ethernet_header_t));
937           vlib_increment_combined_counter 
938               (cm, cpu_index, adj_index1, 1,
939                vlib_buffer_length_in_chain (vm, p1)
940                + sizeof(ethernet_header_t));
941
942           from += 2;
943           to_next += 2;
944           n_left_to_next -= 2;
945           n_left_from -= 2;
946
947           wrong_next = (next0 != next) + 2*(next1 != next);
948           if (PREDICT_FALSE (wrong_next != 0))
949             {
950               switch (wrong_next)
951                 {
952                 case 1:
953                   /* A B A */
954                   to_next[-2] = pi1;
955                   to_next -= 1;
956                   n_left_to_next += 1;
957                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
958                   break;
959
960                 case 2:
961                   /* A A B */
962                   to_next -= 1;
963                   n_left_to_next += 1;
964                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
965                   break;
966
967                 case 3:
968                   /* A B C */
969                   to_next -= 2;
970                   n_left_to_next += 2;
971                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
972                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
973                   if (next0 == next1)
974                     {
975                       /* A B B */
976                       vlib_put_next_frame (vm, node, next, n_left_to_next);
977                       next = next1;
978                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
979                     }
980                 }
981             }
982         }
983     
984       while (n_left_from > 0 && n_left_to_next > 0)
985         {
986           vlib_buffer_t * p0;
987           ip4_header_t * ip0;
988           __attribute__((unused)) tcp_header_t * tcp0;
989           ip_lookup_next_t next0;
990           ip_adjacency_t * adj0;
991           ip4_fib_mtrie_t * mtrie0;
992           ip4_fib_mtrie_leaf_t leaf0;
993           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
994           u32 flow_hash_config0, hash_c0;
995
996           pi0 = from[0];
997           to_next[0] = pi0;
998
999           p0 = vlib_get_buffer (vm, pi0);
1000
1001           ip0 = vlib_buffer_get_current (p0);
1002
1003           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
1004           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
1005             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
1006
1007           if (! lookup_for_responses_to_locally_received_packets)
1008             {
1009               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1010
1011               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1012
1013               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0);
1014             }
1015
1016           tcp0 = (void *) (ip0 + 1);
1017
1018           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
1019                          || ip0->protocol == IP_PROTOCOL_UDP);
1020
1021           if (! lookup_for_responses_to_locally_received_packets)
1022             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1);
1023
1024           if (! lookup_for_responses_to_locally_received_packets)
1025             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2);
1026
1027           if (! lookup_for_responses_to_locally_received_packets)
1028             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3);
1029
1030           if (lookup_for_responses_to_locally_received_packets)
1031             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
1032           else
1033             {
1034               /* Handle default route. */
1035               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1036               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1037             }
1038
1039           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1040                                                            &ip0->dst_address,
1041                                                            /* no_default_route */ 0));
1042
1043           adj0 = ip_get_adjacency (lm, adj_index0);
1044
1045           next0 = adj0->lookup_next_index;
1046
1047           /* Use flow hash to compute multipath adjacency. */
1048           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
1049           if (PREDICT_FALSE(adj0->n_adj > 1))
1050             {
1051               flow_hash_config0 = 
1052                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
1053
1054               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
1055                 ip4_compute_flow_hash (ip0, flow_hash_config0);
1056             }
1057
1058           ASSERT (adj0->n_adj > 0);
1059           ASSERT (is_pow2 (adj0->n_adj));
1060           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
1061
1062           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1063
1064           vlib_increment_combined_counter 
1065               (cm, cpu_index, adj_index0, 1,
1066                vlib_buffer_length_in_chain (vm, p0)
1067                + sizeof(ethernet_header_t));
1068
1069           from += 1;
1070           to_next += 1;
1071           n_left_to_next -= 1;
1072           n_left_from -= 1;
1073
1074           if (PREDICT_FALSE (next0 != next))
1075             {
1076               n_left_to_next += 1;
1077               vlib_put_next_frame (vm, node, next, n_left_to_next);
1078               next = next0;
1079               vlib_get_next_frame (vm, node, next,
1080                                    to_next, n_left_to_next);
1081               to_next[0] = pi0;
1082               to_next += 1;
1083               n_left_to_next -= 1;
1084             }
1085         }
1086
1087       vlib_put_next_frame (vm, node, next, n_left_to_next);
1088     }
1089
1090   return frame->n_vectors;
1091 }
1092
1093 static uword
1094 ip4_lookup (vlib_main_t * vm,
1095             vlib_node_runtime_t * node,
1096             vlib_frame_t * frame)
1097 {
1098   return ip4_lookup_inline (vm, node, frame, /* lookup_for_responses_to_locally_received_packets */ 0);
1099
1100 }
1101
1102 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
1103                                         ip_adjacency_t * adj,
1104                                         u32 sw_if_index,
1105                                         u32 if_address_index)
1106 {
1107   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1108   ip_lookup_next_t n;
1109   vnet_l3_packet_type_t packet_type;
1110   u32 node_index;
1111
1112   if (hw->hw_class_index == ethernet_hw_interface_class.index
1113       || hw->hw_class_index == srp_hw_interface_class.index)
1114     {
1115       /* 
1116        * We have a bit of a problem in this case. ip4-arp uses
1117        * the rewrite_header.next_index to hand pkts to the
1118        * indicated inteface output node. We can end up in
1119        * ip4_rewrite_local, too, which also pays attention to 
1120        * rewrite_header.next index. Net result: a hack in
1121        * ip4_rewrite_local...
1122        */
1123       n = IP_LOOKUP_NEXT_ARP;
1124       node_index = ip4_arp_node.index;
1125       adj->if_address_index = if_address_index;
1126       packet_type = VNET_L3_PACKET_TYPE_ARP;
1127     }
1128   else
1129     {
1130       n = IP_LOOKUP_NEXT_REWRITE;
1131       node_index = ip4_rewrite_node.index;
1132       packet_type = VNET_L3_PACKET_TYPE_IP4;
1133     }
1134
1135   adj->lookup_next_index = n;
1136   vnet_rewrite_for_sw_interface
1137     (vnm,
1138      packet_type,
1139      sw_if_index,
1140      node_index,
1141      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1142      &adj->rewrite_header,
1143      sizeof (adj->rewrite_data));
1144 }
1145
1146 static void
1147 ip4_add_interface_routes (u32 sw_if_index,
1148                           ip4_main_t * im, u32 fib_index,
1149                           ip_interface_address_t * a)
1150 {
1151   vnet_main_t * vnm = vnet_get_main();
1152   ip_lookup_main_t * lm = &im->lookup_main;
1153   ip_adjacency_t * adj;
1154   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1155   ip4_add_del_route_args_t x;
1156   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1157   u32 classify_table_index;
1158
1159   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1160   x.table_index_or_table_id = fib_index;
1161   x.flags = (IP4_ROUTE_FLAG_ADD
1162              | IP4_ROUTE_FLAG_FIB_INDEX
1163              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1164   x.dst_address = address[0];
1165   x.dst_address_length = a->address_length;
1166   x.n_add_adj = 0;
1167   x.add_adj = 0;
1168
1169   a->neighbor_probe_adj_index = ~0;
1170   if (a->address_length < 32)
1171     {
1172       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1173                               &x.adj_index);
1174       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1175       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1176       ip4_add_del_route (im, &x);
1177       a->neighbor_probe_adj_index = x.adj_index;
1178     }
1179   
1180   /* Add e.g. 1.1.1.1/32 as local to this host. */
1181   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1182                           &x.adj_index);
1183   
1184   classify_table_index = ~0;
1185   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1186     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1187   if (classify_table_index != (u32) ~0)
1188     {
1189       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1190       adj->classify_table_index = classify_table_index;
1191     }
1192   else
1193     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1194   
1195   adj->if_address_index = a - lm->if_address_pool;
1196   adj->rewrite_header.sw_if_index = sw_if_index;
1197   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1198   /* 
1199    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1200    * fail an RPF-ish check, but still go thru the rewrite code...
1201    */
1202   adj->rewrite_header.data_bytes = 0;
1203
1204   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1205   x.dst_address_length = 32;
1206   ip4_add_del_route (im, &x);
1207 }
1208
1209 static void
1210 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1211 {
1212   ip4_add_del_route_args_t x;
1213
1214   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1215   x.table_index_or_table_id = fib_index;
1216   x.flags = (IP4_ROUTE_FLAG_DEL
1217              | IP4_ROUTE_FLAG_FIB_INDEX
1218              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1219   x.dst_address = address[0];
1220   x.dst_address_length = address_length;
1221   x.adj_index = ~0;
1222   x.n_add_adj = 0;
1223   x.add_adj = 0;
1224
1225   if (address_length < 32)
1226     ip4_add_del_route (im, &x);
1227
1228   x.dst_address_length = 32;
1229   ip4_add_del_route (im, &x);
1230
1231   ip4_delete_matching_routes (im,
1232                               fib_index,
1233                               IP4_ROUTE_FLAG_FIB_INDEX,
1234                               address,
1235                               address_length);
1236 }
1237
/* Flat (interface, address, prefix length) record used by the
 * serialize/unserialize routines for interface IPv4 addresses. */
typedef struct {
    /* Software interface the address is configured on. */
    u32 sw_if_index;
    /* The IPv4 address itself. */
    ip4_address_t address;
    /* Prefix length of the address. */
    u32 length;
} ip4_interface_address_t;
1243
1244 static void serialize_vec_ip4_set_interface_address (serialize_main_t * m, va_list * va)
1245 {
1246     ip4_interface_address_t * a = va_arg (*va, ip4_interface_address_t *);
1247     u32 n = va_arg (*va, u32);
1248     u32 i;
1249     for (i = 0; i < n; i++) {
1250         serialize_integer (m, a[i].sw_if_index, sizeof (a[i].sw_if_index));
1251         serialize (m, serialize_ip4_address, &a[i].address);
1252         serialize_integer (m, a[i].length, sizeof (a[i].length));
1253     }
1254 }
1255
1256 static void unserialize_vec_ip4_set_interface_address (serialize_main_t * m, va_list * va)
1257 {
1258     ip4_interface_address_t * a = va_arg (*va, ip4_interface_address_t *);
1259     u32 n = va_arg (*va, u32);
1260     u32 i;
1261     for (i = 0; i < n; i++) {
1262         unserialize_integer (m, &a[i].sw_if_index, sizeof (a[i].sw_if_index));
1263         unserialize (m, unserialize_ip4_address, &a[i].address);
1264         unserialize_integer (m, &a[i].length, sizeof (a[i].length));
1265     }
1266 }
1267
1268 static void serialize_ip4_set_interface_address_msg (serialize_main_t * m, va_list * va)
1269 {
1270   ip4_interface_address_t * a = va_arg (*va, ip4_interface_address_t *);
1271   int is_del = va_arg (*va, int);
1272   serialize (m, serialize_vec_ip4_set_interface_address, a, 1);
1273   serialize_integer (m, is_del, sizeof (is_del));
1274 }
1275
/* Forward declaration: the definition comes later in this file, but the
 * unserialize handler that follows needs to call it. */
static clib_error_t *
ip4_add_del_interface_address_internal (vlib_main_t * vm,
                                        u32 sw_if_index,
                                        ip4_address_t * new_address,
                                        u32 new_length,
                                        u32 redistribute,
                                        u32 insert_routes,
                                        u32 is_del);
1284
1285 static void unserialize_ip4_set_interface_address_msg (serialize_main_t * m, va_list * va)
1286 {
1287   mc_main_t * mcm = va_arg (*va, mc_main_t *);
1288   vlib_main_t * vm = mcm->vlib_main;
1289   ip4_interface_address_t a;
1290   clib_error_t * error;
1291   int is_del;
1292
1293   unserialize (m, unserialize_vec_ip4_set_interface_address, &a, 1);
1294   unserialize_integer (m, &is_del, sizeof (is_del));
1295   error = ip4_add_del_interface_address_internal
1296     (vm, a.sw_if_index, &a.address, a.length,
1297      /* redistribute */ 0,
1298      /* insert_routes */ 1,
1299      is_del);
1300   if (error)
1301     clib_error_report (error);
1302 }
1303
/* Message descriptor handed to mc_serialize when an interface address
 * add/delete is redistributed; ties the message name to its serialize and
 * unserialize handlers. */
MC_SERIALIZE_MSG (ip4_set_interface_address_msg, static) = {
  .name = "vnet_ip4_set_interface_address",
  .serialize = serialize_ip4_set_interface_address_msg,
  .unserialize = unserialize_ip4_set_interface_address_msg,
};
1309
1310 static clib_error_t *
1311 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1312                                         u32 sw_if_index,
1313                                         ip4_address_t * address,
1314                                         u32 address_length,
1315                                         u32 redistribute,
1316                                         u32 insert_routes,
1317                                         u32 is_del)
1318 {
1319   vnet_main_t * vnm = vnet_get_main();
1320   ip4_main_t * im = &ip4_main;
1321   ip_lookup_main_t * lm = &im->lookup_main;
1322   clib_error_t * error = 0;
1323   u32 if_address_index, elts_before;
1324   ip4_address_fib_t ip4_af, * addr_fib = 0;
1325
1326   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1327   ip4_addr_fib_init (&ip4_af, address,
1328                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1329   vec_add1 (addr_fib, ip4_af);
1330
1331   /* When adding an address check that it does not conflict with an existing address. */
1332   if (! is_del)
1333     {
1334       ip_interface_address_t * ia;
1335       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1336                                     0 /* honor unnumbered */,
1337       ({
1338         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1339
1340         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1341             || ip4_destination_matches_route (im, x, address, address_length))
1342           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1343                                     format_ip4_address_and_length, address, address_length,
1344                                     format_ip4_address_and_length, x, ia->address_length,
1345                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1346       }));
1347     }
1348
1349   if (vm->mc_main && redistribute)
1350     {
1351       ip4_interface_address_t a;
1352       a.sw_if_index = sw_if_index;
1353       a.address = address[0];
1354       a.length = address_length;
1355       mc_serialize (vm->mc_main, &ip4_set_interface_address_msg, 
1356                     &a, (int)is_del);
1357       goto done;
1358     }
1359     
1360   elts_before = pool_elts (lm->if_address_pool);
1361
1362   error = ip_interface_address_add_del
1363     (lm,
1364      sw_if_index,
1365      addr_fib,
1366      address_length,
1367      is_del,
1368      &if_address_index);
1369   if (error)
1370     goto done;
1371   
1372   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1373     {
1374       if (is_del)
1375         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1376                                   address_length);
1377       
1378       else
1379           ip4_add_interface_routes (sw_if_index,
1380                                     im, ip4_af.fib_index,
1381                                     pool_elt_at_index 
1382                                     (lm->if_address_pool, if_address_index));
1383     }
1384
1385   /* If pool did not grow/shrink: add duplicate address. */
1386   if (elts_before != pool_elts (lm->if_address_pool))
1387     {
1388       ip4_add_del_interface_address_callback_t * cb;
1389       vec_foreach (cb, im->add_del_interface_address_callbacks)
1390         cb->function (im, cb->function_opaque, sw_if_index,
1391                       address, address_length,
1392                       if_address_index,
1393                       is_del);
1394     }
1395
1396  done:
1397   vec_free (addr_fib);
1398   return error;
1399 }
1400
1401 clib_error_t *
1402 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1403                                ip4_address_t * address, u32 address_length,
1404                                u32 is_del)
1405 {
1406   return ip4_add_del_interface_address_internal
1407     (vm, sw_if_index, address, address_length,
1408      /* redistribute */ 1,
1409      /* insert_routes */ 1,
1410      is_del);
1411 }
1412
1413 static void serialize_ip4_fib (serialize_main_t * m, va_list * va)
1414 {
1415   ip4_fib_t * f = va_arg (*va, ip4_fib_t *);
1416   u32 l, dst, adj_index;
1417
1418   serialize_integer (m, f->table_id, sizeof (f->table_id));
1419   for (l = 0; l < ARRAY_LEN (f->adj_index_by_dst_address); l++)
1420     {
1421       u32 n_elts = hash_elts (f->adj_index_by_dst_address[l]);
1422
1423       serialize_integer (m, n_elts, sizeof (n_elts));
1424       hash_foreach (dst, adj_index, f->adj_index_by_dst_address[l], ({
1425         ip4_address_t tmp;
1426         tmp.as_u32 = dst;
1427         serialize (m, serialize_ip4_address, &tmp);
1428         serialize_integer (m, adj_index, sizeof (adj_index));
1429       }));
1430     }
1431 }
1432
1433 static void unserialize_ip4_fib (serialize_main_t * m, va_list * va)
1434 {
1435   ip4_add_del_route_args_t a;
1436   u32 i;
1437
1438   a.flags = (IP4_ROUTE_FLAG_ADD
1439              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE
1440              | IP4_ROUTE_FLAG_TABLE_ID);
1441   a.n_add_adj = 0;
1442   a.add_adj = 0;
1443
1444   unserialize_integer (m, &a.table_index_or_table_id,
1445                        sizeof (a.table_index_or_table_id));
1446
1447   for (i = 0; i < STRUCT_ARRAY_LEN (ip4_fib_t, adj_index_by_dst_address); i++)
1448     {
1449       u32 n_elts;
1450       unserialize_integer (m, &n_elts, sizeof (u32));
1451       a.dst_address_length = i;
1452       while (n_elts > 0)
1453         {
1454           unserialize (m, unserialize_ip4_address, &a.dst_address);
1455           unserialize_integer (m, &a.adj_index, sizeof (a.adj_index));
1456           ip4_add_del_route (&ip4_main, &a);
1457           n_elts--;
1458         }
1459     }
1460 }
1461
1462 void serialize_vnet_ip4_main (serialize_main_t * m, va_list * va)
1463 {
1464   vnet_main_t * vnm = va_arg (*va, vnet_main_t *);
1465   vnet_interface_main_t * vim = &vnm->interface_main;
1466   vnet_sw_interface_t * si;
1467   ip4_main_t * i4m = &ip4_main;
1468   ip4_interface_address_t * as = 0, * a;
1469
1470   /* Download adjacency tables & multipath stuff. */
1471   serialize (m, serialize_ip_lookup_main, &i4m->lookup_main);
1472
1473   /* FIBs. */
1474   {
1475     ip4_fib_t * f;
1476     u32 n_fibs = vec_len (i4m->fibs);
1477     serialize_integer (m, n_fibs, sizeof (n_fibs));
1478     vec_foreach (f, i4m->fibs)
1479       serialize (m, serialize_ip4_fib, f);
1480   }
1481
1482   /* FIB interface config. */
1483   vec_serialize (m, i4m->fib_index_by_sw_if_index, serialize_vec_32);
1484
1485   /* Interface ip4 addresses. */
1486   pool_foreach (si, vim->sw_interfaces, ({
1487     u32 sw_if_index = si->sw_if_index;
1488     ip_interface_address_t * ia;
1489     foreach_ip_interface_address (&i4m->lookup_main, ia, sw_if_index, 
1490                                   0 /* honor unnumbered */,
1491     ({
1492       ip4_address_t * x = ip_interface_address_get_address (&i4m->lookup_main, ia);
1493       vec_add2 (as, a, 1);
1494       a->address = x[0];
1495       a->length = ia->address_length;
1496       a->sw_if_index = sw_if_index;
1497     }));
1498   }));
1499   vec_serialize (m, as, serialize_vec_ip4_set_interface_address);
1500   vec_free (as);
1501 }
1502
1503 void unserialize_vnet_ip4_main (serialize_main_t * m, va_list * va)
1504 {
1505   vlib_main_t * vm = va_arg (*va, vlib_main_t *);
1506   ip4_main_t * i4m = &ip4_main;
1507   ip4_interface_address_t * as = 0, * a;
1508
1509   unserialize (m, unserialize_ip_lookup_main, &i4m->lookup_main);
1510
1511   {
1512     ip_adjacency_t * adj, * adj_heap;
1513     u32 n_adj;
1514     adj_heap = i4m->lookup_main.adjacency_heap;
1515     heap_foreach (adj, n_adj, adj_heap, ({
1516       unserialize_fixup_ip4_rewrite_adjacencies (vm, adj, n_adj);
1517       ip_call_add_del_adjacency_callbacks (&i4m->lookup_main, adj - adj_heap, /* is_del */ 0);
1518     }));
1519   }
1520
1521   /* FIBs */
1522   {
1523     u32 i, n_fibs;
1524     unserialize_integer (m, &n_fibs, sizeof (n_fibs));
1525     for (i = 0; i < n_fibs; i++)
1526       unserialize (m, unserialize_ip4_fib);
1527   }
1528
1529   vec_unserialize (m, &i4m->fib_index_by_sw_if_index, unserialize_vec_32);
1530
1531   vec_unserialize (m, &as, unserialize_vec_ip4_set_interface_address);
1532   vec_foreach (a, as) {
1533     ip4_add_del_interface_address_internal
1534       (vm, a->sw_if_index, &a->address, a->length,
1535        /* redistribute */ 0,
1536        /* insert_routes */ 0,
1537        /* is_del */ 0);
1538   }
1539   vec_free (as);
1540 }
1541
1542 static clib_error_t *
1543 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1544                                 u32 sw_if_index,
1545                                 u32 flags)
1546 {
1547   ip4_main_t * im = &ip4_main;
1548   ip_interface_address_t * ia;
1549   ip4_address_t * a;
1550   u32 is_admin_up, fib_index;
1551   
1552   /* Fill in lookup tables with default table (0). */
1553   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1554   
1555   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1556   
1557   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1558   
1559   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1560
1561   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1562                                 0 /* honor unnumbered */,
1563   ({
1564     a = ip_interface_address_get_address (&im->lookup_main, ia);
1565     if (is_admin_up)
1566       ip4_add_interface_routes (sw_if_index,
1567                                 im, fib_index,
1568                                 ia);
1569     else
1570       ip4_del_interface_routes (im, fib_index,
1571                                 a, ia->address_length);
1572   }));
1573
1574   return 0;
1575 }
1576  
1577 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1578
1579 static clib_error_t *
1580 ip4_sw_interface_add_del (vnet_main_t * vnm,
1581                           u32 sw_if_index,
1582                           u32 is_add)
1583 {
1584   vlib_main_t * vm = vnm->vlib_main;
1585   ip4_main_t * im = &ip4_main;
1586   ip_lookup_main_t * lm = &im->lookup_main;
1587   u32 ci, cast;
1588
1589   for (cast = 0; cast < VNET_N_CAST; cast++)
1590     {
1591       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1592       vnet_config_main_t * vcm = &cm->config_main;
1593
1594       if (! vcm->node_index_by_feature_index)
1595         {
1596           if (cast == VNET_UNICAST)
1597             {
1598               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1599               static char * feature_nodes[] = {
1600                 [IP4_RX_FEATURE_CHECK_ACCESS] = "ip4-inacl",
1601                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX] = "ip4-source-check-via-rx",
1602                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY] = "ip4-source-check-via-any",
1603                 [IP4_RX_FEATURE_IPSEC] = "ipsec-input-ip4",
1604                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1605                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup",
1606               };
1607
1608               vnet_config_init (vm, vcm,
1609                                 start_nodes, ARRAY_LEN (start_nodes),
1610                                 feature_nodes, ARRAY_LEN (feature_nodes));
1611             }
1612           else
1613             {
1614               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1615               static char * feature_nodes[] = {
1616                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1617                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup-multicast",
1618               };
1619
1620               vnet_config_init (vm, vcm,
1621                                 start_nodes, ARRAY_LEN (start_nodes),
1622                                 feature_nodes, ARRAY_LEN (feature_nodes));
1623             }
1624         }
1625
1626       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1627       ci = cm->config_index_by_sw_if_index[sw_if_index];
1628
1629       if (is_add)
1630         ci = vnet_config_add_feature (vm, vcm,
1631                                       ci,
1632                                       IP4_RX_FEATURE_LOOKUP,
1633                                       /* config data */ 0,
1634                                       /* # bytes of config data */ 0);
1635       else
1636         ci = vnet_config_del_feature (vm, vcm,
1637                                       ci,
1638                                       IP4_RX_FEATURE_LOOKUP,
1639                                       /* config data */ 0,
1640                                       /* # bytes of config data */ 0);
1641
1642       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1643     }
1644
1645   return /* no error */ 0;
1646 }
1647
1648 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1649
1650 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1651   .function = ip4_lookup,
1652   .name = "ip4-lookup",
1653   .vector_size = sizeof (u32),
1654
1655   .n_next_nodes = IP_LOOKUP_N_NEXT,
1656   .next_nodes = {
1657     [IP_LOOKUP_NEXT_MISS] = "ip4-miss",
1658     [IP_LOOKUP_NEXT_DROP] = "ip4-drop",
1659     [IP_LOOKUP_NEXT_PUNT] = "ip4-punt",
1660     [IP_LOOKUP_NEXT_LOCAL] = "ip4-local",
1661     [IP_LOOKUP_NEXT_ARP] = "ip4-arp",
1662     [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit",
1663     [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify",
1664     [IP_LOOKUP_NEXT_MAP] = "ip4-map",
1665     [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t",
1666     [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd",
1667     [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop",
1668     [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop", 
1669     [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop", 
1670   },
1671 };
1672
1673 /* Global IP4 main. */
1674 ip4_main_t ip4_main;
1675
1676 clib_error_t *
1677 ip4_lookup_init (vlib_main_t * vm)
1678 {
1679   ip4_main_t * im = &ip4_main;
1680   uword i;
1681
1682   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1683     {
1684       u32 m;
1685
1686       if (i < 32)
1687         m = pow2_mask (i) << (32 - i);
1688       else 
1689         m = ~0;
1690       im->fib_masks[i] = clib_host_to_net_u32 (m);
1691     }
1692
1693   /* Create FIB with index 0 and table id of 0. */
1694   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1695
1696   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1697
1698   {
1699     pg_node_t * pn;
1700     pn = pg_get_node (ip4_lookup_node.index);
1701     pn->unformat_edit = unformat_pg_ip4_header;
1702   }
1703
1704   {
1705     ethernet_arp_header_t h;
1706
1707     memset (&h, 0, sizeof (h));
1708
1709     /* Set target ethernet address to all zeros. */
1710     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1711
1712 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1713 #define _8(f,v) h.f = v;
1714     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1715     _16 (l3_type, ETHERNET_TYPE_IP4);
1716     _8 (n_l2_address_bytes, 6);
1717     _8 (n_l3_address_bytes, 4);
1718     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1719 #undef _16
1720 #undef _8
1721
1722     vlib_packet_template_init (vm,
1723                                &im->ip4_arp_request_packet_template,
1724                                /* data */ &h,
1725                                sizeof (h),
1726                                /* alloc chunk size */ 8,
1727                                "ip4 arp");
1728   }
1729
1730   return 0;
1731 }
1732
1733 VLIB_INIT_FUNCTION (ip4_lookup_init);
1734
1735 typedef struct {
1736   /* Adjacency taken. */
1737   u32 adj_index;
1738   u32 flow_hash;
1739   u32 fib_index;
1740
1741   /* Packet data, possibly *after* rewrite. */
1742   u8 packet_data[64 - 1*sizeof(u32)];
1743 } ip4_forward_next_trace_t;
1744
1745 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1746 {
1747   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1748   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1749   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1750   vnet_main_t * vnm = vnet_get_main();
1751   ip4_main_t * im = &ip4_main;
1752   ip_adjacency_t * adj;
1753   uword indent = format_get_indent (s);
1754
1755   adj = ip_get_adjacency (&im->lookup_main, t->adj_index);
1756   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1757               t->fib_index, t->adj_index, format_ip_adjacency,
1758               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1759   switch (adj->lookup_next_index)
1760     {
1761     case IP_LOOKUP_NEXT_REWRITE:
1762       s = format (s, "\n%U%U",
1763                   format_white_space, indent,
1764                   format_ip_adjacency_packet_data,
1765                   vnm, &im->lookup_main, t->adj_index,
1766                   t->packet_data, sizeof (t->packet_data));
1767       break;
1768
1769     default:
1770       break;
1771     }
1772
1773   return s;
1774 }
1775
1776 /* Common trace function for all ip4-forward next nodes. */
1777 void
1778 ip4_forward_next_trace (vlib_main_t * vm,
1779                         vlib_node_runtime_t * node,
1780                         vlib_frame_t * frame,
1781                         vlib_rx_or_tx_t which_adj_index)
1782 {
1783   u32 * from, n_left;
1784   ip4_main_t * im = &ip4_main;
1785
1786   n_left = frame->n_vectors;
1787   from = vlib_frame_vector_args (frame);
1788   
1789   while (n_left >= 4)
1790     {
1791       u32 bi0, bi1;
1792       vlib_buffer_t * b0, * b1;
1793       ip4_forward_next_trace_t * t0, * t1;
1794
1795       /* Prefetch next iteration. */
1796       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1797       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1798
1799       bi0 = from[0];
1800       bi1 = from[1];
1801
1802       b0 = vlib_get_buffer (vm, bi0);
1803       b1 = vlib_get_buffer (vm, bi1);
1804
1805       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1806         {
1807           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1808           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1809           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1810           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1811                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1812           memcpy (t0->packet_data,
1813                   vlib_buffer_get_current (b0),
1814                   sizeof (t0->packet_data));
1815         }
1816       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1817         {
1818           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1819           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1820           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1821           t1->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1822                              vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1823           memcpy (t1->packet_data,
1824                   vlib_buffer_get_current (b1),
1825                   sizeof (t1->packet_data));
1826         }
1827       from += 2;
1828       n_left -= 2;
1829     }
1830
1831   while (n_left >= 1)
1832     {
1833       u32 bi0;
1834       vlib_buffer_t * b0;
1835       ip4_forward_next_trace_t * t0;
1836
1837       bi0 = from[0];
1838
1839       b0 = vlib_get_buffer (vm, bi0);
1840
1841       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1842         {
1843           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1844           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1845           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1846           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1847                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1848           memcpy (t0->packet_data,
1849                   vlib_buffer_get_current (b0),
1850                   sizeof (t0->packet_data));
1851         }
1852       from += 1;
1853       n_left -= 1;
1854     }
1855 }
1856
1857 static uword
1858 ip4_drop_or_punt (vlib_main_t * vm,
1859                   vlib_node_runtime_t * node,
1860                   vlib_frame_t * frame,
1861                   ip4_error_t error_code)
1862 {
1863   u32 * buffers = vlib_frame_vector_args (frame);
1864   uword n_packets = frame->n_vectors;
1865
1866   vlib_error_drop_buffers (vm, node,
1867                            buffers,
1868                            /* stride */ 1,
1869                            n_packets,
1870                            /* next */ 0,
1871                            ip4_input_node.index,
1872                            error_code);
1873
1874   if (node->flags & VLIB_NODE_FLAG_TRACE)
1875     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1876
1877   return n_packets;
1878 }
1879
1880 static uword
1881 ip4_drop (vlib_main_t * vm,
1882           vlib_node_runtime_t * node,
1883           vlib_frame_t * frame)
1884 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1885
1886 static uword
1887 ip4_punt (vlib_main_t * vm,
1888           vlib_node_runtime_t * node,
1889           vlib_frame_t * frame)
1890 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1891
1892 static uword
1893 ip4_miss (vlib_main_t * vm,
1894           vlib_node_runtime_t * node,
1895           vlib_frame_t * frame)
1896 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1897
1898 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1899   .function = ip4_drop,
1900   .name = "ip4-drop",
1901   .vector_size = sizeof (u32),
1902
1903   .format_trace = format_ip4_forward_next_trace,
1904
1905   .n_next_nodes = 1,
1906   .next_nodes = {
1907     [0] = "error-drop",
1908   },
1909 };
1910
1911 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1912   .function = ip4_punt,
1913   .name = "ip4-punt",
1914   .vector_size = sizeof (u32),
1915
1916   .format_trace = format_ip4_forward_next_trace,
1917
1918   .n_next_nodes = 1,
1919   .next_nodes = {
1920     [0] = "error-punt",
1921   },
1922 };
1923
1924 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1925   .function = ip4_miss,
1926   .name = "ip4-miss",
1927   .vector_size = sizeof (u32),
1928
1929   .format_trace = format_ip4_forward_next_trace,
1930
1931   .n_next_nodes = 1,
1932   .next_nodes = {
1933     [0] = "error-drop",
1934   },
1935 };
1936
1937 /* Compute TCP/UDP/ICMP4 checksum in software. */
1938 u16
1939 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1940                               ip4_header_t * ip0)
1941 {
1942   ip_csum_t sum0;
1943   u32 ip_header_length, payload_length_host_byte_order;
1944   u32 n_this_buffer, n_bytes_left;
1945   u16 sum16;
1946   void * data_this_buffer;
1947   
1948   /* Initialize checksum with ip header. */
1949   ip_header_length = ip4_header_bytes (ip0);
1950   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1951   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1952
1953   if (BITS (uword) == 32)
1954     {
1955       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1956       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1957     }
1958   else
1959     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1960
1961   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1962   data_this_buffer = (void *) ip0 + ip_header_length;
1963   if (n_this_buffer + ip_header_length > p0->current_length)
1964     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1965   while (1)
1966     {
1967       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1968       n_bytes_left -= n_this_buffer;
1969       if (n_bytes_left == 0)
1970         break;
1971
1972       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1973       p0 = vlib_get_buffer (vm, p0->next_buffer);
1974       data_this_buffer = vlib_buffer_get_current (p0);
1975       n_this_buffer = p0->current_length;
1976     }
1977
1978   sum16 = ~ ip_csum_fold (sum0);
1979
1980   return sum16;
1981 }
1982
1983 static u32
1984 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1985 {
1986   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1987   udp_header_t * udp0;
1988   u16 sum16;
1989
1990   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1991           || ip0->protocol == IP_PROTOCOL_UDP);
1992
1993   udp0 = (void *) (ip0 + 1);
1994   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1995     {
1996       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1997                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1998       return p0->flags;
1999     }
2000
2001   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
2002
2003   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
2004                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
2005
2006   return p0->flags;
2007 }
2008
2009 static uword
2010 ip4_local (vlib_main_t * vm,
2011            vlib_node_runtime_t * node,
2012            vlib_frame_t * frame)
2013 {
2014   ip4_main_t * im = &ip4_main;
2015   ip_lookup_main_t * lm = &im->lookup_main;
2016   ip_local_next_t next_index;
2017   u32 * from, * to_next, n_left_from, n_left_to_next;
2018   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2019
2020   from = vlib_frame_vector_args (frame);
2021   n_left_from = frame->n_vectors;
2022   next_index = node->cached_next_index;
2023   
2024   if (node->flags & VLIB_NODE_FLAG_TRACE)
2025     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2026
2027   while (n_left_from > 0)
2028     {
2029       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2030
2031       while (n_left_from >= 4 && n_left_to_next >= 2)
2032         {
2033           vlib_buffer_t * p0, * p1;
2034           ip4_header_t * ip0, * ip1;
2035           udp_header_t * udp0, * udp1;
2036           ip4_fib_mtrie_t * mtrie0, * mtrie1;
2037           ip4_fib_mtrie_leaf_t leaf0, leaf1;
2038           ip_adjacency_t * adj0, * adj1;
2039           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
2040           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
2041           i32 len_diff0, len_diff1;
2042           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2043           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
2044           u8 enqueue_code;
2045       
2046           pi0 = to_next[0] = from[0];
2047           pi1 = to_next[1] = from[1];
2048           from += 2;
2049           n_left_from -= 2;
2050           to_next += 2;
2051           n_left_to_next -= 2;
2052       
2053           p0 = vlib_get_buffer (vm, pi0);
2054           p1 = vlib_get_buffer (vm, pi1);
2055
2056           ip0 = vlib_buffer_get_current (p0);
2057           ip1 = vlib_buffer_get_current (p1);
2058
2059           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2060                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2061           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
2062                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
2063
2064           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2065           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
2066
2067           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
2068
2069           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2070           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
2071
2072           proto0 = ip0->protocol;
2073           proto1 = ip1->protocol;
2074           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2075           is_udp1 = proto1 == IP_PROTOCOL_UDP;
2076           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2077           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
2078
2079           flags0 = p0->flags;
2080           flags1 = p1->flags;
2081
2082           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2083           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2084
2085           udp0 = ip4_next_header (ip0);
2086           udp1 = ip4_next_header (ip1);
2087
2088           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2089           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2090           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2091
2092           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2093           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
2094
2095           /* Verify UDP length. */
2096           ip_len0 = clib_net_to_host_u16 (ip0->length);
2097           ip_len1 = clib_net_to_host_u16 (ip1->length);
2098           udp_len0 = clib_net_to_host_u16 (udp0->length);
2099           udp_len1 = clib_net_to_host_u16 (udp1->length);
2100
2101           len_diff0 = ip_len0 - udp_len0;
2102           len_diff1 = ip_len1 - udp_len1;
2103
2104           len_diff0 = is_udp0 ? len_diff0 : 0;
2105           len_diff1 = is_udp1 ? len_diff1 : 0;
2106
2107           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
2108                                 & good_tcp_udp0 & good_tcp_udp1)))
2109             {
2110               if (is_tcp_udp0)
2111                 {
2112                   if (is_tcp_udp0
2113                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2114                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2115                   good_tcp_udp0 =
2116                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2117                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2118                 }
2119               if (is_tcp_udp1)
2120                 {
2121                   if (is_tcp_udp1
2122                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2123                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
2124                   good_tcp_udp1 =
2125                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2126                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2127                 }
2128             }
2129
2130           good_tcp_udp0 &= len_diff0 >= 0;
2131           good_tcp_udp1 &= len_diff1 >= 0;
2132
2133           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2134           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
2135
2136           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
2137
2138           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2139           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
2140
2141           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2142           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2143                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2144                     : error0);
2145           error1 = (is_tcp_udp1 && ! good_tcp_udp1
2146                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
2147                     : error1);
2148
2149           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2150           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
2151
2152           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2153           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2154
2155           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
2156           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2157
2158           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2159                                                            &ip0->src_address,
2160                                                            /* no_default_route */ 1));
2161           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
2162                                                            &ip1->src_address,
2163                                                            /* no_default_route */ 1));
2164
2165           adj0 = ip_get_adjacency (lm, adj_index0);
2166           adj1 = ip_get_adjacency (lm, adj_index1);
2167
2168           /* 
2169            * Must have a route to source otherwise we drop the packet.
2170            * ip4 broadcasts are accepted, e.g. to make dhcp client work
2171            */
2172           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2173                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2174                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2175                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2176                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2177                     ? IP4_ERROR_SRC_LOOKUP_MISS
2178                     : error0);
2179           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
2180                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2181                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
2182                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2183                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2184                     ? IP4_ERROR_SRC_LOOKUP_MISS
2185                     : error1);
2186
2187           next0 = lm->local_next_by_ip_protocol[proto0];
2188           next1 = lm->local_next_by_ip_protocol[proto1];
2189
2190           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2191           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
2192
2193           p0->error = error0 ? error_node->errors[error0] : 0;
2194           p1->error = error1 ? error_node->errors[error1] : 0;
2195
2196           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
2197
2198           if (PREDICT_FALSE (enqueue_code != 0))
2199             {
2200               switch (enqueue_code)
2201                 {
2202                 case 1:
2203                   /* A B A */
2204                   to_next[-2] = pi1;
2205                   to_next -= 1;
2206                   n_left_to_next += 1;
2207                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2208                   break;
2209
2210                 case 2:
2211                   /* A A B */
2212                   to_next -= 1;
2213                   n_left_to_next += 1;
2214                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2215                   break;
2216
2217                 case 3:
2218                   /* A B B or A B C */
2219                   to_next -= 2;
2220                   n_left_to_next += 2;
2221                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2222                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2223                   if (next0 == next1)
2224                     {
2225                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2226                       next_index = next1;
2227                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2228                     }
2229                   break;
2230                 }
2231             }
2232         }
2233
2234       while (n_left_from > 0 && n_left_to_next > 0)
2235         {
2236           vlib_buffer_t * p0;
2237           ip4_header_t * ip0;
2238           udp_header_t * udp0;
2239           ip4_fib_mtrie_t * mtrie0;
2240           ip4_fib_mtrie_leaf_t leaf0;
2241           ip_adjacency_t * adj0;
2242           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
2243           i32 len_diff0;
2244           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2245       
2246           pi0 = to_next[0] = from[0];
2247           from += 1;
2248           n_left_from -= 1;
2249           to_next += 1;
2250           n_left_to_next -= 1;
2251       
2252           p0 = vlib_get_buffer (vm, pi0);
2253
2254           ip0 = vlib_buffer_get_current (p0);
2255
2256           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2257                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2258
2259           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2260
2261           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2262
2263           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2264
2265           proto0 = ip0->protocol;
2266           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2267           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2268
2269           flags0 = p0->flags;
2270
2271           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2272
2273           udp0 = ip4_next_header (ip0);
2274
2275           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2276           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2277
2278           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2279
2280           /* Verify UDP length. */
2281           ip_len0 = clib_net_to_host_u16 (ip0->length);
2282           udp_len0 = clib_net_to_host_u16 (udp0->length);
2283
2284           len_diff0 = ip_len0 - udp_len0;
2285
2286           len_diff0 = is_udp0 ? len_diff0 : 0;
2287
2288           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
2289             {
2290               if (is_tcp_udp0)
2291                 {
2292                   if (is_tcp_udp0
2293                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2294                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2295                   good_tcp_udp0 =
2296                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2297                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2298                 }
2299             }
2300
2301           good_tcp_udp0 &= len_diff0 >= 0;
2302
2303           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2304
2305           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2306
2307           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2308
2309           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2310           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2311                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2312                     : error0);
2313
2314           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2315
2316           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2317           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2318
2319           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2320                                                            &ip0->src_address,
2321                                                            /* no_default_route */ 1));
2322
2323           adj0 = ip_get_adjacency (lm, adj_index0);
2324
2325           /* Must have a route to source otherwise we drop the packet. */
2326           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2327                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2328                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2329                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2330                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2331                     ? IP4_ERROR_SRC_LOOKUP_MISS
2332                     : error0);
2333
2334           next0 = lm->local_next_by_ip_protocol[proto0];
2335
2336           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2337
2338           p0->error = error0? error_node->errors[error0] : 0;
2339
2340           if (PREDICT_FALSE (next0 != next_index))
2341             {
2342               n_left_to_next += 1;
2343               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2344
2345               next_index = next0;
2346               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2347               to_next[0] = pi0;
2348               to_next += 1;
2349               n_left_to_next -= 1;
2350             }
2351         }
2352   
2353       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2354     }
2355
2356   return frame->n_vectors;
2357 }
2358
2359 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2360   .function = ip4_local,
2361   .name = "ip4-local",
2362   .vector_size = sizeof (u32),
2363
2364   .format_trace = format_ip4_forward_next_trace,
2365
2366   .n_next_nodes = IP_LOCAL_N_NEXT,
2367   .next_nodes = {
2368     [IP_LOCAL_NEXT_DROP] = "error-drop",
2369     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2370     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2371     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2372   },
2373 };
2374
2375 void ip4_register_protocol (u32 protocol, u32 node_index)
2376 {
2377   vlib_main_t * vm = vlib_get_main();
2378   ip4_main_t * im = &ip4_main;
2379   ip_lookup_main_t * lm = &im->lookup_main;
2380
2381   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2382   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2383 }
2384
2385 static clib_error_t *
2386 show_ip_local_command_fn (vlib_main_t * vm,
2387                           unformat_input_t * input,
2388                          vlib_cli_command_t * cmd)
2389 {
2390   ip4_main_t * im = &ip4_main;
2391   ip_lookup_main_t * lm = &im->lookup_main;
2392   int i;
2393
2394   vlib_cli_output (vm, "Protocols handled by ip4_local");
2395   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2396     {
2397       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2398         vlib_cli_output (vm, "%d", i);
2399     }
2400   return 0;
2401 }
2402
2403
2404
2405 VLIB_CLI_COMMAND (show_ip_local, static) = {
2406   .path = "show ip local",
2407   .function = show_ip_local_command_fn,
2408   .short_help = "Show ip local protocol table",
2409 };
2410
2411 static uword
2412 ip4_arp (vlib_main_t * vm,
2413          vlib_node_runtime_t * node,
2414          vlib_frame_t * frame)
2415 {
2416   vnet_main_t * vnm = vnet_get_main();
2417   ip4_main_t * im = &ip4_main;
2418   ip_lookup_main_t * lm = &im->lookup_main;
2419   u32 * from, * to_next_drop;
2420   uword n_left_from, n_left_to_next_drop, next_index;
2421   static f64 time_last_seed_change = -1e100;
2422   static u32 hash_seeds[3];
2423   static uword hash_bitmap[256 / BITS (uword)]; 
2424   f64 time_now;
2425
2426   if (node->flags & VLIB_NODE_FLAG_TRACE)
2427     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2428
2429   time_now = vlib_time_now (vm);
2430   if (time_now - time_last_seed_change > 1e-3)
2431     {
2432       uword i;
2433       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2434                                              sizeof (hash_seeds));
2435       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2436         hash_seeds[i] = r[i];
2437
2438       /* Mark all hash keys as been no-seen before. */
2439       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2440         hash_bitmap[i] = 0;
2441
2442       time_last_seed_change = time_now;
2443     }
2444
2445   from = vlib_frame_vector_args (frame);
2446   n_left_from = frame->n_vectors;
2447   next_index = node->cached_next_index;
2448   if (next_index == IP4_ARP_NEXT_DROP)
2449     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2450
2451   while (n_left_from > 0)
2452     {
2453       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2454                            to_next_drop, n_left_to_next_drop);
2455
2456       while (n_left_from > 0 && n_left_to_next_drop > 0)
2457         {
2458           vlib_buffer_t * p0;
2459           ip4_header_t * ip0;
2460           ethernet_header_t * eh0;
2461           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2462           uword bm0;
2463           ip_adjacency_t * adj0;
2464
2465           pi0 = from[0];
2466
2467           p0 = vlib_get_buffer (vm, pi0);
2468
2469           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2470           adj0 = ip_get_adjacency (lm, adj_index0);
2471           ip0 = vlib_buffer_get_current (p0);
2472
2473           /* 
2474            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2475            * rewrite to this packet, we need to skip it here.
2476            * Note, to distinguish from src IP addr *.8.6.*, we
2477            * check for a bcast eth dest instead of IPv4 version.
2478            */
2479           eh0 = (ethernet_header_t*)ip0;
2480           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2481             {
2482               u32 vlan_num = 0;
2483               u16 * etype = &eh0->type;
2484               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2485                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2486                 {
2487                   vlan_num += 1;
2488                   etype += 2; //vlan tag also 16 bits, same as etype
2489                 }
2490               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2491                 {
2492                   vlib_buffer_advance (
2493                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2494                   ip0 = vlib_buffer_get_current (p0);
2495                 }
2496             }
2497
2498           a0 = hash_seeds[0];
2499           b0 = hash_seeds[1];
2500           c0 = hash_seeds[2];
2501
2502           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2503           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2504
2505           a0 ^= ip0->dst_address.data_u32;
2506           b0 ^= sw_if_index0;
2507
2508           hash_v3_finalize32 (a0, b0, c0);
2509
2510           c0 &= BITS (hash_bitmap) - 1;
2511           c0 = c0 / BITS (uword);
2512           m0 = (uword) 1 << (c0 % BITS (uword));
2513
2514           bm0 = hash_bitmap[c0];
2515           drop0 = (bm0 & m0) != 0;
2516
2517           /* Mark it as seen. */
2518           hash_bitmap[c0] = bm0 | m0;
2519
2520           from += 1;
2521           n_left_from -= 1;
2522           to_next_drop[0] = pi0;
2523           to_next_drop += 1;
2524           n_left_to_next_drop -= 1;
2525
2526           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2527
2528           if (drop0)
2529             continue;
2530
2531           /* 
2532            * Can happen if the control-plane is programming tables
2533            * with traffic flowing; at least that's today's lame excuse.
2534            */
2535           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2536             {
2537               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2538             }
2539           else
2540           /* Send ARP request. */
2541           {
2542             u32 bi0 = 0;
2543             vlib_buffer_t * b0;
2544             ethernet_arp_header_t * h0;
2545             vnet_hw_interface_t * hw_if0;
2546
2547             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2548
2549             /* Add rewrite/encap string for ARP packet. */
2550             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2551
2552             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2553
2554             /* Src ethernet address in ARP header. */
2555             memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2556                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2557
2558             ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0);
2559
2560             /* Copy in destination address we are requesting. */
2561             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2562
2563             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2564             b0 = vlib_get_buffer (vm, bi0);
2565             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2566
2567             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2568
2569             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2570           }
2571         }
2572
2573       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2574     }
2575
2576   return frame->n_vectors;
2577 }
2578
/*
 * Human-readable strings for the ip4-arp node's error counters, indexed
 * by IP4_ARP_ERROR_* code.  Registered below as the node's .error_strings.
 */
static char * ip4_arp_error_strings[] = {
  [IP4_ARP_ERROR_DROP] = "address overflow drops",
  [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
  [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
  [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
  [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
};
2586
/*
 * Graph node registration for "ip4-arp".
 *
 * Only the drop next node is named statically here; ip4_arp() also
 * enqueues generated ARP requests to adj->rewrite_header.next_index.
 * NOTE(review): those next indices are presumably added at runtime
 * (per interface) -- confirm against the adjacency setup code.
 */
VLIB_REGISTER_NODE (ip4_arp_node) = {
  .function = ip4_arp,
  .name = "ip4-arp",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,

  .n_errors = ARRAY_LEN (ip4_arp_error_strings),
  .error_strings = ip4_arp_error_strings,

  .n_next_nodes = IP4_ARP_N_NEXT,
  .next_nodes = {
    [IP4_ARP_NEXT_DROP] = "error-drop",
  },
};
2602
/*
 * X-macro list of ip4-arp error codes (IP4_ARP_ERROR_ prefix omitted)
 * that arp_notrace_init() passes to vnet_pcap_drop_trace_filter_add_del().
 * The user supplies a one-argument _() macro before expanding the list.
 * IP4_ARP_ERROR_NON_ARP_ADJ is not in the list.
 */
#define foreach_notrace_ip4_arp_error           \
_(DROP)                                         \
_(REQUEST_SENT)                                 \
_(REPLICATE_DROP)                               \
_(REPLICATE_FAIL)
2608
2609 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2610 {
2611   vlib_node_runtime_t *rt = 
2612     vlib_node_get_runtime (vm, ip4_arp_node.index);
2613
2614   /* don't trace ARP request packets */
2615 #define _(a)                                    \
2616     vnet_pcap_drop_trace_filter_add_del         \
2617         (rt->errors[IP4_ARP_ERROR_##a],         \
2618          1 /* is_add */);
2619     foreach_notrace_ip4_arp_error;
2620 #undef _
2621   return 0;
2622 }
2623
2624 VLIB_INIT_FUNCTION(arp_notrace_init);
2625
2626
2627 /* Send an ARP request to see if given destination is reachable on given interface. */
2628 clib_error_t *
2629 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2630 {
2631   vnet_main_t * vnm = vnet_get_main();
2632   ip4_main_t * im = &ip4_main;
2633   ethernet_arp_header_t * h;
2634   ip4_address_t * src;
2635   ip_interface_address_t * ia;
2636   ip_adjacency_t * adj;
2637   vnet_hw_interface_t * hi;
2638   vnet_sw_interface_t * si;
2639   vlib_buffer_t * b;
2640   u32 bi = 0;
2641
2642   si = vnet_get_sw_interface (vnm, sw_if_index);
2643
2644   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2645     {
2646       return clib_error_return (0, "%U: interface %U down",
2647                                 format_ip4_address, dst, 
2648                                 format_vnet_sw_if_index_name, vnm, 
2649                                 sw_if_index);
2650     }
2651
2652   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2653   if (! src)
2654     {
2655       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2656       return clib_error_return 
2657         (0, "no matching interface address for destination %U (interface %U)",
2658          format_ip4_address, dst,
2659          format_vnet_sw_if_index_name, vnm, sw_if_index);
2660     }
2661
2662   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2663
2664   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2665
2666   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2667
2668   memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2669
2670   h->ip4_over_ethernet[0].ip4 = src[0];
2671   h->ip4_over_ethernet[1].ip4 = dst[0];
2672
2673   b = vlib_get_buffer (vm, bi);
2674   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2675
2676   /* Add encapsulation string for software interface (e.g. ethernet header). */
2677   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2678   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2679
2680   {
2681     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2682     u32 * to_next = vlib_frame_vector_args (f);
2683     to_next[0] = bi;
2684     f->n_vectors = 1;
2685     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2686   }
2687
2688   return /* no error */ 0;
2689 }
2690
/*
 * Statically named next-node indices of the ip4-rewrite-transit and
 * ip4-rewrite-local nodes (see their registrations below).
 */
typedef enum {
  /* "error-drop": packets that picked up an error during rewrite. */
  IP4_REWRITE_NEXT_DROP,
  /* "ip4-arp": used by the local rewrite path for ARP adjacencies. */
  IP4_REWRITE_NEXT_ARP,
} ip4_rewrite_next_t;
2695
2696 always_inline uword
2697 ip4_rewrite_inline (vlib_main_t * vm,
2698                     vlib_node_runtime_t * node,
2699                     vlib_frame_t * frame,
2700                     int rewrite_for_locally_received_packets)
2701 {
2702   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2703   u32 * from = vlib_frame_vector_args (frame);
2704   u32 n_left_from, n_left_to_next, * to_next, next_index;
2705   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2706   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2707
2708   n_left_from = frame->n_vectors;
2709   next_index = node->cached_next_index;
2710   u32 cpu_index = os_get_cpu_number();
2711   
2712   while (n_left_from > 0)
2713     {
2714       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2715
2716       while (n_left_from >= 4 && n_left_to_next >= 2)
2717         {
2718           ip_adjacency_t * adj0, * adj1;
2719           vlib_buffer_t * p0, * p1;
2720           ip4_header_t * ip0, * ip1;
2721           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2722           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2723           u32 next0_override, next1_override;
2724       
2725           if (rewrite_for_locally_received_packets)
2726               next0_override = next1_override = 0;
2727
2728           /* Prefetch next iteration. */
2729           {
2730             vlib_buffer_t * p2, * p3;
2731
2732             p2 = vlib_get_buffer (vm, from[2]);
2733             p3 = vlib_get_buffer (vm, from[3]);
2734
2735             vlib_prefetch_buffer_header (p2, STORE);
2736             vlib_prefetch_buffer_header (p3, STORE);
2737
2738             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2739             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2740           }
2741
2742           pi0 = to_next[0] = from[0];
2743           pi1 = to_next[1] = from[1];
2744
2745           from += 2;
2746           n_left_from -= 2;
2747           to_next += 2;
2748           n_left_to_next -= 2;
2749       
2750           p0 = vlib_get_buffer (vm, pi0);
2751           p1 = vlib_get_buffer (vm, pi1);
2752
2753           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2754           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2755
2756           /* We should never rewrite a pkt using the MISS adjacency */
2757           ASSERT(adj_index0 && adj_index1);
2758
2759           ip0 = vlib_buffer_get_current (p0);
2760           ip1 = vlib_buffer_get_current (p1);
2761
2762           error0 = error1 = IP4_ERROR_NONE;
2763
2764           /* Decrement TTL & update checksum.
2765              Works either endian, so no need for byte swap. */
2766           if (! rewrite_for_locally_received_packets)
2767             {
2768               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2769
2770               /* Input node should have reject packets with ttl 0. */
2771               ASSERT (ip0->ttl > 0);
2772               ASSERT (ip1->ttl > 0);
2773
2774               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2775               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2776
2777               checksum0 += checksum0 >= 0xffff;
2778               checksum1 += checksum1 >= 0xffff;
2779
2780               ip0->checksum = checksum0;
2781               ip1->checksum = checksum1;
2782
2783               ttl0 -= 1;
2784               ttl1 -= 1;
2785
2786               ip0->ttl = ttl0;
2787               ip1->ttl = ttl1;
2788
2789               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2790               error1 = ttl1 <= 0 ? IP4_ERROR_TIME_EXPIRED : error1;
2791
2792               /* Verify checksum. */
2793               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2794               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2795             }
2796
2797           /* Rewrite packet header and updates lengths. */
2798           adj0 = ip_get_adjacency (lm, adj_index0);
2799           adj1 = ip_get_adjacency (lm, adj_index1);
2800       
2801           if (rewrite_for_locally_received_packets)
2802             {
2803               /*
2804                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2805                * we end up here with a local adjacency in hand
2806                * The local adj rewrite data is 0xfefe on purpose.
2807                * Bad engineer, no donut for you.
2808                */
2809               if (PREDICT_FALSE(adj0->lookup_next_index 
2810                                 == IP_LOOKUP_NEXT_LOCAL))
2811                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2812               if (PREDICT_FALSE(adj0->lookup_next_index
2813                                 == IP_LOOKUP_NEXT_ARP))
2814                 next0_override = IP4_REWRITE_NEXT_ARP;
2815               if (PREDICT_FALSE(adj1->lookup_next_index 
2816                                 == IP_LOOKUP_NEXT_LOCAL))
2817                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2818               if (PREDICT_FALSE(adj1->lookup_next_index
2819                                 == IP_LOOKUP_NEXT_ARP))
2820                 next1_override = IP4_REWRITE_NEXT_ARP;
2821             }
2822
2823           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2824           rw_len0 = adj0[0].rewrite_header.data_bytes;
2825           rw_len1 = adj1[0].rewrite_header.data_bytes;
2826           next0 = (error0 == IP4_ERROR_NONE) 
2827             ? adj0[0].rewrite_header.next_index : 0;
2828
2829           if (rewrite_for_locally_received_packets)
2830               next0 = next0 && next0_override ? next0_override : next0;
2831
2832           next1 = (error1 == IP4_ERROR_NONE)
2833             ? adj1[0].rewrite_header.next_index : 0;
2834
2835           if (rewrite_for_locally_received_packets)
2836               next1 = next1 && next1_override ? next1_override : next1;
2837
2838           /* 
2839            * We've already accounted for an ethernet_header_t elsewhere
2840            */
2841           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2842               vlib_increment_combined_counter 
2843                   (&lm->adjacency_counters,
2844                    cpu_index, adj_index0, 
2845                    /* packet increment */ 0,
2846                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2847
2848           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2849               vlib_increment_combined_counter 
2850                   (&lm->adjacency_counters,
2851                    cpu_index, adj_index1, 
2852                    /* packet increment */ 0,
2853                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2854
2855           /* Check MTU of outgoing interface. */
2856           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2857                     ? IP4_ERROR_MTU_EXCEEDED
2858                     : error0);
2859           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2860                     ? IP4_ERROR_MTU_EXCEEDED
2861                     : error1);
2862
2863           p0->current_data -= rw_len0;
2864           p1->current_data -= rw_len1;
2865
2866           p0->current_length += rw_len0;
2867           p1->current_length += rw_len1;
2868
2869           vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
2870           vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index;
2871       
2872           p0->error = error_node->errors[error0];
2873           p1->error = error_node->errors[error1];
2874
2875           /* Guess we are only writing on simple Ethernet header. */
2876           vnet_rewrite_two_headers (adj0[0], adj1[0],
2877                                     ip0, ip1,
2878                                     sizeof (ethernet_header_t));
2879       
2880           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2881                                            to_next, n_left_to_next,
2882                                            pi0, pi1, next0, next1);
2883         }
2884
2885       while (n_left_from > 0 && n_left_to_next > 0)
2886         {
2887           ip_adjacency_t * adj0;
2888           vlib_buffer_t * p0;
2889           ip4_header_t * ip0;
2890           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2891           u32 next0_override;
2892       
2893           if (rewrite_for_locally_received_packets)
2894               next0_override = 0;
2895
2896           pi0 = to_next[0] = from[0];
2897
2898           p0 = vlib_get_buffer (vm, pi0);
2899
2900           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2901
2902           /* We should never rewrite a pkt using the MISS adjacency */
2903           ASSERT(adj_index0);
2904
2905           adj0 = ip_get_adjacency (lm, adj_index0);
2906       
2907           ip0 = vlib_buffer_get_current (p0);
2908
2909           error0 = IP4_ERROR_NONE;
2910           next0 = 0;            /* drop on error */
2911
2912           /* Decrement TTL & update checksum. */
2913           if (! rewrite_for_locally_received_packets)
2914             {
2915               i32 ttl0 = ip0->ttl;
2916
2917               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2918
2919               checksum0 += checksum0 >= 0xffff;
2920
2921               ip0->checksum = checksum0;
2922
2923               ASSERT (ip0->ttl > 0);
2924
2925               ttl0 -= 1;
2926
2927               ip0->ttl = ttl0;
2928
2929               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2930
2931               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2932             }
2933
2934           if (rewrite_for_locally_received_packets)
2935             {
2936               /*
2937                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2938                * we end up here with a local adjacency in hand
2939                * The local adj rewrite data is 0xfefe on purpose.
2940                * Bad engineer, no donut for you.
2941                */
2942               if (PREDICT_FALSE(adj0->lookup_next_index 
2943                                 == IP_LOOKUP_NEXT_LOCAL))
2944                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2945               /* 
2946                * We have to override the next_index in ARP adjacencies,
2947                * because they're set up for ip4-arp, not this node...
2948                */
2949               if (PREDICT_FALSE(adj0->lookup_next_index
2950                                 == IP_LOOKUP_NEXT_ARP))
2951                 next0_override = IP4_REWRITE_NEXT_ARP;
2952             }
2953
2954           /* Guess we are only writing on simple Ethernet header. */
2955           vnet_rewrite_one_header (adj0[0], ip0, 
2956                                    sizeof (ethernet_header_t));
2957           
2958           /* Update packet buffer attributes/set output interface. */
2959           rw_len0 = adj0[0].rewrite_header.data_bytes;
2960           
2961           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2962               vlib_increment_combined_counter 
2963                   (&lm->adjacency_counters,
2964                    cpu_index, adj_index0, 
2965                    /* packet increment */ 0,
2966                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2967           
2968           /* Check MTU of outgoing interface. */
2969           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2970                     > adj0[0].rewrite_header.max_l3_packet_bytes
2971                     ? IP4_ERROR_MTU_EXCEEDED
2972                     : error0);
2973           
2974           p0->error = error_node->errors[error0];
2975           p0->current_data -= rw_len0;
2976           p0->current_length += rw_len0;
2977           vnet_buffer (p0)->sw_if_index[VLIB_TX] = 
2978             adj0[0].rewrite_header.sw_if_index;
2979           
2980           next0 = (error0 == IP4_ERROR_NONE)
2981             ? adj0[0].rewrite_header.next_index : 0;
2982
2983           if (rewrite_for_locally_received_packets)
2984               next0 = next0 && next0_override ? next0_override : next0;
2985
2986           from += 1;
2987           n_left_from -= 1;
2988           to_next += 1;
2989           n_left_to_next -= 1;
2990       
2991           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2992                                            to_next, n_left_to_next,
2993                                            pi0, next0);
2994         }
2995   
2996       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2997     }
2998
2999   /* Need to do trace after rewrites to pick up new packet data. */
3000   if (node->flags & VLIB_NODE_FLAG_TRACE)
3001     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
3002
3003   return frame->n_vectors;
3004 }
3005
/*
 * Node function for "ip4-rewrite-transit": runs ip4_rewrite_inline() with
 * rewrite_for_locally_received_packets = 0, i.e. the variant that
 * decrements the TTL and uses the VLIB_TX adjacency index.
 */
static uword
ip4_rewrite_transit (vlib_main_t * vm,
                     vlib_node_runtime_t * node,
                     vlib_frame_t * frame)
{
  return ip4_rewrite_inline (vm, node, frame,
                             /* rewrite_for_locally_received_packets */ 0);
}
3014
/*
 * Node function for "ip4-rewrite-local": runs ip4_rewrite_inline() with
 * rewrite_for_locally_received_packets = 1, i.e. the variant that leaves
 * the TTL alone and uses the VLIB_RX adjacency index.
 */
static uword
ip4_rewrite_local (vlib_main_t * vm,
                   vlib_node_runtime_t * node,
                   vlib_frame_t * frame)
{
  return ip4_rewrite_inline (vm, node, frame,
                             /* rewrite_for_locally_received_packets */ 1);
}
3023
/*
 * Graph node registration for "ip4-rewrite-transit".
 * The two statically named next nodes correspond to ip4_rewrite_next_t.
 */
VLIB_REGISTER_NODE (ip4_rewrite_node) = {
  .function = ip4_rewrite_transit,
  .name = "ip4-rewrite-transit",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,

  /* Must match the number of entries in ip4_rewrite_next_t. */
  .n_next_nodes = 2,
  .next_nodes = {
    [IP4_REWRITE_NEXT_DROP] = "error-drop",
    [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
  },
};
3037
/*
 * Graph node registration for "ip4-rewrite-local" (file-local symbol).
 * Declared as a sibling of "ip4-rewrite-transit".
 * NOTE(review): .sibling_of presumably makes the two nodes share the same
 * next-node indices -- confirm against the vlib node documentation.
 */
VLIB_REGISTER_NODE (ip4_rewrite_local_node,static) = {
  .function = ip4_rewrite_local,
  .name = "ip4-rewrite-local",
  .vector_size = sizeof (u32),

  .sibling_of = "ip4-rewrite-transit",

  .format_trace = format_ip4_forward_next_trace,

  /* Must match the number of entries in ip4_rewrite_next_t. */
  .n_next_nodes = 2,
  .next_nodes = {
    [IP4_REWRITE_NEXT_DROP] = "error-drop",
    [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
  },
};
3053
3054 static clib_error_t *
3055 add_del_interface_table (vlib_main_t * vm,
3056                          unformat_input_t * input,
3057                          vlib_cli_command_t * cmd)
3058 {
3059   vnet_main_t * vnm = vnet_get_main();
3060   clib_error_t * error = 0;
3061   u32 sw_if_index, table_id;
3062
3063   sw_if_index = ~0;
3064
3065   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
3066     {
3067       error = clib_error_return (0, "unknown interface `%U'",
3068                                  format_unformat_error, input);
3069       goto done;
3070     }
3071
3072   if (unformat (input, "%d", &table_id))
3073     ;
3074   else
3075     {
3076       error = clib_error_return (0, "expected table id `%U'",
3077                                  format_unformat_error, input);
3078       goto done;
3079     }
3080
3081   {
3082     ip4_main_t * im = &ip4_main;
3083     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
3084
3085     if (fib) 
3086       {
3087         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
3088         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
3089     }
3090   }
3091
3092  done:
3093   return error;
3094 }
3095
/*
 * CLI registration: "set interface ip table <interface> <table-id>",
 * handled by add_del_interface_table().
 */
VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
  .path = "set interface ip table",
  .function = add_del_interface_table,
  .short_help = "Add/delete FIB table id for interface",
};
3101
3102
/*
 * Node function for "ip4-lookup-multicast".
 *
 * For every buffer: choose a FIB (from the RX interface, unless
 * sw_if_index[VLIB_TX] is set, in which case that value is used as the
 * FIB index), look up the destination address, compute the flow hash,
 * use it to pick one adjacency out of the adj->n_adj candidates, store
 * the result in ip.adj_index[VLIB_TX], bump the adjacency counter and
 * enqueue to the next node given by the adjacency's lookup_next_index.
 *
 * Buffers are enqueued speculatively to the current `next' frame and
 * moved afterwards if the guess was wrong.
 *
 * Returns the number of buffers in the input frame.
 */
static uword
ip4_lookup_multicast (vlib_main_t * vm,
                      vlib_node_runtime_t * node,
                      vlib_frame_t * frame)
{
  ip4_main_t * im = &ip4_main;
  ip_lookup_main_t * lm = &im->lookup_main;
  vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
  u32 n_left_from, n_left_to_next, * from, * to_next;
  ip_lookup_next_t next;
  u32 cpu_index = os_get_cpu_number();

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next = node->cached_next_index;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next,
                           to_next, n_left_to_next);

      /* Dual loop: two packets per iteration, prefetching the next two. */
      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          vlib_buffer_t * p0, * p1;
          u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
          ip_lookup_next_t next0, next1;
          ip4_header_t * ip0, * ip1;
          ip_adjacency_t * adj0, * adj1;
          u32 fib_index0, fib_index1;
          u32 flow_hash_config0, flow_hash_config1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t * p2, * p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, LOAD);
            vlib_prefetch_buffer_header (p3, LOAD);

            CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
            CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
          }

          /* Speculatively enqueue both buffers to the current next frame. */
          pi0 = to_next[0] = from[0];
          pi1 = to_next[1] = from[1];

          p0 = vlib_get_buffer (vm, pi0);
          p1 = vlib_get_buffer (vm, pi1);

          ip0 = vlib_buffer_get_current (p0);
          ip1 = vlib_buffer_get_current (p1);

          /* FIB from the RX interface, unless sw_if_index[VLIB_TX] overrides it. */
          fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
          fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
          fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
            fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
          fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
            fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];

          adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
                                              &ip0->dst_address, p0);
          adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
                                              &ip1->dst_address, p1);

          adj0 = ip_get_adjacency (lm, adj_index0);
          adj1 = ip_get_adjacency (lm, adj_index1);

          next0 = adj0->lookup_next_index;
          next1 = adj1->lookup_next_index;

          flow_hash_config0 = 
              vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;

          flow_hash_config1 = 
              vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;

          vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
              (ip0, flow_hash_config0);
                                                                  
          vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
              (ip1, flow_hash_config1);

          /* n_adj is a power of two, so the mask below selects one of n_adj entries. */
          ASSERT (adj0->n_adj > 0);
          ASSERT (adj1->n_adj > 0);
          ASSERT (is_pow2 (adj0->n_adj));
          ASSERT (is_pow2 (adj1->n_adj));
          adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
          adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));

          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
          vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;

          if (1) /* $$$$$$ HACK FIXME */
          vlib_increment_combined_counter 
              (cm, cpu_index, adj_index0, 1,
               vlib_buffer_length_in_chain (vm, p0));
          if (1) /* $$$$$$ HACK FIXME */
          vlib_increment_combined_counter 
              (cm, cpu_index, adj_index1, 1,
               vlib_buffer_length_in_chain (vm, p1));

          from += 2;
          to_next += 2;
          n_left_to_next -= 2;
          n_left_from -= 2;

          /* Bit 0: p0 mis-speculated; bit 1: p1 mis-speculated. */
          wrong_next = (next0 != next) + 2*(next1 != next);
          if (PREDICT_FALSE (wrong_next != 0))
            {
              switch (wrong_next)
                {
                case 1:
                  /* A B A */
                  /* Only p0 is wrong: slide p1 into p0's slot, re-route p0. */
                  to_next[-2] = pi1;
                  to_next -= 1;
                  n_left_to_next += 1;
                  vlib_set_next_frame_buffer (vm, node, next0, pi0);
                  break;

                case 2:
                  /* A A B */
                  /* Only p1 is wrong: drop its slot, re-route p1. */
                  to_next -= 1;
                  n_left_to_next += 1;
                  vlib_set_next_frame_buffer (vm, node, next1, pi1);
                  break;

                case 3:
                  /* A B C */
                  /* Both wrong: give back both slots, re-route both. */
                  to_next -= 2;
                  n_left_to_next += 2;
                  vlib_set_next_frame_buffer (vm, node, next0, pi0);
                  vlib_set_next_frame_buffer (vm, node, next1, pi1);
                  if (next0 == next1)
                    {
                      /* A B B */
                      /* Both agree on a new next: switch speculation to it. */
                      vlib_put_next_frame (vm, node, next, n_left_to_next);
                      next = next1;
                      vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
                    }
                }
            }
        }
    
      /* Single loop: remaining packets, one at a time. */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          vlib_buffer_t * p0;
          ip4_header_t * ip0;
          u32 pi0, adj_index0;
          ip_lookup_next_t next0;
          ip_adjacency_t * adj0;
          u32 fib_index0;
          u32 flow_hash_config0;

          pi0 = from[0];
          to_next[0] = pi0;

          p0 = vlib_get_buffer (vm, pi0);

          ip0 = vlib_buffer_get_current (p0);

          /* FIB from the RX interface, unless sw_if_index[VLIB_TX] overrides it. */
          fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
                                vnet_buffer (p0)->sw_if_index[VLIB_RX]);
          fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
              fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
          
          adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
                                              &ip0->dst_address, p0);

          adj0 = ip_get_adjacency (lm, adj_index0);

          next0 = adj0->lookup_next_index;

          flow_hash_config0 = 
              vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;

          vnet_buffer (p0)->ip.flow_hash = 
            ip4_compute_flow_hash (ip0, flow_hash_config0);

          ASSERT (adj0->n_adj > 0);
          ASSERT (is_pow2 (adj0->n_adj));
          adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));

          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;

          if (1) /* $$$$$$ HACK FIXME */
              vlib_increment_combined_counter 
                  (cm, cpu_index, adj_index0, 1,
                   vlib_buffer_length_in_chain (vm, p0));

          from += 1;
          to_next += 1;
          n_left_to_next -= 1;
          n_left_from -= 1;

          if (PREDICT_FALSE (next0 != next))
            {
              /* Wrong guess: give the slot back and switch frames to next0. */
              n_left_to_next += 1;
              vlib_put_next_frame (vm, node, next, n_left_to_next);
              next = next0;
              vlib_get_next_frame (vm, node, next,
                                   to_next, n_left_to_next);
              to_next[0] = pi0;
              to_next += 1;
              n_left_to_next -= 1;
            }
        }

      vlib_put_next_frame (vm, node, next, n_left_to_next);
    }

  return frame->n_vectors;
}
3317
3318 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3319   .function = ip4_lookup_multicast,
3320   .name = "ip4-lookup-multicast",
3321   .vector_size = sizeof (u32),
3322
3323   .n_next_nodes = IP_LOOKUP_N_NEXT,
3324   .next_nodes = {
3325     [IP_LOOKUP_NEXT_MISS] = "ip4-miss",
3326     [IP_LOOKUP_NEXT_DROP] = "ip4-drop",
3327     [IP_LOOKUP_NEXT_PUNT] = "ip4-punt",
3328     [IP_LOOKUP_NEXT_LOCAL] = "ip4-local",
3329     [IP_LOOKUP_NEXT_ARP] = "ip4-arp",
3330     [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit",
3331     [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify",
3332     [IP_LOOKUP_NEXT_MAP] = "ip4-map",
3333     [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t",
3334     [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd",
3335     [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop",
3336     [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop", 
3337     [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop", 
3338   },
3339 };
3340
3341 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3342   .function = ip4_drop,
3343   .name = "ip4-multicast",
3344   .vector_size = sizeof (u32),
3345
3346   .format_trace = format_ip4_forward_next_trace,
3347
3348   .n_next_nodes = 1,
3349   .next_nodes = {
3350     [0] = "error-drop",
3351   },
3352 };
3353
3354 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3355 {
3356   ip4_main_t * im = &ip4_main;
3357   ip4_fib_mtrie_t * mtrie0;
3358   ip4_fib_mtrie_leaf_t leaf0;
3359   u32 adj_index0;
3360     
3361   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3362
3363   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3364   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3365   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3366   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3367   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3368   
3369   /* Handle default route. */
3370   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3371   
3372   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3373   
3374   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3375                                                   a, 
3376                                                   /* no_default_route */ 0);
3377 }
3378  
3379 static clib_error_t *
3380 test_lookup_command_fn (vlib_main_t * vm,
3381                         unformat_input_t * input,
3382                         vlib_cli_command_t * cmd)
3383 {
3384   u32 table_id = 0;
3385   f64 count = 1;
3386   u32 n;
3387   int i;
3388   ip4_address_t ip4_base_address;
3389   u64 errors = 0;
3390
3391   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3392       if (unformat (input, "table %d", &table_id))
3393         ;
3394       else if (unformat (input, "count %f", &count))
3395         ;
3396
3397       else if (unformat (input, "%U",
3398                          unformat_ip4_address, &ip4_base_address))
3399         ;
3400       else
3401         return clib_error_return (0, "unknown input `%U'",
3402                                   format_unformat_error, input);
3403   }
3404
3405   n = count;
3406
3407   for (i = 0; i < n; i++)
3408     {
3409       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3410         errors++;
3411
3412       ip4_base_address.as_u32 = 
3413         clib_host_to_net_u32 (1 + 
3414                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3415     }
3416
3417   if (errors) 
3418     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3419   else
3420     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3421
3422   return 0;
3423 }
3424
3425 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3426     .path = "test lookup",
3427     .short_help = "test lookup",
3428     .function = test_lookup_command_fn,
3429 };
3430
3431 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3432 {
3433   ip4_main_t * im4 = &ip4_main;
3434   ip4_fib_t * fib;
3435   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3436
3437   if (p == 0)
3438     return VNET_API_ERROR_NO_SUCH_FIB;
3439
3440   fib = vec_elt_at_index (im4->fibs, p[0]);
3441
3442   fib->flow_hash_config = flow_hash_config;
3443   return 0;
3444 }
3445  
3446 static clib_error_t *
3447 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3448                              unformat_input_t * input,
3449                              vlib_cli_command_t * cmd)
3450 {
3451   int matched = 0;
3452   u32 table_id = 0;
3453   u32 flow_hash_config = 0;
3454   int rv;
3455
3456   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3457     if (unformat (input, "table %d", &table_id))
3458       matched = 1;
3459 #define _(a,v) \
3460     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3461     foreach_flow_hash_bit
3462 #undef _
3463     else break;
3464   }
3465   
3466   if (matched == 0)
3467     return clib_error_return (0, "unknown input `%U'",
3468                               format_unformat_error, input);
3469   
3470   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3471   switch (rv)
3472     {
3473     case 0:
3474       break;
3475       
3476     case VNET_API_ERROR_NO_SUCH_FIB:
3477       return clib_error_return (0, "no such FIB table %d", table_id);
3478       
3479     default:
3480       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3481       break;
3482     }
3483   
3484   return 0;
3485 }
3486  
3487 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3488   .path = "set ip flow-hash",
3489   .short_help = 
3490   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3491   .function = set_ip_flow_hash_command_fn,
3492 };
3493  
3494 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3495                                  u32 table_index)
3496 {
3497   vnet_main_t * vnm = vnet_get_main();
3498   vnet_interface_main_t * im = &vnm->interface_main;
3499   ip4_main_t * ipm = &ip4_main;
3500   ip_lookup_main_t * lm = &ipm->lookup_main;
3501   vnet_classify_main_t * cm = &vnet_classify_main;
3502
3503   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3504     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3505
3506   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3507     return VNET_API_ERROR_NO_SUCH_ENTRY;
3508
3509   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3510   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3511
3512   return 0;
3513 }
3514
3515 static clib_error_t *
3516 set_ip_classify_command_fn (vlib_main_t * vm,
3517                             unformat_input_t * input,
3518                             vlib_cli_command_t * cmd)
3519 {
3520   u32 table_index = ~0;
3521   int table_index_set = 0;
3522   u32 sw_if_index = ~0;
3523   int rv;
3524   
3525   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3526     if (unformat (input, "table-index %d", &table_index))
3527       table_index_set = 1;
3528     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3529                        vnet_get_main(), &sw_if_index))
3530       ;
3531     else
3532       break;
3533   }
3534       
3535   if (table_index_set == 0)
3536     return clib_error_return (0, "classify table-index must be specified");
3537
3538   if (sw_if_index == ~0)
3539     return clib_error_return (0, "interface / subif must be specified");
3540
3541   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3542
3543   switch (rv)
3544     {
3545     case 0:
3546       break;
3547
3548     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3549       return clib_error_return (0, "No such interface");
3550
3551     case VNET_API_ERROR_NO_SUCH_ENTRY:
3552       return clib_error_return (0, "No such classifier table");
3553     }
3554   return 0;
3555 }
3556
3557 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3558     .path = "set ip classify",
3559     .short_help = 
3560     "set ip classify intfc <int> table-index <index>",
3561     .function = set_ip_classify_command_fn,
3562 };
3563