Initial commit of vpp code.
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47
48 /* This is really, really simple but stupid fib. */
49 u32
50 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
51                            ip4_address_t * dst,
52                            u32 disable_default_route)
53 {
54   ip_lookup_main_t * lm = &im->lookup_main;
55   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
56   uword * p, * hash, key;
57   i32 i, i_min, dst_address, ai;
58
59   i_min = disable_default_route ? 1 : 0;
60   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
61   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
62     {
63       hash = fib->adj_index_by_dst_address[i];
64       if (! hash)
65         continue;
66
67       key = dst_address & im->fib_masks[i];
68       if ((p = hash_get (hash, key)) != 0)
69         {
70           ai = p[0];
71           goto done;
72         }
73     }
74     
75   /* Nothing matches in table. */
76   ai = lm->miss_adj_index;
77
78  done:
79   return ai;
80 }
81
82 static ip4_fib_t *
83 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
84 {
85   ip4_fib_t * fib;
86   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
87   vec_add2 (im->fibs, fib, 1);
88   fib->table_id = table_id;
89   fib->index = fib - im->fibs;
90   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
91   fib->fwd_classify_table_index = ~0;
92   fib->rev_classify_table_index = ~0;
93   ip4_mtrie_init (&fib->mtrie);
94   return fib;
95 }
96
97 ip4_fib_t *
98 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
99                                    u32 table_index_or_id, u32 flags)
100 {
101   uword * p, fib_index;
102
103   fib_index = table_index_or_id;
104   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
105     {
106       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
107       if (! p)
108         return create_fib_with_table_id (im, table_index_or_id);
109       fib_index = p[0];
110     }
111   return vec_elt_at_index (im->fibs, fib_index);
112 }
113
114 static void
115 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
116                                        ip4_fib_t * fib,
117                                        u32 address_length)
118 {
119   hash_t * h;
120   uword max_index;
121
122   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
123   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
124
125   fib->adj_index_by_dst_address[address_length] =
126     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
127
128   hash_set_flags (fib->adj_index_by_dst_address[address_length],
129                   HASH_FLAG_NO_AUTO_SHRINK);
130
131   h = hash_header (fib->adj_index_by_dst_address[address_length]);
132   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
133
134   /* Initialize new/old hash value vectors. */
135   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
136   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
137 }
138
139 static void serialize_ip4_address (serialize_main_t * m, va_list * va)
140 {
141   ip4_address_t * a = va_arg (*va, ip4_address_t *);
142   u8 * p = serialize_get (m, sizeof (a->as_u8));
143   memcpy (p, a->as_u8, sizeof (a->as_u8));
144 }
145
146 static void unserialize_ip4_address (serialize_main_t * m, va_list * va)
147 {
148   ip4_address_t * a = va_arg (*va, ip4_address_t *);
149   u8 * p = unserialize_get (m, sizeof (a->as_u8));
150   memcpy (a->as_u8, p, sizeof (a->as_u8));
151 }
152
153 static void serialize_ip4_address_and_length (serialize_main_t * m, va_list * va)
154 {
155   ip4_address_t * a = va_arg (*va, ip4_address_t *);
156   u32 l = va_arg (*va, u32);
157   u32 n_bytes = (l / 8) + ((l % 8) != 0);
158   u8 * p = serialize_get (m, 1 + n_bytes);
159   ASSERT (l <= 32);
160   p[0] = l;
161   memcpy (p + 1, a->as_u8, n_bytes);
162 }
163
164 static void unserialize_ip4_address_and_length (serialize_main_t * m, va_list * va)
165 {
166   ip4_address_t * a = va_arg (*va, ip4_address_t *);
167   u32 * al = va_arg (*va, u32 *);
168   u8 * p = unserialize_get (m, 1);
169   u32 l, n_bytes;
170
171   al[0] = l = p[0];
172   ASSERT (l <= 32);
173   n_bytes = (l / 8) + ((l % 8) != 0);
174
175   if (n_bytes)
176     {
177       p = unserialize_get (m, n_bytes);
178       memcpy (a->as_u8, p, n_bytes);
179     }
180 }
181
182 static void serialize_ip4_add_del_route_msg (serialize_main_t * m, va_list * va)
183 {
184   ip4_add_del_route_args_t * a = va_arg (*va, ip4_add_del_route_args_t *);
185     
186   serialize_likely_small_unsigned_integer (m, a->table_index_or_table_id);
187   serialize_likely_small_unsigned_integer (m, a->flags);
188   serialize (m, serialize_ip4_address_and_length, &a->dst_address, a->dst_address_length);
189   serialize_likely_small_unsigned_integer (m, a->adj_index);
190   serialize_likely_small_unsigned_integer (m, a->n_add_adj);
191   if (a->n_add_adj > 0)
192     serialize (m, serialize_vec_ip_adjacency, a->add_adj, a->n_add_adj);
193 }
194
195 /* Serialized adjacencies for arp/rewrite do not send graph next_index
196    since graph hookup is not guaranteed to be the same for both sides
197    of serialize/unserialize. */
198 static void
199 unserialize_fixup_ip4_rewrite_adjacencies (vlib_main_t * vm,
200                                            ip_adjacency_t * adj,
201                                            u32 n_adj)
202 {
203   vnet_main_t * vnm = vnet_get_main();
204   u32 i, ni, sw_if_index, is_arp;
205   vnet_hw_interface_t * hw;
206
207   for (i = 0; i < n_adj; i++)
208     {
209       switch (adj[i].lookup_next_index)
210         {
211         case IP_LOOKUP_NEXT_REWRITE:
212         case IP_LOOKUP_NEXT_ARP:
213           is_arp = adj[i].lookup_next_index == IP_LOOKUP_NEXT_ARP;
214           sw_if_index = adj[i].rewrite_header.sw_if_index;
215           hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
216           ni = is_arp ? ip4_arp_node.index : ip4_rewrite_node.index;
217           adj[i].rewrite_header.node_index = ni;
218           adj[i].rewrite_header.next_index = vlib_node_add_next (vm, ni, hw->output_node_index);
219           if (is_arp)
220             vnet_rewrite_for_sw_interface
221               (vnm,
222                VNET_L3_PACKET_TYPE_ARP,
223                sw_if_index,
224                ni,
225                VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
226                &adj[i].rewrite_header,
227                sizeof (adj->rewrite_data));
228           break;
229
230         default:
231           break;
232         }
233     }
234 }
235
236 static void unserialize_ip4_add_del_route_msg (serialize_main_t * m, va_list * va)
237 {
238   ip4_main_t * i4m = &ip4_main;
239   ip4_add_del_route_args_t a;
240     
241   a.table_index_or_table_id = unserialize_likely_small_unsigned_integer (m);
242   a.flags = unserialize_likely_small_unsigned_integer (m);
243   unserialize (m, unserialize_ip4_address_and_length, &a.dst_address, &a.dst_address_length);
244   a.adj_index = unserialize_likely_small_unsigned_integer (m);
245   a.n_add_adj = unserialize_likely_small_unsigned_integer (m);
246   a.add_adj = 0;
247   if (a.n_add_adj > 0)
248     {
249       vec_resize (a.add_adj, a.n_add_adj);
250       unserialize (m, unserialize_vec_ip_adjacency, a.add_adj, a.n_add_adj);
251       unserialize_fixup_ip4_rewrite_adjacencies (vlib_get_main(), 
252                                                  a.add_adj, a.n_add_adj);
253     }
254
255   /* Prevent re-re-distribution. */
256   a.flags |= IP4_ROUTE_FLAG_NO_REDISTRIBUTE;
257
258   ip4_add_del_route (i4m, &a);
259
260   vec_free (a.add_adj);
261 }
262
/* Message descriptor pairing the name "vnet_ip4_add_del_route" with the
   serialize/unserialize handlers for ip4_add_del_route requests.
   ip4_add_del_route hands &ip4_add_del_route_msg to mc_serialize2 when a
   route change is redistributed instead of being applied locally.
   NOTE(review): registration mechanics are inside MC_SERIALIZE_MSG, which is
   not visible here. */
MC_SERIALIZE_MSG (ip4_add_del_route_msg, static) = {
  .name = "vnet_ip4_add_del_route",
  .serialize = serialize_ip4_add_del_route_msg,
  .unserialize = unserialize_ip4_add_del_route_msg,
};
268
269 static void
270 ip4_fib_set_adj_index (ip4_main_t * im,
271                        ip4_fib_t * fib,
272                        u32 flags,
273                        u32 dst_address_u32,
274                        u32 dst_address_length,
275                        u32 adj_index)
276 {
277   ip_lookup_main_t * lm = &im->lookup_main;
278   uword * hash;
279
280   if (vec_bytes(fib->old_hash_values))
281     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
282   if (vec_bytes(fib->new_hash_values))
283     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
284   fib->new_hash_values[0] = adj_index;
285
286   /* Make sure adj index is valid. */
287   if (CLIB_DEBUG > 0)
288     (void) ip_get_adjacency (lm, adj_index);
289
290   hash = fib->adj_index_by_dst_address[dst_address_length];
291
292   hash = _hash_set3 (hash, dst_address_u32,
293                      fib->new_hash_values,
294                      fib->old_hash_values);
295
296   fib->adj_index_by_dst_address[dst_address_length] = hash;
297
298   if (vec_len (im->add_del_route_callbacks) > 0)
299     {
300       ip4_add_del_route_callback_t * cb;
301       ip4_address_t d;
302       uword * p;
303
304       d.data_u32 = dst_address_u32;
305       vec_foreach (cb, im->add_del_route_callbacks)
306         if ((flags & cb->required_flags) == cb->required_flags)
307           cb->function (im, cb->function_opaque,
308                         fib, flags,
309                         &d, dst_address_length,
310                         fib->old_hash_values,
311                         fib->new_hash_values);
312
313       p = hash_get (hash, dst_address_u32);
314       memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
315     }
316 }
317
318 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
319 {
320   vlib_main_t * vm = vlib_get_main();
321   ip_lookup_main_t * lm = &im->lookup_main;
322   ip4_fib_t * fib;
323   u32 dst_address, dst_address_length, adj_index, old_adj_index;
324   uword * hash, is_del;
325   ip4_add_del_route_callback_t * cb;
326
327   if (vm->mc_main && ! (a->flags & IP4_ROUTE_FLAG_NO_REDISTRIBUTE))
328     {
329       u32 multiple_messages_per_vlib_buffer = (a->flags & IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP);
330       mc_serialize2 (vm->mc_main, multiple_messages_per_vlib_buffer,
331                      &ip4_add_del_route_msg, a);
332       return;
333     }
334
335   /* Either create new adjacency or use given one depending on arguments. */
336   if (a->n_add_adj > 0)
337     {
338       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
339       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
340     }
341   else
342     adj_index = a->adj_index;
343
344   dst_address = a->dst_address.data_u32;
345   dst_address_length = a->dst_address_length;
346   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
347
348   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
349   dst_address &= im->fib_masks[dst_address_length];
350
351   if (! fib->adj_index_by_dst_address[dst_address_length])
352     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
353
354   hash = fib->adj_index_by_dst_address[dst_address_length];
355
356   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
357
358   if (is_del)
359     {
360       fib->old_hash_values[0] = ~0;
361       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
362       fib->adj_index_by_dst_address[dst_address_length] = hash;
363
364       if (vec_len (im->add_del_route_callbacks) > 0
365           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
366         {
367           fib->new_hash_values[0] = ~0;
368           vec_foreach (cb, im->add_del_route_callbacks)
369             if ((a->flags & cb->required_flags) == cb->required_flags)
370               cb->function (im, cb->function_opaque,
371                             fib, a->flags,
372                             &a->dst_address, dst_address_length,
373                             fib->old_hash_values,
374                             fib->new_hash_values);
375         }
376     }
377   else
378     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
379                            adj_index);
380
381   old_adj_index = fib->old_hash_values[0];
382
383   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
384                                is_del ? old_adj_index : adj_index,
385                                is_del);
386
387   /* Delete old adjacency index if present and changed. */
388   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
389       && old_adj_index != ~0
390       && old_adj_index != adj_index)
391     ip_del_adjacency (lm, old_adj_index);
392 }
393
394 static void serialize_ip4_add_del_route_next_hop_msg (serialize_main_t * m, va_list * va)
395 {
396   u32 flags = va_arg (*va, u32);
397   ip4_address_t * dst_address = va_arg (*va, ip4_address_t *);
398   u32 dst_address_length = va_arg (*va, u32);
399   ip4_address_t * next_hop_address = va_arg (*va, ip4_address_t *);
400   u32 next_hop_sw_if_index = va_arg (*va, u32);
401   u32 next_hop_weight = va_arg (*va, u32);
402
403   serialize_likely_small_unsigned_integer (m, flags);
404   serialize (m, serialize_ip4_address_and_length, dst_address, dst_address_length);
405   serialize (m, serialize_ip4_address, next_hop_address);
406   serialize_likely_small_unsigned_integer (m, next_hop_sw_if_index);
407   serialize_likely_small_unsigned_integer (m, next_hop_weight);
408 }
409
410 static void unserialize_ip4_add_del_route_next_hop_msg (serialize_main_t * m, va_list * va)
411 {
412   ip4_main_t * im = &ip4_main;
413   u32 flags, dst_address_length, next_hop_sw_if_index, next_hop_weight;
414   ip4_address_t dst_address, next_hop_address;
415
416   flags = unserialize_likely_small_unsigned_integer (m);
417   unserialize (m, unserialize_ip4_address_and_length, &dst_address, &dst_address_length);
418   unserialize (m, unserialize_ip4_address, &next_hop_address);
419   next_hop_sw_if_index = unserialize_likely_small_unsigned_integer (m);
420   next_hop_weight = unserialize_likely_small_unsigned_integer (m);
421
422   ip4_add_del_route_next_hop
423     (im,
424      flags | IP4_ROUTE_FLAG_NO_REDISTRIBUTE,
425      &dst_address,
426      dst_address_length,
427      &next_hop_address,
428      next_hop_sw_if_index,
429      next_hop_weight, (u32)~0, 
430      (u32)~0 /* explicit FIB index */);
431 }
432
/* Message descriptor pairing the name "vnet_ip4_add_del_route_next_hop"
   with the serialize/unserialize handlers for next-hop add/del requests.
   ip4_add_del_route_next_hop hands &ip4_add_del_route_next_hop_msg to
   mc_serialize2 when a request is redistributed instead of being applied
   locally.
   NOTE(review): registration mechanics are inside MC_SERIALIZE_MSG, which is
   not visible here. */
MC_SERIALIZE_MSG (ip4_add_del_route_next_hop_msg, static) = {
  .name = "vnet_ip4_add_del_route_next_hop",
  .serialize = serialize_ip4_add_del_route_next_hop_msg,
  .unserialize = unserialize_ip4_add_del_route_next_hop_msg,
};
438
/* Add or remove a single next hop for the destination prefix
   dst_address/dst_address_length (removal when IP4_ROUTE_FLAG_DEL is set in
   flags).

   im                    ip4 main structure.
   flags                 IP4_ROUTE_FLAG_* bits.
   dst_address,
   dst_address_length    destination prefix.
   next_hop              next hop address; an all-zero address selects the
                         per-interface route adjacency of
                         next_hop_sw_if_index.
   next_hop_sw_if_index  interface of the next hop; also selects the FIB
                         when explicit_fib_index is ~0.
   next_hop_weight       weight passed to the multipath code.
   adj_index             adjacency to use as the next hop, or ~0 to have it
                         looked up from next_hop.
   explicit_fib_index    FIB index to operate on, or ~0.

   When vm->mc_main is set and IP4_ROUTE_FLAG_NO_REDISTRIBUTE is clear, the
   request is only serialized for distribution and the function returns.
   Failures set vnm->api_errno and are reported through clib_error_report;
   nothing is returned to the caller. */
void
ip4_add_del_route_next_hop (ip4_main_t * im,
                            u32 flags,
                            ip4_address_t * dst_address,
                            u32 dst_address_length,
                            ip4_address_t * next_hop,
                            u32 next_hop_sw_if_index,
                            u32 next_hop_weight, u32 adj_index, 
                            u32 explicit_fib_index)
{
  vnet_main_t * vnm = vnet_get_main();
  vlib_main_t * vm = vlib_get_main();
  ip_lookup_main_t * lm = &im->lookup_main;
  u32 fib_index;
  ip4_fib_t * fib;
  u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
  u32 dst_adj_index, nh_adj_index;
  uword * dst_hash, * dst_result;
  uword * nh_hash, * nh_result;
  ip_adjacency_t * dst_adj;
  ip_multipath_adjacency_t * old_mp, * new_mp;
  int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
  int is_interface_next_hop;
  clib_error_t * error = 0;

  /* Redistribution path: serialize the request and do nothing locally.
     Note that adj_index and explicit_fib_index are not part of the
     serialized message. */
  if (vm->mc_main && ! (flags & IP4_ROUTE_FLAG_NO_REDISTRIBUTE))
    {
      u32 multiple_messages_per_vlib_buffer = (flags & IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP);
      mc_serialize2 (vm->mc_main,
                     multiple_messages_per_vlib_buffer,
                     &ip4_add_del_route_next_hop_msg,
                     flags,
                     dst_address, dst_address_length,
                     next_hop, next_hop_sw_if_index, next_hop_weight);
      return;
    }

  /* Without an explicit FIB index, use the FIB bound to the next hop's
     interface. */
  if (explicit_fib_index == (u32)~0)
      fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
  else
      fib_index = explicit_fib_index;

  fib = vec_elt_at_index (im->fibs, fib_index);
  
  /* Lookup next hop to be added or deleted. */
  is_interface_next_hop = next_hop->data_u32 == 0;
  if (adj_index == (u32)~0)
    {
      if (is_interface_next_hop)
        {
          /* Reuse the interface route adjacency for this interface if one
             was already created; otherwise create and remember it. */
          nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
          if (nh_result)
            nh_adj_index = *nh_result;
          else
            {
              ip_adjacency_t * adj;
              adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
                                      &nh_adj_index);
              ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
              ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
              hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
            }
        }
      else
        {
          /* The next hop address is looked up as a /32 in this FIB. */
          nh_hash = fib->adj_index_by_dst_address[32];
          nh_result = hash_get (nh_hash, next_hop->data_u32);
          
          /* Next hop must be known. */
          if (! nh_result)
            {
              vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_IN_FIB;
              error = clib_error_return (0, "next-hop %U/32 not in FIB",
                                         format_ip4_address, next_hop);
              goto done;
            }
          nh_adj_index = *nh_result;
        }
    }
  else
    {
      /* Caller supplied the next hop adjacency directly. */
      nh_adj_index = adj_index;
    }
  ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
  dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];

  /* Find the adjacency currently attached to the destination, if any. */
  dst_hash = fib->adj_index_by_dst_address[dst_address_length];
  dst_result = hash_get (dst_hash, dst_address_u32);
  if (dst_result)
    {
      dst_adj_index = dst_result[0];
      dst_adj = ip_get_adjacency (lm, dst_adj_index);
    }
  else
    {
      /* For deletes destination must be known. */
      if (is_del)
        {
          vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
          error = clib_error_return (0, "unknown destination %U/%d",
                                     format_ip4_address, dst_address,
                                     dst_address_length);
          goto done;
        }

      dst_adj_index = ~0;
      dst_adj = 0;
    }

  /* Ignore adds of X/32 with next hop of X. */
  /* NOTE(review): this check only fires when the caller passed an explicit
     adj_index (adj_index != ~0) -- confirm that is the intended condition. */
  if (! is_del
      && dst_address_length == 32
      && dst_address->data_u32 == next_hop->data_u32 
      && adj_index != (u32)~0)
    {
      vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
      error = clib_error_return (0, "prefix matches next hop %U/%d",
                                 format_ip4_address, dst_address,
                                 dst_address_length);
      goto done;
    }

  /* The destination adjacency's heap_handle is used as the index of its
     multipath adjacency; ~0 when the destination has no adjacency yet. */
  old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;

  /* A zero return is treated as "next hop not found in multipath". */
  if (! ip_multipath_adjacency_add_del_next_hop
      (lm, is_del,
       old_mp_adj_index,
       nh_adj_index,
       next_hop_weight,
       &new_mp_adj_index))
    {
      vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
      error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
                                 format_ip4_address, next_hop);
      goto done;
    }
  
  old_mp = new_mp = 0;
  if (old_mp_adj_index != ~0)
    old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
  if (new_mp_adj_index != ~0)
    new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);

  /* Only touch the FIB when the multipath adjacency actually changed. */
  if (old_mp != new_mp)
    {
      ip4_add_del_route_args_t a;
      a.table_index_or_table_id = fib_index;
      /* The route itself is deleted only when a delete left no multipath
         adjacency behind; otherwise it is (re)added with the new one.  The
         old adjacency is always kept by ip4_add_del_route here. */
      a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
                 | IP4_ROUTE_FLAG_FIB_INDEX
                 | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
                 | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
      a.dst_address = dst_address[0];
      a.dst_address_length = dst_address_length;
      a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
      a.add_adj = 0;
      a.n_add_adj = 0;

      ip4_add_del_route (im, &a);
    }

 done:
  if (error)
    clib_error_report (error);
}
603
604 void *
605 ip4_get_route (ip4_main_t * im,
606                u32 table_index_or_table_id,
607                u32 flags,
608                u8 * address,
609                u32 address_length)
610 {
611   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
612   u32 dst_address = * (u32 *) address;
613   uword * hash, * p;
614
615   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
616   dst_address &= im->fib_masks[address_length];
617
618   hash = fib->adj_index_by_dst_address[address_length];
619   p = hash_get (hash, dst_address);
620   return (void *) p;
621 }
622
623 void
624 ip4_foreach_matching_route (ip4_main_t * im,
625                             u32 table_index_or_table_id,
626                             u32 flags,
627                             ip4_address_t * address,
628                             u32 address_length,
629                             ip4_address_t ** results,
630                             u8 ** result_lengths)
631 {
632   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
633   u32 dst_address = address->data_u32;
634   u32 this_length = address_length;
635   
636   if (*results)
637     _vec_len (*results) = 0;
638   if (*result_lengths)
639     _vec_len (*result_lengths) = 0;
640
641   while (this_length <= 32 && vec_len (results) == 0)
642     {
643       uword k, v;
644       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
645         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
646           {
647             ip4_address_t a;
648             a.data_u32 = k;
649             vec_add1 (*results, a);
650             vec_add1 (*result_lengths, this_length);
651           }
652       }));
653
654       this_length++;
655     }
656 }
657
658 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
659                                   u32 table_index_or_table_id,
660                                   u32 flags)
661 {
662   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
663   ip_lookup_main_t * lm = &im->lookup_main;
664   u32 i, l;
665   ip4_address_t a;
666   ip4_add_del_route_callback_t * cb;
667   static ip4_address_t * to_delete;
668
669   if (lm->n_adjacency_remaps == 0)
670     return;
671
672   for (l = 0; l <= 32; l++)
673     {
674       hash_pair_t * p;
675       uword * hash = fib->adj_index_by_dst_address[l];
676
677       if (hash_elts (hash) == 0)
678         continue;
679
680       if (to_delete)
681         _vec_len (to_delete) = 0;
682
683       hash_foreach_pair (p, hash, ({
684         u32 adj_index = p->value[0];
685         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
686
687         if (m)
688           {
689             /* Record destination address from hash key. */
690             a.data_u32 = p->key;
691
692             /* New adjacency points to nothing: so delete prefix. */
693             if (m == ~0)
694               vec_add1 (to_delete, a);
695             else
696               {
697                 /* Remap to new adjacency. */
698                 memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
699
700                 /* Set new adjacency value. */
701                 fib->new_hash_values[0] = p->value[0] = m - 1;
702
703                 vec_foreach (cb, im->add_del_route_callbacks)
704                   if ((flags & cb->required_flags) == cb->required_flags)
705                     cb->function (im, cb->function_opaque,
706                                   fib, flags | IP4_ROUTE_FLAG_ADD,
707                                   &a, l,
708                                   fib->old_hash_values,
709                                   fib->new_hash_values);
710               }
711           }
712       }));
713
714       fib->new_hash_values[0] = ~0;
715       for (i = 0; i < vec_len (to_delete); i++)
716         {
717           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
718           vec_foreach (cb, im->add_del_route_callbacks)
719             if ((flags & cb->required_flags) == cb->required_flags)
720               cb->function (im, cb->function_opaque,
721                             fib, flags | IP4_ROUTE_FLAG_DEL,
722                             &a, l,
723                             fib->old_hash_values,
724                             fib->new_hash_values);
725         }
726     }
727
728   /* Also remap adjacencies in mtrie. */
729   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
730
731   /* Reset mapping table. */
732   vec_zero (lm->adjacency_remap_table);
733
734   /* All remaps have been performed. */
735   lm->n_adjacency_remaps = 0;
736 }
737
738 void ip4_delete_matching_routes (ip4_main_t * im,
739                                  u32 table_index_or_table_id,
740                                  u32 flags,
741                                  ip4_address_t * address,
742                                  u32 address_length)
743 {
744   static ip4_address_t * matching_addresses;
745   static u8 * matching_address_lengths;
746   u32 l, i;
747   ip4_add_del_route_args_t a;
748
749   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
750   a.table_index_or_table_id = table_index_or_table_id;
751   a.adj_index = ~0;
752   a.add_adj = 0;
753   a.n_add_adj = 0;
754
755   for (l = address_length + 1; l <= 32; l++)
756     {
757       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
758                                   address,
759                                   l,
760                                   &matching_addresses,
761                                   &matching_address_lengths);
762       for (i = 0; i < vec_len (matching_addresses); i++)
763         {
764           a.dst_address = matching_addresses[i];
765           a.dst_address_length = matching_address_lengths[i];
766           ip4_add_del_route (im, &a);
767         }
768     }
769
770   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
771 }
772
773 always_inline uword
774 ip4_lookup_inline (vlib_main_t * vm,
775                    vlib_node_runtime_t * node,
776                    vlib_frame_t * frame,
777                    int lookup_for_responses_to_locally_received_packets)
778 {
779   ip4_main_t * im = &ip4_main;
780   ip_lookup_main_t * lm = &im->lookup_main;
781   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
782   u32 n_left_from, n_left_to_next, * from, * to_next;
783   ip_lookup_next_t next;
784   u32 cpu_index = os_get_cpu_number();
785
786   from = vlib_frame_vector_args (frame);
787   n_left_from = frame->n_vectors;
788   next = node->cached_next_index;
789
790   while (n_left_from > 0)
791     {
792       vlib_get_next_frame (vm, node, next,
793                            to_next, n_left_to_next);
794
795       while (n_left_from >= 4 && n_left_to_next >= 2)
796         {
797           vlib_buffer_t * p0, * p1;
798           ip4_header_t * ip0, * ip1;
799           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
800           ip_lookup_next_t next0, next1;
801           ip_adjacency_t * adj0, * adj1;
802           ip4_fib_mtrie_t * mtrie0, * mtrie1;
803           ip4_fib_mtrie_leaf_t leaf0, leaf1;
804           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
805           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
806           u32 flow_hash_config0, flow_hash_config1;
807           u32 hash_c0, hash_c1;
808           u32 wrong_next;
809
810           /* Prefetch next iteration. */
811           {
812             vlib_buffer_t * p2, * p3;
813
814             p2 = vlib_get_buffer (vm, from[2]);
815             p3 = vlib_get_buffer (vm, from[3]);
816
817             vlib_prefetch_buffer_header (p2, LOAD);
818             vlib_prefetch_buffer_header (p3, LOAD);
819
820             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
821             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
822           }
823
824           pi0 = to_next[0] = from[0];
825           pi1 = to_next[1] = from[1];
826
827           p0 = vlib_get_buffer (vm, pi0);
828           p1 = vlib_get_buffer (vm, pi1);
829
830           ip0 = vlib_buffer_get_current (p0);
831           ip1 = vlib_buffer_get_current (p1);
832
833           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
834           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
835           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
836             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
837           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
838             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
839
840
841           if (! lookup_for_responses_to_locally_received_packets)
842             {
843               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
844               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
845
846               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
847
848               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0);
849               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 0);
850             }
851
852           tcp0 = (void *) (ip0 + 1);
853           tcp1 = (void *) (ip1 + 1);
854
855           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
856                          || ip0->protocol == IP_PROTOCOL_UDP);
857           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
858                          || ip1->protocol == IP_PROTOCOL_UDP);
859
860           if (! lookup_for_responses_to_locally_received_packets)
861             {
862               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1);
863               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 1);
864             }
865
866           if (! lookup_for_responses_to_locally_received_packets)
867             {
868               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2);
869               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 2);
870             }
871
872           if (! lookup_for_responses_to_locally_received_packets)
873             {
874               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3);
875               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 3);
876             }
877
878           if (lookup_for_responses_to_locally_received_packets)
879             {
880               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
881               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
882             }
883           else
884             {
885               /* Handle default route. */
886               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
887               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
888
889               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
890               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
891             }
892
893           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
894                                                            &ip0->dst_address,
895                                                            /* no_default_route */ 0));
896           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
897                                                            &ip1->dst_address,
898                                                            /* no_default_route */ 0));
899           adj0 = ip_get_adjacency (lm, adj_index0);
900           adj1 = ip_get_adjacency (lm, adj_index1);
901
902           next0 = adj0->lookup_next_index;
903           next1 = adj1->lookup_next_index;
904
905           /* Use flow hash to compute multipath adjacency. */
906           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
907           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
908           if (PREDICT_FALSE (adj0->n_adj > 1))
909             {
910               flow_hash_config0 = 
911                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
912               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
913                 ip4_compute_flow_hash (ip0, flow_hash_config0);
914             }
915           if (PREDICT_FALSE(adj1->n_adj > 1))
916             {
917               flow_hash_config1 = 
918                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
919               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
920                 ip4_compute_flow_hash (ip1, flow_hash_config1);
921             }
922
923           ASSERT (adj0->n_adj > 0);
924           ASSERT (adj1->n_adj > 0);
925           ASSERT (is_pow2 (adj0->n_adj));
926           ASSERT (is_pow2 (adj1->n_adj));
927           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
928           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
929
930           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
931           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
932
933           vlib_increment_combined_counter 
934               (cm, cpu_index, adj_index0, 1,
935                vlib_buffer_length_in_chain (vm, p0) 
936                + sizeof(ethernet_header_t));
937           vlib_increment_combined_counter 
938               (cm, cpu_index, adj_index1, 1,
939                vlib_buffer_length_in_chain (vm, p1)
940                + sizeof(ethernet_header_t));
941
942           from += 2;
943           to_next += 2;
944           n_left_to_next -= 2;
945           n_left_from -= 2;
946
947           wrong_next = (next0 != next) + 2*(next1 != next);
948           if (PREDICT_FALSE (wrong_next != 0))
949             {
950               switch (wrong_next)
951                 {
952                 case 1:
953                   /* A B A */
954                   to_next[-2] = pi1;
955                   to_next -= 1;
956                   n_left_to_next += 1;
957                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
958                   break;
959
960                 case 2:
961                   /* A A B */
962                   to_next -= 1;
963                   n_left_to_next += 1;
964                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
965                   break;
966
967                 case 3:
968                   /* A B C */
969                   to_next -= 2;
970                   n_left_to_next += 2;
971                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
972                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
973                   if (next0 == next1)
974                     {
975                       /* A B B */
976                       vlib_put_next_frame (vm, node, next, n_left_to_next);
977                       next = next1;
978                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
979                     }
980                 }
981             }
982         }
983     
984       while (n_left_from > 0 && n_left_to_next > 0)
985         {
986           vlib_buffer_t * p0;
987           ip4_header_t * ip0;
988           __attribute__((unused)) tcp_header_t * tcp0;
989           ip_lookup_next_t next0;
990           ip_adjacency_t * adj0;
991           ip4_fib_mtrie_t * mtrie0;
992           ip4_fib_mtrie_leaf_t leaf0;
993           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
994           u32 flow_hash_config0, hash_c0;
995
996           pi0 = from[0];
997           to_next[0] = pi0;
998
999           p0 = vlib_get_buffer (vm, pi0);
1000
1001           ip0 = vlib_buffer_get_current (p0);
1002
1003           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
1004           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
1005             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
1006
1007           if (! lookup_for_responses_to_locally_received_packets)
1008             {
1009               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1010
1011               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1012
1013               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0);
1014             }
1015
1016           tcp0 = (void *) (ip0 + 1);
1017
1018           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
1019                          || ip0->protocol == IP_PROTOCOL_UDP);
1020
1021           if (! lookup_for_responses_to_locally_received_packets)
1022             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1);
1023
1024           if (! lookup_for_responses_to_locally_received_packets)
1025             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2);
1026
1027           if (! lookup_for_responses_to_locally_received_packets)
1028             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3);
1029
1030           if (lookup_for_responses_to_locally_received_packets)
1031             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
1032           else
1033             {
1034               /* Handle default route. */
1035               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1036               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1037             }
1038
1039           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1040                                                            &ip0->dst_address,
1041                                                            /* no_default_route */ 0));
1042
1043           adj0 = ip_get_adjacency (lm, adj_index0);
1044
1045           next0 = adj0->lookup_next_index;
1046
1047           /* Use flow hash to compute multipath adjacency. */
1048           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
1049           if (PREDICT_FALSE(adj0->n_adj > 1))
1050             {
1051               flow_hash_config0 = 
1052                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
1053
1054               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
1055                 ip4_compute_flow_hash (ip0, flow_hash_config0);
1056             }
1057
1058           ASSERT (adj0->n_adj > 0);
1059           ASSERT (is_pow2 (adj0->n_adj));
1060           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
1061
1062           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1063
1064           vlib_increment_combined_counter 
1065               (cm, cpu_index, adj_index0, 1,
1066                vlib_buffer_length_in_chain (vm, p0)
1067                + sizeof(ethernet_header_t));
1068
1069           from += 1;
1070           to_next += 1;
1071           n_left_to_next -= 1;
1072           n_left_from -= 1;
1073
1074           if (PREDICT_FALSE (next0 != next))
1075             {
1076               n_left_to_next += 1;
1077               vlib_put_next_frame (vm, node, next, n_left_to_next);
1078               next = next0;
1079               vlib_get_next_frame (vm, node, next,
1080                                    to_next, n_left_to_next);
1081               to_next[0] = pi0;
1082               to_next += 1;
1083               n_left_to_next -= 1;
1084             }
1085         }
1086
1087       vlib_put_next_frame (vm, node, next, n_left_to_next);
1088     }
1089
1090   return frame->n_vectors;
1091 }
1092
1093 static uword
1094 ip4_lookup (vlib_main_t * vm,
1095             vlib_node_runtime_t * node,
1096             vlib_frame_t * frame)
1097 {
1098   return ip4_lookup_inline (vm, node, frame, /* lookup_for_responses_to_locally_received_packets */ 0);
1099
1100 }
1101
1102 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
1103                                         ip_adjacency_t * adj,
1104                                         u32 sw_if_index,
1105                                         u32 if_address_index)
1106 {
1107   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1108   ip_lookup_next_t n;
1109   vnet_l3_packet_type_t packet_type;
1110   u32 node_index;
1111
1112   if (hw->hw_class_index == ethernet_hw_interface_class.index
1113       || hw->hw_class_index == srp_hw_interface_class.index)
1114     {
1115       /* 
1116        * We have a bit of a problem in this case. ip4-arp uses
1117        * the rewrite_header.next_index to hand pkts to the
1118        * indicated inteface output node. We can end up in
1119        * ip4_rewrite_local, too, which also pays attention to 
1120        * rewrite_header.next index. Net result: a hack in
1121        * ip4_rewrite_local...
1122        */
1123       n = IP_LOOKUP_NEXT_ARP;
1124       node_index = ip4_arp_node.index;
1125       adj->if_address_index = if_address_index;
1126       packet_type = VNET_L3_PACKET_TYPE_ARP;
1127     }
1128   else
1129     {
1130       n = IP_LOOKUP_NEXT_REWRITE;
1131       node_index = ip4_rewrite_node.index;
1132       packet_type = VNET_L3_PACKET_TYPE_IP4;
1133     }
1134
1135   adj->lookup_next_index = n;
1136   vnet_rewrite_for_sw_interface
1137     (vnm,
1138      packet_type,
1139      sw_if_index,
1140      node_index,
1141      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1142      &adj->rewrite_header,
1143      sizeof (adj->rewrite_data));
1144 }
1145
1146 static void
1147 ip4_add_interface_routes (u32 sw_if_index,
1148                           ip4_main_t * im, u32 fib_index,
1149                           ip_interface_address_t * a)
1150 {
1151   vnet_main_t * vnm = vnet_get_main();
1152   ip_lookup_main_t * lm = &im->lookup_main;
1153   ip_adjacency_t * adj;
1154   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1155   ip4_add_del_route_args_t x;
1156   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1157   u32 classify_table_index;
1158
1159   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1160   x.table_index_or_table_id = fib_index;
1161   x.flags = (IP4_ROUTE_FLAG_ADD
1162              | IP4_ROUTE_FLAG_FIB_INDEX
1163              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1164   x.dst_address = address[0];
1165   x.dst_address_length = a->address_length;
1166   x.n_add_adj = 0;
1167   x.add_adj = 0;
1168
1169   a->neighbor_probe_adj_index = ~0;
1170   if (a->address_length < 32)
1171     {
1172       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1173                               &x.adj_index);
1174       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1175       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1176       ip4_add_del_route (im, &x);
1177       a->neighbor_probe_adj_index = x.adj_index;
1178     }
1179   
1180   /* Add e.g. 1.1.1.1/32 as local to this host. */
1181   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1182                           &x.adj_index);
1183   
1184   classify_table_index = ~0;
1185   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1186     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1187   if (classify_table_index != (u32) ~0)
1188     {
1189       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1190       adj->classify_table_index = classify_table_index;
1191     }
1192   else
1193     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1194   
1195   adj->if_address_index = a - lm->if_address_pool;
1196   adj->rewrite_header.sw_if_index = sw_if_index;
1197   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1198   /* 
1199    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1200    * fail an RPF-ish check, but still go thru the rewrite code...
1201    */
1202   adj->rewrite_header.data_bytes = 0;
1203
1204   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1205   x.dst_address_length = 32;
1206   ip4_add_del_route (im, &x);
1207 }
1208
1209 static void
1210 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1211 {
1212   ip4_add_del_route_args_t x;
1213
1214   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1215   x.table_index_or_table_id = fib_index;
1216   x.flags = (IP4_ROUTE_FLAG_DEL
1217              | IP4_ROUTE_FLAG_FIB_INDEX
1218              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1219   x.dst_address = address[0];
1220   x.dst_address_length = address_length;
1221   x.adj_index = ~0;
1222   x.n_add_adj = 0;
1223   x.add_adj = 0;
1224
1225   if (address_length < 32)
1226     ip4_add_del_route (im, &x);
1227
1228   x.dst_address_length = 32;
1229   ip4_add_del_route (im, &x);
1230
1231   ip4_delete_matching_routes (im,
1232                               fib_index,
1233                               IP4_ROUTE_FLAG_FIB_INDEX,
1234                               address,
1235                               address_length);
1236 }
1237
1238 typedef struct {
1239     u32 sw_if_index;
1240     ip4_address_t address;
1241     u32 length;
1242 } ip4_interface_address_t;
1243
1244 static void serialize_vec_ip4_set_interface_address (serialize_main_t * m, va_list * va)
1245 {
1246     ip4_interface_address_t * a = va_arg (*va, ip4_interface_address_t *);
1247     u32 n = va_arg (*va, u32);
1248     u32 i;
1249     for (i = 0; i < n; i++) {
1250         serialize_integer (m, a[i].sw_if_index, sizeof (a[i].sw_if_index));
1251         serialize (m, serialize_ip4_address, &a[i].address);
1252         serialize_integer (m, a[i].length, sizeof (a[i].length));
1253     }
1254 }
1255
1256 static void unserialize_vec_ip4_set_interface_address (serialize_main_t * m, va_list * va)
1257 {
1258     ip4_interface_address_t * a = va_arg (*va, ip4_interface_address_t *);
1259     u32 n = va_arg (*va, u32);
1260     u32 i;
1261     for (i = 0; i < n; i++) {
1262         unserialize_integer (m, &a[i].sw_if_index, sizeof (a[i].sw_if_index));
1263         unserialize (m, unserialize_ip4_address, &a[i].address);
1264         unserialize_integer (m, &a[i].length, sizeof (a[i].length));
1265     }
1266 }
1267
1268 static void serialize_ip4_set_interface_address_msg (serialize_main_t * m, va_list * va)
1269 {
1270   ip4_interface_address_t * a = va_arg (*va, ip4_interface_address_t *);
1271   int is_del = va_arg (*va, int);
1272   serialize (m, serialize_vec_ip4_set_interface_address, a, 1);
1273   serialize_integer (m, is_del, sizeof (is_del));
1274 }
1275
1276 static clib_error_t *
1277 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1278                                         u32 sw_if_index,
1279                                         ip4_address_t * new_address,
1280                                         u32 new_length,
1281                                         u32 redistribute,
1282                                         u32 insert_routes,
1283                                         u32 is_del);
1284
1285 static void unserialize_ip4_set_interface_address_msg (serialize_main_t * m, va_list * va)
1286 {
1287   mc_main_t * mcm = va_arg (*va, mc_main_t *);
1288   vlib_main_t * vm = mcm->vlib_main;
1289   ip4_interface_address_t a;
1290   clib_error_t * error;
1291   int is_del;
1292
1293   unserialize (m, unserialize_vec_ip4_set_interface_address, &a, 1);
1294   unserialize_integer (m, &is_del, sizeof (is_del));
1295   error = ip4_add_del_interface_address_internal
1296     (vm, a.sw_if_index, &a.address, a.length,
1297      /* redistribute */ 0,
1298      /* insert_routes */ 1,
1299      is_del);
1300   if (error)
1301     clib_error_report (error);
1302 }
1303
1304 MC_SERIALIZE_MSG (ip4_set_interface_address_msg, static) = {
1305   .name = "vnet_ip4_set_interface_address",
1306   .serialize = serialize_ip4_set_interface_address_msg,
1307   .unserialize = unserialize_ip4_set_interface_address_msg,
1308 };
1309
1310 static clib_error_t *
1311 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1312                                         u32 sw_if_index,
1313                                         ip4_address_t * address,
1314                                         u32 address_length,
1315                                         u32 redistribute,
1316                                         u32 insert_routes,
1317                                         u32 is_del)
1318 {
1319   vnet_main_t * vnm = vnet_get_main();
1320   ip4_main_t * im = &ip4_main;
1321   ip_lookup_main_t * lm = &im->lookup_main;
1322   clib_error_t * error = 0;
1323   u32 if_address_index, elts_before;
1324   ip4_address_fib_t ip4_af, * addr_fib = 0;
1325
1326   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1327   ip4_addr_fib_init (&ip4_af, address,
1328                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1329   vec_add1 (addr_fib, ip4_af);
1330
1331   /* When adding an address check that it does not conflict with an existing address. */
1332   if (! is_del)
1333     {
1334       ip_interface_address_t * ia;
1335       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1336                                     0 /* honor unnumbered */,
1337       ({
1338         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1339
1340         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1341             || ip4_destination_matches_route (im, x, address, address_length))
1342           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1343                                     format_ip4_address_and_length, address, address_length,
1344                                     format_ip4_address_and_length, x, ia->address_length,
1345                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1346       }));
1347     }
1348
1349   if (vm->mc_main && redistribute)
1350     {
1351       ip4_interface_address_t a;
1352       a.sw_if_index = sw_if_index;
1353       a.address = address[0];
1354       a.length = address_length;
1355       mc_serialize (vm->mc_main, &ip4_set_interface_address_msg, 
1356                     &a, (int)is_del);
1357       goto done;
1358     }
1359     
1360   elts_before = pool_elts (lm->if_address_pool);
1361
1362   error = ip_interface_address_add_del
1363     (lm,
1364      sw_if_index,
1365      addr_fib,
1366      address_length,
1367      is_del,
1368      &if_address_index);
1369   if (error)
1370     goto done;
1371   
1372   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1373     {
1374       if (is_del)
1375         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1376                                   address_length);
1377       
1378       else
1379           ip4_add_interface_routes (sw_if_index,
1380                                     im, ip4_af.fib_index,
1381                                     pool_elt_at_index 
1382                                     (lm->if_address_pool, if_address_index));
1383     }
1384
1385   /* If pool did not grow/shrink: add duplicate address. */
1386   if (elts_before != pool_elts (lm->if_address_pool))
1387     {
1388       ip4_add_del_interface_address_callback_t * cb;
1389       vec_foreach (cb, im->add_del_interface_address_callbacks)
1390         cb->function (im, cb->function_opaque, sw_if_index,
1391                       address, address_length,
1392                       if_address_index,
1393                       is_del);
1394     }
1395
1396  done:
1397   vec_free (addr_fib);
1398   return error;
1399 }
1400
1401 clib_error_t *
1402 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1403                                ip4_address_t * address, u32 address_length,
1404                                u32 is_del)
1405 {
1406   return ip4_add_del_interface_address_internal
1407     (vm, sw_if_index, address, address_length,
1408      /* redistribute */ 1,
1409      /* insert_routes */ 1,
1410      is_del);
1411 }
1412
1413 static void serialize_ip4_fib (serialize_main_t * m, va_list * va)
1414 {
1415   ip4_fib_t * f = va_arg (*va, ip4_fib_t *);
1416   u32 l, dst, adj_index;
1417
1418   serialize_integer (m, f->table_id, sizeof (f->table_id));
1419   for (l = 0; l < ARRAY_LEN (f->adj_index_by_dst_address); l++)
1420     {
1421       u32 n_elts = hash_elts (f->adj_index_by_dst_address[l]);
1422
1423       serialize_integer (m, n_elts, sizeof (n_elts));
1424       hash_foreach (dst, adj_index, f->adj_index_by_dst_address[l], ({
1425         ip4_address_t tmp;
1426         tmp.as_u32 = dst;
1427         serialize (m, serialize_ip4_address, &tmp);
1428         serialize_integer (m, adj_index, sizeof (adj_index));
1429       }));
1430     }
1431 }
1432
1433 static void unserialize_ip4_fib (serialize_main_t * m, va_list * va)
1434 {
1435   ip4_add_del_route_args_t a;
1436   u32 i;
1437
1438   a.flags = (IP4_ROUTE_FLAG_ADD
1439              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE
1440              | IP4_ROUTE_FLAG_TABLE_ID);
1441   a.n_add_adj = 0;
1442   a.add_adj = 0;
1443
1444   unserialize_integer (m, &a.table_index_or_table_id,
1445                        sizeof (a.table_index_or_table_id));
1446
1447   for (i = 0; i < STRUCT_ARRAY_LEN (ip4_fib_t, adj_index_by_dst_address); i++)
1448     {
1449       u32 n_elts;
1450       unserialize_integer (m, &n_elts, sizeof (u32));
1451       a.dst_address_length = i;
1452       while (n_elts > 0)
1453         {
1454           unserialize (m, unserialize_ip4_address, &a.dst_address);
1455           unserialize_integer (m, &a.adj_index, sizeof (a.adj_index));
1456           ip4_add_del_route (&ip4_main, &a);
1457           n_elts--;
1458         }
1459     }
1460 }
1461
1462 void serialize_vnet_ip4_main (serialize_main_t * m, va_list * va)
1463 {
1464   vnet_main_t * vnm = va_arg (*va, vnet_main_t *);
1465   vnet_interface_main_t * vim = &vnm->interface_main;
1466   vnet_sw_interface_t * si;
1467   ip4_main_t * i4m = &ip4_main;
1468   ip4_interface_address_t * as = 0, * a;
1469
1470   /* Download adjacency tables & multipath stuff. */
1471   serialize (m, serialize_ip_lookup_main, &i4m->lookup_main);
1472
1473   /* FIBs. */
1474   {
1475     ip4_fib_t * f;
1476     u32 n_fibs = vec_len (i4m->fibs);
1477     serialize_integer (m, n_fibs, sizeof (n_fibs));
1478     vec_foreach (f, i4m->fibs)
1479       serialize (m, serialize_ip4_fib, f);
1480   }
1481
1482   /* FIB interface config. */
1483   vec_serialize (m, i4m->fib_index_by_sw_if_index, serialize_vec_32);
1484
1485   /* Interface ip4 addresses. */
1486   pool_foreach (si, vim->sw_interfaces, ({
1487     u32 sw_if_index = si->sw_if_index;
1488     ip_interface_address_t * ia;
1489     foreach_ip_interface_address (&i4m->lookup_main, ia, sw_if_index, 
1490                                   0 /* honor unnumbered */,
1491     ({
1492       ip4_address_t * x = ip_interface_address_get_address (&i4m->lookup_main, ia);
1493       vec_add2 (as, a, 1);
1494       a->address = x[0];
1495       a->length = ia->address_length;
1496       a->sw_if_index = sw_if_index;
1497     }));
1498   }));
1499   vec_serialize (m, as, serialize_vec_ip4_set_interface_address);
1500   vec_free (as);
1501 }
1502
1503 void unserialize_vnet_ip4_main (serialize_main_t * m, va_list * va)
1504 {
1505   vlib_main_t * vm = va_arg (*va, vlib_main_t *);
1506   ip4_main_t * i4m = &ip4_main;
1507   ip4_interface_address_t * as = 0, * a;
1508
1509   unserialize (m, unserialize_ip_lookup_main, &i4m->lookup_main);
1510
1511   {
1512     ip_adjacency_t * adj, * adj_heap;
1513     u32 n_adj;
1514     adj_heap = i4m->lookup_main.adjacency_heap;
1515     heap_foreach (adj, n_adj, adj_heap, ({
1516       unserialize_fixup_ip4_rewrite_adjacencies (vm, adj, n_adj);
1517       ip_call_add_del_adjacency_callbacks (&i4m->lookup_main, adj - adj_heap, /* is_del */ 0);
1518     }));
1519   }
1520
1521   /* FIBs */
1522   {
1523     u32 i, n_fibs;
1524     unserialize_integer (m, &n_fibs, sizeof (n_fibs));
1525     for (i = 0; i < n_fibs; i++)
1526       unserialize (m, unserialize_ip4_fib);
1527   }
1528
1529   vec_unserialize (m, &i4m->fib_index_by_sw_if_index, unserialize_vec_32);
1530
1531   vec_unserialize (m, &as, unserialize_vec_ip4_set_interface_address);
1532   vec_foreach (a, as) {
1533     ip4_add_del_interface_address_internal
1534       (vm, a->sw_if_index, &a->address, a->length,
1535        /* redistribute */ 0,
1536        /* insert_routes */ 0,
1537        /* is_del */ 0);
1538   }
1539   vec_free (as);
1540 }
1541
1542 static clib_error_t *
1543 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1544                                 u32 sw_if_index,
1545                                 u32 flags)
1546 {
1547   ip4_main_t * im = &ip4_main;
1548   ip_interface_address_t * ia;
1549   ip4_address_t * a;
1550   u32 is_admin_up, fib_index;
1551   
1552   /* Fill in lookup tables with default table (0). */
1553   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1554   
1555   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1556   
1557   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1558   
1559   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1560
1561   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1562                                 0 /* honor unnumbered */,
1563   ({
1564     a = ip_interface_address_get_address (&im->lookup_main, ia);
1565     if (is_admin_up)
1566       ip4_add_interface_routes (sw_if_index,
1567                                 im, fib_index,
1568                                 ia);
1569     else
1570       ip4_del_interface_routes (im, fib_index,
1571                                 a, ia->address_length);
1572   }));
1573
1574   return 0;
1575 }
1576  
1577 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1578
1579 static clib_error_t *
1580 ip4_sw_interface_add_del (vnet_main_t * vnm,
1581                           u32 sw_if_index,
1582                           u32 is_add)
1583 {
1584   vlib_main_t * vm = vnm->vlib_main;
1585   ip4_main_t * im = &ip4_main;
1586   ip_lookup_main_t * lm = &im->lookup_main;
1587   u32 ci, cast;
1588
1589   for (cast = 0; cast < VNET_N_CAST; cast++)
1590     {
1591       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1592       vnet_config_main_t * vcm = &cm->config_main;
1593
1594       if (! vcm->node_index_by_feature_index)
1595         {
1596           if (cast == VNET_UNICAST)
1597             {
1598               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1599               static char * feature_nodes[] = {
1600                 [IP4_RX_FEATURE_CHECK_ACCESS] = "ip4-inacl",
1601                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX] = "ip4-source-check-via-rx",
1602                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY] = "ip4-source-check-via-any",
1603                 [IP4_RX_FEATURE_IPSEC] = "ipsec-input-ip4",
1604                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1605                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup",
1606               };
1607
1608               vnet_config_init (vm, vcm,
1609                                 start_nodes, ARRAY_LEN (start_nodes),
1610                                 feature_nodes, ARRAY_LEN (feature_nodes));
1611             }
1612           else
1613             {
1614               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1615               static char * feature_nodes[] = {
1616                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1617                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup-multicast",
1618               };
1619
1620               vnet_config_init (vm, vcm,
1621                                 start_nodes, ARRAY_LEN (start_nodes),
1622                                 feature_nodes, ARRAY_LEN (feature_nodes));
1623             }
1624         }
1625
1626       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1627       ci = cm->config_index_by_sw_if_index[sw_if_index];
1628
1629       if (is_add)
1630         ci = vnet_config_add_feature (vm, vcm,
1631                                       ci,
1632                                       IP4_RX_FEATURE_LOOKUP,
1633                                       /* config data */ 0,
1634                                       /* # bytes of config data */ 0);
1635       else
1636         ci = vnet_config_del_feature (vm, vcm,
1637                                       ci,
1638                                       IP4_RX_FEATURE_LOOKUP,
1639                                       /* config data */ 0,
1640                                       /* # bytes of config data */ 0);
1641
1642       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1643     }
1644
1645   return /* no error */ 0;
1646 }
1647
1648 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1649
1650 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1651   .function = ip4_lookup,
1652   .name = "ip4-lookup",
1653   .vector_size = sizeof (u32),
1654
1655   .n_next_nodes = IP_LOOKUP_N_NEXT,
1656   .next_nodes = {
1657     [IP_LOOKUP_NEXT_MISS] = "ip4-miss",
1658     [IP_LOOKUP_NEXT_DROP] = "ip4-drop",
1659     [IP_LOOKUP_NEXT_PUNT] = "ip4-punt",
1660     [IP_LOOKUP_NEXT_LOCAL] = "ip4-local",
1661     [IP_LOOKUP_NEXT_ARP] = "ip4-arp",
1662     [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit",
1663     [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify",
1664     [IP_LOOKUP_NEXT_MAP] = "ip4-map",
1665     [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t",
1666     [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd",
1667     [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop",
1668     [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop", 
1669     [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop", 
1670   },
1671 };
1672
/* Global IPv4 main structure; functions in this file reach it via &ip4_main. */
ip4_main_t ip4_main;
1675
1676 clib_error_t *
1677 ip4_lookup_init (vlib_main_t * vm)
1678 {
1679   ip4_main_t * im = &ip4_main;
1680   uword i;
1681
1682   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1683     {
1684       u32 m;
1685
1686       if (i < 32)
1687         m = pow2_mask (i) << (32 - i);
1688       else 
1689         m = ~0;
1690       im->fib_masks[i] = clib_host_to_net_u32 (m);
1691     }
1692
1693   /* Create FIB with index 0 and table id of 0. */
1694   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1695
1696   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1697
1698   {
1699     pg_node_t * pn;
1700     pn = pg_get_node (ip4_lookup_node.index);
1701     pn->unformat_edit = unformat_pg_ip4_header;
1702   }
1703
1704   {
1705     ethernet_arp_header_t h;
1706
1707     memset (&h, 0, sizeof (h));
1708
1709     /* Set target ethernet address to all zeros. */
1710     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1711
1712 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1713 #define _8(f,v) h.f = v;
1714     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1715     _16 (l3_type, ETHERNET_TYPE_IP4);
1716     _8 (n_l2_address_bytes, 6);
1717     _8 (n_l3_address_bytes, 4);
1718     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1719 #undef _16
1720 #undef _8
1721
1722     vlib_packet_template_init (vm,
1723                                &im->ip4_arp_request_packet_template,
1724                                /* data */ &h,
1725                                sizeof (h),
1726                                /* alloc chunk size */ 8,
1727                                "ip4 arp");
1728   }
1729
1730   return 0;
1731 }
1732
1733 VLIB_INIT_FUNCTION (ip4_lookup_init);
1734
/* Per-packet trace record filled in by ip4_forward_next_trace and printed
   by format_ip4_forward_next_trace. */
typedef struct {
  /* Adjacency taken. */
  u32 adj_index;
  /* Flow hash copied from the buffer's ip metadata. */
  u32 flow_hash;
  /* FIB index selected by the buffer's RX sw_if_index. */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite.
     NOTE(review): only one u32 is subtracted from 64 although three u32
     fields precede this array -- confirm whether a 64-byte record was
     intended. */
  u8 packet_data[64 - 1*sizeof(u32)];
} ip4_forward_next_trace_t;
1744
1745 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1746 {
1747   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1748   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1749   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1750   vnet_main_t * vnm = vnet_get_main();
1751   ip4_main_t * im = &ip4_main;
1752   ip_adjacency_t * adj;
1753   uword indent = format_get_indent (s);
1754
1755   adj = ip_get_adjacency (&im->lookup_main, t->adj_index);
1756   s = format (s, "fib: %d adjacency: %U flow hash: 0x%08x",
1757               t->fib_index, format_ip_adjacency,
1758               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1759   switch (adj->lookup_next_index)
1760     {
1761     case IP_LOOKUP_NEXT_REWRITE:
1762       s = format (s, "\n%U%U",
1763                   format_white_space, indent,
1764                   format_ip_adjacency_packet_data,
1765                   vnm, &im->lookup_main, t->adj_index,
1766                   t->packet_data, sizeof (t->packet_data));
1767       break;
1768
1769     default:
1770       break;
1771     }
1772
1773   return s;
1774 }
1775
1776 /* Common trace function for all ip4-forward next nodes. */
1777 void
1778 ip4_forward_next_trace (vlib_main_t * vm,
1779                         vlib_node_runtime_t * node,
1780                         vlib_frame_t * frame,
1781                         vlib_rx_or_tx_t which_adj_index)
1782 {
1783   u32 * from, n_left;
1784   ip4_main_t * im = &ip4_main;
1785
1786   n_left = frame->n_vectors;
1787   from = vlib_frame_vector_args (frame);
1788   
1789   while (n_left >= 4)
1790     {
1791       u32 bi0, bi1;
1792       vlib_buffer_t * b0, * b1;
1793       ip4_forward_next_trace_t * t0, * t1;
1794
1795       /* Prefetch next iteration. */
1796       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1797       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1798
1799       bi0 = from[0];
1800       bi1 = from[1];
1801
1802       b0 = vlib_get_buffer (vm, bi0);
1803       b1 = vlib_get_buffer (vm, bi1);
1804
1805       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1806         {
1807           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1808           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1809           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1810           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1811                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1812           memcpy (t0->packet_data,
1813                   vlib_buffer_get_current (b0),
1814                   sizeof (t0->packet_data));
1815         }
1816       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1817         {
1818           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1819           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1820           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1821           t1->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1822                              vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1823           memcpy (t1->packet_data,
1824                   vlib_buffer_get_current (b1),
1825                   sizeof (t1->packet_data));
1826         }
1827       from += 2;
1828       n_left -= 2;
1829     }
1830
1831   while (n_left >= 1)
1832     {
1833       u32 bi0;
1834       vlib_buffer_t * b0;
1835       ip4_forward_next_trace_t * t0;
1836
1837       bi0 = from[0];
1838
1839       b0 = vlib_get_buffer (vm, bi0);
1840
1841       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1842         {
1843           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1844           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1845           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1846           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1847                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1848           memcpy (t0->packet_data,
1849                   vlib_buffer_get_current (b0),
1850                   sizeof (t0->packet_data));
1851         }
1852       from += 1;
1853       n_left -= 1;
1854     }
1855 }
1856
1857 static uword
1858 ip4_drop_or_punt (vlib_main_t * vm,
1859                   vlib_node_runtime_t * node,
1860                   vlib_frame_t * frame,
1861                   ip4_error_t error_code)
1862 {
1863   u32 * buffers = vlib_frame_vector_args (frame);
1864   uword n_packets = frame->n_vectors;
1865
1866   vlib_error_drop_buffers (vm, node,
1867                            buffers,
1868                            /* stride */ 1,
1869                            n_packets,
1870                            /* next */ 0,
1871                            ip4_input_node.index,
1872                            error_code);
1873
1874   if (node->flags & VLIB_NODE_FLAG_TRACE)
1875     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1876
1877   return n_packets;
1878 }
1879
1880 static uword
1881 ip4_drop (vlib_main_t * vm,
1882           vlib_node_runtime_t * node,
1883           vlib_frame_t * frame)
1884 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1885
1886 static uword
1887 ip4_punt (vlib_main_t * vm,
1888           vlib_node_runtime_t * node,
1889           vlib_frame_t * frame)
1890 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1891
1892 static uword
1893 ip4_miss (vlib_main_t * vm,
1894           vlib_node_runtime_t * node,
1895           vlib_frame_t * frame)
1896 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1897
1898 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1899   .function = ip4_drop,
1900   .name = "ip4-drop",
1901   .vector_size = sizeof (u32),
1902
1903   .format_trace = format_ip4_forward_next_trace,
1904
1905   .n_next_nodes = 1,
1906   .next_nodes = {
1907     [0] = "error-drop",
1908   },
1909 };
1910
1911 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1912   .function = ip4_punt,
1913   .name = "ip4-punt",
1914   .vector_size = sizeof (u32),
1915
1916   .format_trace = format_ip4_forward_next_trace,
1917
1918   .n_next_nodes = 1,
1919   .next_nodes = {
1920     [0] = "error-punt",
1921   },
1922 };
1923
1924 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1925   .function = ip4_miss,
1926   .name = "ip4-miss",
1927   .vector_size = sizeof (u32),
1928
1929   .format_trace = format_ip4_forward_next_trace,
1930
1931   .n_next_nodes = 1,
1932   .next_nodes = {
1933     [0] = "error-drop",
1934   },
1935 };
1936
1937 /* Compute TCP/UDP/ICMP4 checksum in software. */
1938 u16
1939 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1940                               ip4_header_t * ip0)
1941 {
1942   ip_csum_t sum0;
1943   u32 ip_header_length, payload_length_host_byte_order;
1944   u32 n_this_buffer, n_bytes_left;
1945   u16 sum16;
1946   void * data_this_buffer;
1947   
1948   /* Initialize checksum with ip header. */
1949   ip_header_length = ip4_header_bytes (ip0);
1950   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1951   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1952
1953   if (BITS (uword) == 32)
1954     {
1955       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1956       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1957     }
1958   else
1959     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1960
1961   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1962   data_this_buffer = (void *) ip0 + ip_header_length;
1963   if (n_this_buffer + ip_header_length > p0->current_length)
1964     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1965   while (1)
1966     {
1967       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1968       n_bytes_left -= n_this_buffer;
1969       if (n_bytes_left == 0)
1970         break;
1971
1972       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1973       p0 = vlib_get_buffer (vm, p0->next_buffer);
1974       data_this_buffer = vlib_buffer_get_current (p0);
1975       n_this_buffer = p0->current_length;
1976     }
1977
1978   sum16 = ~ ip_csum_fold (sum0);
1979
1980   return sum16;
1981 }
1982
1983 static u32
1984 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1985 {
1986   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1987   udp_header_t * udp0;
1988   u16 sum16;
1989
1990   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1991           || ip0->protocol == IP_PROTOCOL_UDP);
1992
1993   udp0 = (void *) (ip0 + 1);
1994   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1995     {
1996       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1997                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1998       return p0->flags;
1999     }
2000
2001   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
2002
2003   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
2004                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
2005
2006   return p0->flags;
2007 }
2008
2009 static uword
2010 ip4_local (vlib_main_t * vm,
2011            vlib_node_runtime_t * node,
2012            vlib_frame_t * frame)
2013 {
2014   ip4_main_t * im = &ip4_main;
2015   ip_lookup_main_t * lm = &im->lookup_main;
2016   ip_local_next_t next_index;
2017   u32 * from, * to_next, n_left_from, n_left_to_next;
2018   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2019
2020   from = vlib_frame_vector_args (frame);
2021   n_left_from = frame->n_vectors;
2022   next_index = node->cached_next_index;
2023   
2024   if (node->flags & VLIB_NODE_FLAG_TRACE)
2025     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2026
2027   while (n_left_from > 0)
2028     {
2029       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2030
2031       while (n_left_from >= 4 && n_left_to_next >= 2)
2032         {
2033           vlib_buffer_t * p0, * p1;
2034           ip4_header_t * ip0, * ip1;
2035           udp_header_t * udp0, * udp1;
2036           ip4_fib_mtrie_t * mtrie0, * mtrie1;
2037           ip4_fib_mtrie_leaf_t leaf0, leaf1;
2038           ip_adjacency_t * adj0, * adj1;
2039           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
2040           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
2041           i32 len_diff0, len_diff1;
2042           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2043           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
2044           u8 enqueue_code;
2045       
2046           pi0 = to_next[0] = from[0];
2047           pi1 = to_next[1] = from[1];
2048           from += 2;
2049           n_left_from -= 2;
2050           to_next += 2;
2051           n_left_to_next -= 2;
2052       
2053           p0 = vlib_get_buffer (vm, pi0);
2054           p1 = vlib_get_buffer (vm, pi1);
2055
2056           ip0 = vlib_buffer_get_current (p0);
2057           ip1 = vlib_buffer_get_current (p1);
2058
2059           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2060                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2061           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
2062                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
2063
2064           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2065           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
2066
2067           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
2068
2069           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2070           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
2071
2072           proto0 = ip0->protocol;
2073           proto1 = ip1->protocol;
2074           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2075           is_udp1 = proto1 == IP_PROTOCOL_UDP;
2076           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2077           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
2078
2079           flags0 = p0->flags;
2080           flags1 = p1->flags;
2081
2082           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2083           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2084
2085           udp0 = ip4_next_header (ip0);
2086           udp1 = ip4_next_header (ip1);
2087
2088           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2089           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2090           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2091
2092           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2093           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
2094
2095           /* Verify UDP length. */
2096           ip_len0 = clib_net_to_host_u16 (ip0->length);
2097           ip_len1 = clib_net_to_host_u16 (ip1->length);
2098           udp_len0 = clib_net_to_host_u16 (udp0->length);
2099           udp_len1 = clib_net_to_host_u16 (udp1->length);
2100
2101           len_diff0 = ip_len0 - udp_len0;
2102           len_diff1 = ip_len1 - udp_len1;
2103
2104           len_diff0 = is_udp0 ? len_diff0 : 0;
2105           len_diff1 = is_udp1 ? len_diff1 : 0;
2106
2107           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
2108                                 & good_tcp_udp0 & good_tcp_udp1)))
2109             {
2110               if (is_tcp_udp0)
2111                 {
2112                   if (is_tcp_udp0
2113                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2114                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2115                   good_tcp_udp0 =
2116                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2117                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2118                 }
2119               if (is_tcp_udp1)
2120                 {
2121                   if (is_tcp_udp1
2122                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2123                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
2124                   good_tcp_udp1 =
2125                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2126                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2127                 }
2128             }
2129
2130           good_tcp_udp0 &= len_diff0 >= 0;
2131           good_tcp_udp1 &= len_diff1 >= 0;
2132
2133           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2134           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
2135
2136           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
2137
2138           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2139           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
2140
2141           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2142           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2143                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2144                     : error0);
2145           error1 = (is_tcp_udp1 && ! good_tcp_udp1
2146                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
2147                     : error1);
2148
2149           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2150           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
2151
2152           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2153           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2154
2155           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
2156           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2157
2158           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2159                                                            &ip0->src_address,
2160                                                            /* no_default_route */ 1));
2161           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
2162                                                            &ip1->src_address,
2163                                                            /* no_default_route */ 1));
2164
2165           adj0 = ip_get_adjacency (lm, adj_index0);
2166           adj1 = ip_get_adjacency (lm, adj_index1);
2167
2168           /* 
2169            * Must have a route to source otherwise we drop the packet.
2170            * ip4 broadcasts are accepted, e.g. to make dhcp client work
2171            */
2172           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2173                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2174                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2175                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2176                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2177                     ? IP4_ERROR_SRC_LOOKUP_MISS
2178                     : error0);
2179           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
2180                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2181                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
2182                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2183                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2184                     ? IP4_ERROR_SRC_LOOKUP_MISS
2185                     : error1);
2186
2187           next0 = lm->local_next_by_ip_protocol[proto0];
2188           next1 = lm->local_next_by_ip_protocol[proto1];
2189
2190           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2191           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
2192
2193           p0->error = error0 ? error_node->errors[error0] : 0;
2194           p1->error = error1 ? error_node->errors[error1] : 0;
2195
2196           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
2197
2198           if (PREDICT_FALSE (enqueue_code != 0))
2199             {
2200               switch (enqueue_code)
2201                 {
2202                 case 1:
2203                   /* A B A */
2204                   to_next[-2] = pi1;
2205                   to_next -= 1;
2206                   n_left_to_next += 1;
2207                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2208                   break;
2209
2210                 case 2:
2211                   /* A A B */
2212                   to_next -= 1;
2213                   n_left_to_next += 1;
2214                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2215                   break;
2216
2217                 case 3:
2218                   /* A B B or A B C */
2219                   to_next -= 2;
2220                   n_left_to_next += 2;
2221                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2222                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2223                   if (next0 == next1)
2224                     {
2225                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2226                       next_index = next1;
2227                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2228                     }
2229                   break;
2230                 }
2231             }
2232         }
2233
2234       while (n_left_from > 0 && n_left_to_next > 0)
2235         {
2236           vlib_buffer_t * p0;
2237           ip4_header_t * ip0;
2238           udp_header_t * udp0;
2239           ip4_fib_mtrie_t * mtrie0;
2240           ip4_fib_mtrie_leaf_t leaf0;
2241           ip_adjacency_t * adj0;
2242           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
2243           i32 len_diff0;
2244           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2245       
2246           pi0 = to_next[0] = from[0];
2247           from += 1;
2248           n_left_from -= 1;
2249           to_next += 1;
2250           n_left_to_next -= 1;
2251       
2252           p0 = vlib_get_buffer (vm, pi0);
2253
2254           ip0 = vlib_buffer_get_current (p0);
2255
2256           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2257                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2258
2259           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2260
2261           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2262
2263           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2264
2265           proto0 = ip0->protocol;
2266           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2267           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2268
2269           flags0 = p0->flags;
2270
2271           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2272
2273           udp0 = ip4_next_header (ip0);
2274
2275           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2276           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2277
2278           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2279
2280           /* Verify UDP length. */
2281           ip_len0 = clib_net_to_host_u16 (ip0->length);
2282           udp_len0 = clib_net_to_host_u16 (udp0->length);
2283
2284           len_diff0 = ip_len0 - udp_len0;
2285
2286           len_diff0 = is_udp0 ? len_diff0 : 0;
2287
2288           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
2289             {
2290               if (is_tcp_udp0)
2291                 {
2292                   if (is_tcp_udp0
2293                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2294                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2295                   good_tcp_udp0 =
2296                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2297                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2298                 }
2299             }
2300
2301           good_tcp_udp0 &= len_diff0 >= 0;
2302
2303           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2304
2305           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2306
2307           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2308
2309           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2310           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2311                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2312                     : error0);
2313
2314           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2315
2316           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2317           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2318
2319           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2320                                                            &ip0->src_address,
2321                                                            /* no_default_route */ 1));
2322
2323           adj0 = ip_get_adjacency (lm, adj_index0);
2324
2325           /* Must have a route to source otherwise we drop the packet. */
2326           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2327                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2328                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2329                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2330                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2331                     ? IP4_ERROR_SRC_LOOKUP_MISS
2332                     : error0);
2333
2334           next0 = lm->local_next_by_ip_protocol[proto0];
2335
2336           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2337
2338           p0->error = error0? error_node->errors[error0] : 0;
2339
2340           if (PREDICT_FALSE (next0 != next_index))
2341             {
2342               n_left_to_next += 1;
2343               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2344
2345               next_index = next0;
2346               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2347               to_next[0] = pi0;
2348               to_next += 1;
2349               n_left_to_next -= 1;
2350             }
2351         }
2352   
2353       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2354     }
2355
2356   return frame->n_vectors;
2357 }
2358
2359 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2360   .function = ip4_local,
2361   .name = "ip4-local",
2362   .vector_size = sizeof (u32),
2363
2364   .format_trace = format_ip4_forward_next_trace,
2365
2366   .n_next_nodes = IP_LOCAL_N_NEXT,
2367   .next_nodes = {
2368     [IP_LOCAL_NEXT_DROP] = "error-drop",
2369     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2370     // [IP_LOCAL_NEXT_TCP_LOOKUP] = "ip4-tcp-lookup",
2371     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2372     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2373   },
2374 };
2375
2376 void ip4_register_protocol (u32 protocol, u32 node_index)
2377 {
2378   vlib_main_t * vm = vlib_get_main();
2379   ip4_main_t * im = &ip4_main;
2380   ip_lookup_main_t * lm = &im->lookup_main;
2381
2382   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2383   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2384 }
2385
2386 static clib_error_t *
2387 show_ip_local_command_fn (vlib_main_t * vm,
2388                           unformat_input_t * input,
2389                          vlib_cli_command_t * cmd)
2390 {
2391   ip4_main_t * im = &ip4_main;
2392   ip_lookup_main_t * lm = &im->lookup_main;
2393   int i;
2394
2395   vlib_cli_output (vm, "Protocols handled by ip4_local");
2396   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2397     {
2398       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2399         vlib_cli_output (vm, "%d", i);
2400     }
2401   return 0;
2402 }
2403
2404
2405
2406 VLIB_CLI_COMMAND (show_ip_local, static) = {
2407   .path = "show ip local",
2408   .function = show_ip_local_command_fn,
2409   .short_help = "Show ip local protocol table",
2410 };
2411
2412 static uword
2413 ip4_arp (vlib_main_t * vm,
2414          vlib_node_runtime_t * node,
2415          vlib_frame_t * frame)
2416 {
2417   vnet_main_t * vnm = vnet_get_main();
2418   ip4_main_t * im = &ip4_main;
2419   ip_lookup_main_t * lm = &im->lookup_main;
2420   u32 * from, * to_next_drop;
2421   uword n_left_from, n_left_to_next_drop, next_index;
2422   static f64 time_last_seed_change = -1e100;
2423   static u32 hash_seeds[3];
2424   static uword hash_bitmap[256 / BITS (uword)]; 
2425   f64 time_now;
2426
2427   if (node->flags & VLIB_NODE_FLAG_TRACE)
2428     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2429
2430   time_now = vlib_time_now (vm);
2431   if (time_now - time_last_seed_change > 1e-3)
2432     {
2433       uword i;
2434       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2435                                              sizeof (hash_seeds));
2436       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2437         hash_seeds[i] = r[i];
2438
2439       /* Mark all hash keys as been no-seen before. */
2440       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2441         hash_bitmap[i] = 0;
2442
2443       time_last_seed_change = time_now;
2444     }
2445
2446   from = vlib_frame_vector_args (frame);
2447   n_left_from = frame->n_vectors;
2448   next_index = node->cached_next_index;
2449   if (next_index == IP4_ARP_NEXT_DROP)
2450     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2451
2452   while (n_left_from > 0)
2453     {
2454       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2455                            to_next_drop, n_left_to_next_drop);
2456
2457       while (n_left_from > 0 && n_left_to_next_drop > 0)
2458         {
2459           vlib_buffer_t * p0;
2460           ip4_header_t * ip0;
2461           ethernet_header_t * eh0;
2462           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2463           uword bm0;
2464           ip_adjacency_t * adj0;
2465
2466           pi0 = from[0];
2467
2468           p0 = vlib_get_buffer (vm, pi0);
2469
2470           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2471           adj0 = ip_get_adjacency (lm, adj_index0);
2472           ip0 = vlib_buffer_get_current (p0);
2473
2474           /* 
2475            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2476            * rewrite to this packet, we need to skip it here.
2477            * Note, to distinguish from src IP addr *.8.6.*, we
2478            * check for a bcast eth dest instead of IPv4 version.
2479            */
2480           eh0 = (ethernet_header_t*)ip0;
2481           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2482             {
2483               u32 vlan_num = 0;
2484               u16 * etype = &eh0->type;
2485               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2486                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2487                 {
2488                   vlan_num += 1;
2489                   etype += 2; //vlan tag also 16 bits, same as etype
2490                 }
2491               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2492                 {
2493                   vlib_buffer_advance (
2494                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2495                   ip0 = vlib_buffer_get_current (p0);
2496                 }
2497             }
2498
2499           a0 = hash_seeds[0];
2500           b0 = hash_seeds[1];
2501           c0 = hash_seeds[2];
2502
2503           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2504           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2505
2506           a0 ^= ip0->dst_address.data_u32;
2507           b0 ^= sw_if_index0;
2508
2509           hash_v3_finalize32 (a0, b0, c0);
2510
2511           c0 &= BITS (hash_bitmap) - 1;
2512           c0 = c0 / BITS (uword);
2513           m0 = (uword) 1 << (c0 % BITS (uword));
2514
2515           bm0 = hash_bitmap[c0];
2516           drop0 = (bm0 & m0) != 0;
2517
2518           /* Mark it as seen. */
2519           hash_bitmap[c0] = bm0 | m0;
2520
2521           from += 1;
2522           n_left_from -= 1;
2523           to_next_drop[0] = pi0;
2524           to_next_drop += 1;
2525           n_left_to_next_drop -= 1;
2526
2527           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2528
2529           if (drop0)
2530             continue;
2531
2532           /* 
2533            * Can happen if the control-plane is programming tables
2534            * with traffic flowing; at least that's today's lame excuse.
2535            */
2536           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2537             {
2538               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2539             }
2540           else
2541           /* Send ARP request. */
2542           {
2543             u32 bi0 = 0;
2544             vlib_buffer_t * b0;
2545             ethernet_arp_header_t * h0;
2546             vnet_hw_interface_t * hw_if0;
2547
2548             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2549
2550             /* Add rewrite/encap string for ARP packet. */
2551             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2552
2553             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2554
2555             /* Src ethernet address in ARP header. */
2556             memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2557                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2558
2559             ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0);
2560
2561             /* Copy in destination address we are requesting. */
2562             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2563
2564             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2565             b0 = vlib_get_buffer (vm, bi0);
2566             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2567
2568             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2569
2570             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2571           }
2572         }
2573
2574       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2575     }
2576
2577   return frame->n_vectors;
2578 }
2579
2580 static char * ip4_arp_error_strings[] = {
2581   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2582   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2583   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2584   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2585   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2586 };
2587
2588 VLIB_REGISTER_NODE (ip4_arp_node) = {
2589   .function = ip4_arp,
2590   .name = "ip4-arp",
2591   .vector_size = sizeof (u32),
2592
2593   .format_trace = format_ip4_forward_next_trace,
2594
2595   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2596   .error_strings = ip4_arp_error_strings,
2597
2598   .n_next_nodes = IP4_ARP_N_NEXT,
2599   .next_nodes = {
2600     [IP4_ARP_NEXT_DROP] = "error-drop",
2601   },
2602 };
2603
2604 #define foreach_notrace_ip4_arp_error           \
2605 _(DROP)                                         \
2606 _(REQUEST_SENT)                                 \
2607 _(REPLICATE_DROP)                               \
2608 _(REPLICATE_FAIL)
2609
2610 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2611 {
2612   vlib_node_runtime_t *rt = 
2613     vlib_node_get_runtime (vm, ip4_arp_node.index);
2614
2615   /* don't trace ARP request packets */
2616 #define _(a)                                    \
2617     vnet_pcap_drop_trace_filter_add_del         \
2618         (rt->errors[IP4_ARP_ERROR_##a],         \
2619          1 /* is_add */);
2620     foreach_notrace_ip4_arp_error;
2621 #undef _
2622   return 0;
2623 }
2624
2625 VLIB_INIT_FUNCTION(arp_notrace_init);
2626
2627
2628 /* Send an ARP request to see if given destination is reachable on given interface. */
2629 clib_error_t *
2630 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2631 {
2632   vnet_main_t * vnm = vnet_get_main();
2633   ip4_main_t * im = &ip4_main;
2634   ethernet_arp_header_t * h;
2635   ip4_address_t * src;
2636   ip_interface_address_t * ia;
2637   ip_adjacency_t * adj;
2638   vnet_hw_interface_t * hi;
2639   vnet_sw_interface_t * si;
2640   vlib_buffer_t * b;
2641   u32 bi = 0;
2642
2643   si = vnet_get_sw_interface (vnm, sw_if_index);
2644
2645   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2646     {
2647       return clib_error_return (0, "%U: interface %U down",
2648                                 format_ip4_address, dst, 
2649                                 format_vnet_sw_if_index_name, vnm, 
2650                                 sw_if_index);
2651     }
2652
2653   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2654   if (! src)
2655     {
2656       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2657       return clib_error_return 
2658         (0, "no matching interface address for destination %U (interface %U)",
2659          format_ip4_address, dst,
2660          format_vnet_sw_if_index_name, vnm, sw_if_index);
2661     }
2662
2663   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2664
2665   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2666
2667   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2668
2669   memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2670
2671   h->ip4_over_ethernet[0].ip4 = src[0];
2672   h->ip4_over_ethernet[1].ip4 = dst[0];
2673
2674   b = vlib_get_buffer (vm, bi);
2675   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2676
2677   /* Add encapsulation string for software interface (e.g. ethernet header). */
2678   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2679   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2680
2681   {
2682     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2683     u32 * to_next = vlib_frame_vector_args (f);
2684     to_next[0] = bi;
2685     f->n_vectors = 1;
2686     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2687   }
2688
2689   return /* no error */ 0;
2690 }
2691
/* Next-node slots shared by the ip4-rewrite-transit and ip4-rewrite-local nodes. */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,    /* "error-drop" */
  IP4_REWRITE_NEXT_ARP = 1,     /* "ip4-arp" */
} ip4_rewrite_next_t;
2696
2697 always_inline uword
2698 ip4_rewrite_inline (vlib_main_t * vm,
2699                     vlib_node_runtime_t * node,
2700                     vlib_frame_t * frame,
2701                     int rewrite_for_locally_received_packets)
2702 {
2703   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2704   u32 * from = vlib_frame_vector_args (frame);
2705   u32 n_left_from, n_left_to_next, * to_next, next_index;
2706   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2707   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2708
2709   n_left_from = frame->n_vectors;
2710   next_index = node->cached_next_index;
2711   u32 cpu_index = os_get_cpu_number();
2712   
2713   while (n_left_from > 0)
2714     {
2715       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2716
2717       while (n_left_from >= 4 && n_left_to_next >= 2)
2718         {
2719           ip_adjacency_t * adj0, * adj1;
2720           vlib_buffer_t * p0, * p1;
2721           ip4_header_t * ip0, * ip1;
2722           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2723           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2724           u32 next0_override, next1_override;
2725       
2726           if (rewrite_for_locally_received_packets)
2727               next0_override = next1_override = 0;
2728
2729           /* Prefetch next iteration. */
2730           {
2731             vlib_buffer_t * p2, * p3;
2732
2733             p2 = vlib_get_buffer (vm, from[2]);
2734             p3 = vlib_get_buffer (vm, from[3]);
2735
2736             vlib_prefetch_buffer_header (p2, STORE);
2737             vlib_prefetch_buffer_header (p3, STORE);
2738
2739             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2740             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2741           }
2742
2743           pi0 = to_next[0] = from[0];
2744           pi1 = to_next[1] = from[1];
2745
2746           from += 2;
2747           n_left_from -= 2;
2748           to_next += 2;
2749           n_left_to_next -= 2;
2750       
2751           p0 = vlib_get_buffer (vm, pi0);
2752           p1 = vlib_get_buffer (vm, pi1);
2753
2754           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2755           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2756
2757           /* We should never rewrite a pkt using the MISS adjacency */
2758           ASSERT(adj_index0 && adj_index1);
2759
2760           ip0 = vlib_buffer_get_current (p0);
2761           ip1 = vlib_buffer_get_current (p1);
2762
2763           error0 = error1 = IP4_ERROR_NONE;
2764
2765           /* Decrement TTL & update checksum.
2766              Works either endian, so no need for byte swap. */
2767           if (! rewrite_for_locally_received_packets)
2768             {
2769               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2770
2771               /* Input node should have reject packets with ttl 0. */
2772               ASSERT (ip0->ttl > 0);
2773               ASSERT (ip1->ttl > 0);
2774
2775               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2776               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2777
2778               checksum0 += checksum0 >= 0xffff;
2779               checksum1 += checksum1 >= 0xffff;
2780
2781               ip0->checksum = checksum0;
2782               ip1->checksum = checksum1;
2783
2784               ttl0 -= 1;
2785               ttl1 -= 1;
2786
2787               ip0->ttl = ttl0;
2788               ip1->ttl = ttl1;
2789
2790               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2791               error1 = ttl1 <= 0 ? IP4_ERROR_TIME_EXPIRED : error1;
2792
2793               /* Verify checksum. */
2794               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2795               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2796             }
2797
2798           /* Rewrite packet header and updates lengths. */
2799           adj0 = ip_get_adjacency (lm, adj_index0);
2800           adj1 = ip_get_adjacency (lm, adj_index1);
2801       
2802           if (rewrite_for_locally_received_packets)
2803             {
2804               /*
2805                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2806                * we end up here with a local adjacency in hand
2807                * The local adj rewrite data is 0xfefe on purpose.
2808                * Bad engineer, no donut for you.
2809                */
2810               if (PREDICT_FALSE(adj0->lookup_next_index 
2811                                 == IP_LOOKUP_NEXT_LOCAL))
2812                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2813               if (PREDICT_FALSE(adj0->lookup_next_index
2814                                 == IP_LOOKUP_NEXT_ARP))
2815                 next0_override = IP4_REWRITE_NEXT_ARP;
2816               if (PREDICT_FALSE(adj1->lookup_next_index 
2817                                 == IP_LOOKUP_NEXT_LOCAL))
2818                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2819               if (PREDICT_FALSE(adj1->lookup_next_index
2820                                 == IP_LOOKUP_NEXT_ARP))
2821                 next1_override = IP4_REWRITE_NEXT_ARP;
2822             }
2823
2824           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2825           rw_len0 = adj0[0].rewrite_header.data_bytes;
2826           rw_len1 = adj1[0].rewrite_header.data_bytes;
2827           next0 = (error0 == IP4_ERROR_NONE) 
2828             ? adj0[0].rewrite_header.next_index : 0;
2829
2830           if (rewrite_for_locally_received_packets)
2831               next0 = next0 && next0_override ? next0_override : next0;
2832
2833           next1 = (error1 == IP4_ERROR_NONE)
2834             ? adj1[0].rewrite_header.next_index : 0;
2835
2836           if (rewrite_for_locally_received_packets)
2837               next1 = next1 && next1_override ? next1_override : next1;
2838
2839           /* 
2840            * We've already accounted for an ethernet_header_t elsewhere
2841            */
2842           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2843               vlib_increment_combined_counter 
2844                   (&lm->adjacency_counters,
2845                    cpu_index, adj_index0, 
2846                    /* packet increment */ 0,
2847                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2848
2849           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2850               vlib_increment_combined_counter 
2851                   (&lm->adjacency_counters,
2852                    cpu_index, adj_index1, 
2853                    /* packet increment */ 0,
2854                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2855
2856           /* Check MTU of outgoing interface. */
2857           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2858                     ? IP4_ERROR_MTU_EXCEEDED
2859                     : error0);
2860           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2861                     ? IP4_ERROR_MTU_EXCEEDED
2862                     : error1);
2863
2864           p0->current_data -= rw_len0;
2865           p1->current_data -= rw_len1;
2866
2867           p0->current_length += rw_len0;
2868           p1->current_length += rw_len1;
2869
2870           vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
2871           vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index;
2872       
2873           p0->error = error_node->errors[error0];
2874           p1->error = error_node->errors[error1];
2875
2876           /* Guess we are only writing on simple Ethernet header. */
2877           vnet_rewrite_two_headers (adj0[0], adj1[0],
2878                                     ip0, ip1,
2879                                     sizeof (ethernet_header_t));
2880       
2881           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2882                                            to_next, n_left_to_next,
2883                                            pi0, pi1, next0, next1);
2884         }
2885
2886       while (n_left_from > 0 && n_left_to_next > 0)
2887         {
2888           ip_adjacency_t * adj0;
2889           vlib_buffer_t * p0;
2890           ip4_header_t * ip0;
2891           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2892           u32 next0_override;
2893       
2894           if (rewrite_for_locally_received_packets)
2895               next0_override = 0;
2896
2897           pi0 = to_next[0] = from[0];
2898
2899           p0 = vlib_get_buffer (vm, pi0);
2900
2901           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2902
2903           /* We should never rewrite a pkt using the MISS adjacency */
2904           ASSERT(adj_index0);
2905
2906           adj0 = ip_get_adjacency (lm, adj_index0);
2907       
2908           ip0 = vlib_buffer_get_current (p0);
2909
2910           error0 = IP4_ERROR_NONE;
2911           next0 = 0;            /* drop on error */
2912
2913           /* Decrement TTL & update checksum. */
2914           if (! rewrite_for_locally_received_packets)
2915             {
2916               i32 ttl0 = ip0->ttl;
2917
2918               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2919
2920               checksum0 += checksum0 >= 0xffff;
2921
2922               ip0->checksum = checksum0;
2923
2924               ASSERT (ip0->ttl > 0);
2925
2926               ttl0 -= 1;
2927
2928               ip0->ttl = ttl0;
2929
2930               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2931
2932               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2933             }
2934
2935           if (rewrite_for_locally_received_packets)
2936             {
2937               /*
2938                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2939                * we end up here with a local adjacency in hand
2940                * The local adj rewrite data is 0xfefe on purpose.
2941                * Bad engineer, no donut for you.
2942                */
2943               if (PREDICT_FALSE(adj0->lookup_next_index 
2944                                 == IP_LOOKUP_NEXT_LOCAL))
2945                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2946               /* 
2947                * We have to override the next_index in ARP adjacencies,
2948                * because they're set up for ip4-arp, not this node...
2949                */
2950               if (PREDICT_FALSE(adj0->lookup_next_index
2951                                 == IP_LOOKUP_NEXT_ARP))
2952                 next0_override = IP4_REWRITE_NEXT_ARP;
2953             }
2954
2955           /* Guess we are only writing on simple Ethernet header. */
2956           vnet_rewrite_one_header (adj0[0], ip0, 
2957                                    sizeof (ethernet_header_t));
2958           
2959           /* Update packet buffer attributes/set output interface. */
2960           rw_len0 = adj0[0].rewrite_header.data_bytes;
2961           
2962           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2963               vlib_increment_combined_counter 
2964                   (&lm->adjacency_counters,
2965                    cpu_index, adj_index0, 
2966                    /* packet increment */ 0,
2967                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2968           
2969           /* Check MTU of outgoing interface. */
2970           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2971                     > adj0[0].rewrite_header.max_l3_packet_bytes
2972                     ? IP4_ERROR_MTU_EXCEEDED
2973                     : error0);
2974           
2975           p0->error = error_node->errors[error0];
2976           p0->current_data -= rw_len0;
2977           p0->current_length += rw_len0;
2978           vnet_buffer (p0)->sw_if_index[VLIB_TX] = 
2979             adj0[0].rewrite_header.sw_if_index;
2980           
2981           next0 = (error0 == IP4_ERROR_NONE)
2982             ? adj0[0].rewrite_header.next_index : 0;
2983
2984           if (rewrite_for_locally_received_packets)
2985               next0 = next0 && next0_override ? next0_override : next0;
2986
2987           from += 1;
2988           n_left_from -= 1;
2989           to_next += 1;
2990           n_left_to_next -= 1;
2991       
2992           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2993                                            to_next, n_left_to_next,
2994                                            pi0, next0);
2995         }
2996   
2997       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2998     }
2999
3000   /* Need to do trace after rewrites to pick up new packet data. */
3001   if (node->flags & VLIB_NODE_FLAG_TRACE)
3002     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
3003
3004   return frame->n_vectors;
3005 }
3006
3007 static uword
3008 ip4_rewrite_transit (vlib_main_t * vm,
3009                      vlib_node_runtime_t * node,
3010                      vlib_frame_t * frame)
3011 {
3012   return ip4_rewrite_inline (vm, node, frame,
3013                              /* rewrite_for_locally_received_packets */ 0);
3014 }
3015
3016 static uword
3017 ip4_rewrite_local (vlib_main_t * vm,
3018                    vlib_node_runtime_t * node,
3019                    vlib_frame_t * frame)
3020 {
3021   return ip4_rewrite_inline (vm, node, frame,
3022                              /* rewrite_for_locally_received_packets */ 1);
3023 }
3024
3025 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
3026   .function = ip4_rewrite_transit,
3027   .name = "ip4-rewrite-transit",
3028   .vector_size = sizeof (u32),
3029
3030   .format_trace = format_ip4_forward_next_trace,
3031
3032   .n_next_nodes = 2,
3033   .next_nodes = {
3034     [IP4_REWRITE_NEXT_DROP] = "error-drop",
3035     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
3036   },
3037 };
3038
3039 VLIB_REGISTER_NODE (ip4_rewrite_local_node,static) = {
3040   .function = ip4_rewrite_local,
3041   .name = "ip4-rewrite-local",
3042   .vector_size = sizeof (u32),
3043
3044   .sibling_of = "ip4-rewrite-transit",
3045
3046   .format_trace = format_ip4_forward_next_trace,
3047
3048   .n_next_nodes = 2,
3049   .next_nodes = {
3050     [IP4_REWRITE_NEXT_DROP] = "error-drop",
3051     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
3052   },
3053 };
3054
3055 static clib_error_t *
3056 add_del_interface_table (vlib_main_t * vm,
3057                          unformat_input_t * input,
3058                          vlib_cli_command_t * cmd)
3059 {
3060   vnet_main_t * vnm = vnet_get_main();
3061   clib_error_t * error = 0;
3062   u32 sw_if_index, table_id;
3063
3064   sw_if_index = ~0;
3065
3066   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
3067     {
3068       error = clib_error_return (0, "unknown interface `%U'",
3069                                  format_unformat_error, input);
3070       goto done;
3071     }
3072
3073   if (unformat (input, "%d", &table_id))
3074     ;
3075   else
3076     {
3077       error = clib_error_return (0, "expected table id `%U'",
3078                                  format_unformat_error, input);
3079       goto done;
3080     }
3081
3082   {
3083     ip4_main_t * im = &ip4_main;
3084     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
3085
3086     if (fib) 
3087       {
3088         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
3089         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
3090     }
3091   }
3092
3093  done:
3094   return error;
3095 }
3096
3097 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
3098   .path = "set interface ip table",
3099   .function = add_del_interface_table,
3100   .short_help = "Add/delete FIB table id for interface",
3101 };
3102
3103
3104 static uword
3105 ip4_lookup_multicast (vlib_main_t * vm,
3106                       vlib_node_runtime_t * node,
3107                       vlib_frame_t * frame)
3108 {
3109   ip4_main_t * im = &ip4_main;
3110   ip_lookup_main_t * lm = &im->lookup_main;
3111   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
3112   u32 n_left_from, n_left_to_next, * from, * to_next;
3113   ip_lookup_next_t next;
3114   u32 cpu_index = os_get_cpu_number();
3115
3116   from = vlib_frame_vector_args (frame);
3117   n_left_from = frame->n_vectors;
3118   next = node->cached_next_index;
3119
3120   while (n_left_from > 0)
3121     {
3122       vlib_get_next_frame (vm, node, next,
3123                            to_next, n_left_to_next);
3124
3125       while (n_left_from >= 4 && n_left_to_next >= 2)
3126         {
3127           vlib_buffer_t * p0, * p1;
3128           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
3129           ip_lookup_next_t next0, next1;
3130           ip4_header_t * ip0, * ip1;
3131           ip_adjacency_t * adj0, * adj1;
3132           u32 fib_index0, fib_index1;
3133           u32 flow_hash_config0, flow_hash_config1;
3134
3135           /* Prefetch next iteration. */
3136           {
3137             vlib_buffer_t * p2, * p3;
3138
3139             p2 = vlib_get_buffer (vm, from[2]);
3140             p3 = vlib_get_buffer (vm, from[3]);
3141
3142             vlib_prefetch_buffer_header (p2, LOAD);
3143             vlib_prefetch_buffer_header (p3, LOAD);
3144
3145             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
3146             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
3147           }
3148
3149           pi0 = to_next[0] = from[0];
3150           pi1 = to_next[1] = from[1];
3151
3152           p0 = vlib_get_buffer (vm, pi0);
3153           p1 = vlib_get_buffer (vm, pi1);
3154
3155           ip0 = vlib_buffer_get_current (p0);
3156           ip1 = vlib_buffer_get_current (p1);
3157
3158           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3159           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
3160           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3161             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3162           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
3163             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
3164
3165           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3166                                               &ip0->dst_address, p0);
3167           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
3168                                               &ip1->dst_address, p1);
3169
3170           adj0 = ip_get_adjacency (lm, adj_index0);
3171           adj1 = ip_get_adjacency (lm, adj_index1);
3172
3173           next0 = adj0->lookup_next_index;
3174           next1 = adj1->lookup_next_index;
3175
3176           flow_hash_config0 = 
3177               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3178
3179           flow_hash_config1 = 
3180               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
3181
3182           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
3183               (ip0, flow_hash_config0);
3184                                                                   
3185           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
3186               (ip1, flow_hash_config1);
3187
3188           ASSERT (adj0->n_adj > 0);
3189           ASSERT (adj1->n_adj > 0);
3190           ASSERT (is_pow2 (adj0->n_adj));
3191           ASSERT (is_pow2 (adj1->n_adj));
3192           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3193           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
3194
3195           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3196           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
3197
3198           if (1) /* $$$$$$ HACK FIXME */
3199           vlib_increment_combined_counter 
3200               (cm, cpu_index, adj_index0, 1,
3201                vlib_buffer_length_in_chain (vm, p0));
3202           if (1) /* $$$$$$ HACK FIXME */
3203           vlib_increment_combined_counter 
3204               (cm, cpu_index, adj_index1, 1,
3205                vlib_buffer_length_in_chain (vm, p1));
3206
3207           from += 2;
3208           to_next += 2;
3209           n_left_to_next -= 2;
3210           n_left_from -= 2;
3211
3212           wrong_next = (next0 != next) + 2*(next1 != next);
3213           if (PREDICT_FALSE (wrong_next != 0))
3214             {
3215               switch (wrong_next)
3216                 {
3217                 case 1:
3218                   /* A B A */
3219                   to_next[-2] = pi1;
3220                   to_next -= 1;
3221                   n_left_to_next += 1;
3222                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3223                   break;
3224
3225                 case 2:
3226                   /* A A B */
3227                   to_next -= 1;
3228                   n_left_to_next += 1;
3229                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3230                   break;
3231
3232                 case 3:
3233                   /* A B C */
3234                   to_next -= 2;
3235                   n_left_to_next += 2;
3236                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3237                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3238                   if (next0 == next1)
3239                     {
3240                       /* A B B */
3241                       vlib_put_next_frame (vm, node, next, n_left_to_next);
3242                       next = next1;
3243                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
3244                     }
3245                 }
3246             }
3247         }
3248     
3249       while (n_left_from > 0 && n_left_to_next > 0)
3250         {
3251           vlib_buffer_t * p0;
3252           ip4_header_t * ip0;
3253           u32 pi0, adj_index0;
3254           ip_lookup_next_t next0;
3255           ip_adjacency_t * adj0;
3256           u32 fib_index0;
3257           u32 flow_hash_config0;
3258
3259           pi0 = from[0];
3260           to_next[0] = pi0;
3261
3262           p0 = vlib_get_buffer (vm, pi0);
3263
3264           ip0 = vlib_buffer_get_current (p0);
3265
3266           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
3267                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3268           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3269               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3270           
3271           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3272                                               &ip0->dst_address, p0);
3273
3274           adj0 = ip_get_adjacency (lm, adj_index0);
3275
3276           next0 = adj0->lookup_next_index;
3277
3278           flow_hash_config0 = 
3279               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3280
3281           vnet_buffer (p0)->ip.flow_hash = 
3282             ip4_compute_flow_hash (ip0, flow_hash_config0);
3283
3284           ASSERT (adj0->n_adj > 0);
3285           ASSERT (is_pow2 (adj0->n_adj));
3286           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3287
3288           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3289
3290           if (1) /* $$$$$$ HACK FIXME */
3291               vlib_increment_combined_counter 
3292                   (cm, cpu_index, adj_index0, 1,
3293                    vlib_buffer_length_in_chain (vm, p0));
3294
3295           from += 1;
3296           to_next += 1;
3297           n_left_to_next -= 1;
3298           n_left_from -= 1;
3299
3300           if (PREDICT_FALSE (next0 != next))
3301             {
3302               n_left_to_next += 1;
3303               vlib_put_next_frame (vm, node, next, n_left_to_next);
3304               next = next0;
3305               vlib_get_next_frame (vm, node, next,
3306                                    to_next, n_left_to_next);
3307               to_next[0] = pi0;
3308               to_next += 1;
3309               n_left_to_next -= 1;
3310             }
3311         }
3312
3313       vlib_put_next_frame (vm, node, next, n_left_to_next);
3314     }
3315
3316   return frame->n_vectors;
3317 }
3318
3319 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3320   .function = ip4_lookup_multicast,
3321   .name = "ip4-lookup-multicast",
3322   .vector_size = sizeof (u32),
3323
3324   .n_next_nodes = IP_LOOKUP_N_NEXT,
3325   .next_nodes = {
3326     [IP_LOOKUP_NEXT_MISS] = "ip4-miss",
3327     [IP_LOOKUP_NEXT_DROP] = "ip4-drop",
3328     [IP_LOOKUP_NEXT_PUNT] = "ip4-punt",
3329     [IP_LOOKUP_NEXT_LOCAL] = "ip4-local",
3330     [IP_LOOKUP_NEXT_ARP] = "ip4-arp",
3331     [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit",
3332     [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify",
3333     [IP_LOOKUP_NEXT_MAP] = "ip4-map",
3334     [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t",
3335     [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd",
3336     [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop",
3337     [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop", 
3338     [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop", 
3339   },
3340 };
3341
3342 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3343   .function = ip4_drop,
3344   .name = "ip4-multicast",
3345   .vector_size = sizeof (u32),
3346
3347   .format_trace = format_ip4_forward_next_trace,
3348
3349   .n_next_nodes = 1,
3350   .next_nodes = {
3351     [0] = "error-drop",
3352   },
3353 };
3354
3355 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3356 {
3357   ip4_main_t * im = &ip4_main;
3358   ip4_fib_mtrie_t * mtrie0;
3359   ip4_fib_mtrie_leaf_t leaf0;
3360   u32 adj_index0;
3361     
3362   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3363
3364   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3365   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3366   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3367   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3368   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3369   
3370   /* Handle default route. */
3371   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3372   
3373   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3374   
3375   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3376                                                   a, 
3377                                                   /* no_default_route */ 0);
3378 }
3379  
3380 static clib_error_t *
3381 test_lookup_command_fn (vlib_main_t * vm,
3382                         unformat_input_t * input,
3383                         vlib_cli_command_t * cmd)
3384 {
3385   u32 table_id = 0;
3386   f64 count = 1;
3387   u32 n;
3388   int i;
3389   ip4_address_t ip4_base_address;
3390   u64 errors = 0;
3391
3392   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3393       if (unformat (input, "table %d", &table_id))
3394         ;
3395       else if (unformat (input, "count %f", &count))
3396         ;
3397
3398       else if (unformat (input, "%U",
3399                          unformat_ip4_address, &ip4_base_address))
3400         ;
3401       else
3402         return clib_error_return (0, "unknown input `%U'",
3403                                   format_unformat_error, input);
3404   }
3405
3406   n = count;
3407
3408   for (i = 0; i < n; i++)
3409     {
3410       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3411         errors++;
3412
3413       ip4_base_address.as_u32 = 
3414         clib_host_to_net_u32 (1 + 
3415                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3416     }
3417
3418   if (errors) 
3419     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3420   else
3421     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3422
3423   return 0;
3424 }
3425
3426 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3427     .path = "test lookup",
3428     .short_help = "test lookup",
3429     .function = test_lookup_command_fn,
3430 };
3431
3432 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3433 {
3434   ip4_main_t * im4 = &ip4_main;
3435   ip4_fib_t * fib;
3436   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3437
3438   if (p == 0)
3439     return VNET_API_ERROR_NO_SUCH_FIB;
3440
3441   fib = vec_elt_at_index (im4->fibs, p[0]);
3442
3443   fib->flow_hash_config = flow_hash_config;
3444   return 0;
3445 }
3446  
3447 static clib_error_t *
3448 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3449                              unformat_input_t * input,
3450                              vlib_cli_command_t * cmd)
3451 {
3452   int matched = 0;
3453   u32 table_id = 0;
3454   u32 flow_hash_config = 0;
3455   int rv;
3456
3457   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3458     if (unformat (input, "table %d", &table_id))
3459       matched = 1;
3460 #define _(a,v) \
3461     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3462     foreach_flow_hash_bit
3463 #undef _
3464     else break;
3465   }
3466   
3467   if (matched == 0)
3468     return clib_error_return (0, "unknown input `%U'",
3469                               format_unformat_error, input);
3470   
3471   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3472   switch (rv)
3473     {
3474     case 0:
3475       break;
3476       
3477     case VNET_API_ERROR_NO_SUCH_FIB:
3478       return clib_error_return (0, "no such FIB table %d", table_id);
3479       
3480     default:
3481       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3482       break;
3483     }
3484   
3485   return 0;
3486 }
3487  
3488 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3489   .path = "set ip flow-hash",
3490   .short_help = 
3491   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3492   .function = set_ip_flow_hash_command_fn,
3493 };
3494  
3495 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3496                                  u32 table_index)
3497 {
3498   vnet_main_t * vnm = vnet_get_main();
3499   vnet_interface_main_t * im = &vnm->interface_main;
3500   ip4_main_t * ipm = &ip4_main;
3501   ip_lookup_main_t * lm = &ipm->lookup_main;
3502   vnet_classify_main_t * cm = &vnet_classify_main;
3503
3504   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3505     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3506
3507   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3508     return VNET_API_ERROR_NO_SUCH_ENTRY;
3509
3510   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3511   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3512
3513   return 0;
3514 }
3515
3516 static clib_error_t *
3517 set_ip_classify_command_fn (vlib_main_t * vm,
3518                             unformat_input_t * input,
3519                             vlib_cli_command_t * cmd)
3520 {
3521   u32 table_index = ~0;
3522   int table_index_set = 0;
3523   u32 sw_if_index = ~0;
3524   int rv;
3525   
3526   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3527     if (unformat (input, "table-index %d", &table_index))
3528       table_index_set = 1;
3529     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3530                        vnet_get_main(), &sw_if_index))
3531       ;
3532     else
3533       break;
3534   }
3535       
3536   if (table_index_set == 0)
3537     return clib_error_return (0, "classify table-index must be specified");
3538
3539   if (sw_if_index == ~0)
3540     return clib_error_return (0, "interface / subif must be specified");
3541
3542   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3543
3544   switch (rv)
3545     {
3546     case 0:
3547       break;
3548
3549     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3550       return clib_error_return (0, "No such interface");
3551
3552     case VNET_API_ERROR_NO_SUCH_ENTRY:
3553       return clib_error_return (0, "No such classifier table");
3554     }
3555   return 0;
3556 }
3557
3558 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3559     .path = "set ip classify",
3560     .short_help = 
3561     "set ip classify intfc <int> table-index <index>",
3562     .function = set_ip_classify_command_fn,
3563 };
3564