VPP-189 More coverity bug fixes
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47
48 /** \file
49     vnet ip4 forwarding 
50 */
51
52 /* This is really, really simple but stupid fib. */
53 u32
54 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
55                            ip4_address_t * dst,
56                            u32 disable_default_route)
57 {
58   ip_lookup_main_t * lm = &im->lookup_main;
59   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
60   uword * p, * hash, key;
61   i32 i, i_min, dst_address, ai;
62
63   i_min = disable_default_route ? 1 : 0;
64   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
65   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
66     {
67       hash = fib->adj_index_by_dst_address[i];
68       if (! hash)
69         continue;
70
71       key = dst_address & im->fib_masks[i];
72       if ((p = hash_get (hash, key)) != 0)
73         {
74           ai = p[0];
75           goto done;
76         }
77     }
78     
79   /* Nothing matches in table. */
80   ai = lm->miss_adj_index;
81
82  done:
83   return ai;
84 }
85
86 static ip4_fib_t *
87 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
88 {
89   ip4_fib_t * fib;
90   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
91   vec_add2 (im->fibs, fib, 1);
92   fib->table_id = table_id;
93   fib->index = fib - im->fibs;
94   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
95   fib->fwd_classify_table_index = ~0;
96   fib->rev_classify_table_index = ~0;
97   ip4_mtrie_init (&fib->mtrie);
98   return fib;
99 }
100
101 ip4_fib_t *
102 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
103                                    u32 table_index_or_id, u32 flags)
104 {
105   uword * p, fib_index;
106
107   fib_index = table_index_or_id;
108   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
109     {
110       if (table_index_or_id == ~0) {
111         table_index_or_id = 0;
112         while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
113           table_index_or_id++;
114         }
115         return create_fib_with_table_id (im, table_index_or_id);
116       }
117
118       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
119       if (! p)
120         return create_fib_with_table_id (im, table_index_or_id);
121       fib_index = p[0];
122     }
123   return vec_elt_at_index (im->fibs, fib_index);
124 }
125
126 static void
127 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
128                                        ip4_fib_t * fib,
129                                        u32 address_length)
130 {
131   hash_t * h;
132   uword max_index;
133
134   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
135   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
136
137   fib->adj_index_by_dst_address[address_length] =
138     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
139
140   hash_set_flags (fib->adj_index_by_dst_address[address_length],
141                   HASH_FLAG_NO_AUTO_SHRINK);
142
143   h = hash_header (fib->adj_index_by_dst_address[address_length]);
144   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
145
146   /* Initialize new/old hash value vectors. */
147   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
148   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
149 }
150
151 static void
152 ip4_fib_set_adj_index (ip4_main_t * im,
153                        ip4_fib_t * fib,
154                        u32 flags,
155                        u32 dst_address_u32,
156                        u32 dst_address_length,
157                        u32 adj_index)
158 {
159   ip_lookup_main_t * lm = &im->lookup_main;
160   uword * hash;
161
162   if (vec_bytes(fib->old_hash_values))
163     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
164   if (vec_bytes(fib->new_hash_values))
165     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
166   fib->new_hash_values[0] = adj_index;
167
168   /* Make sure adj index is valid. */
169   if (CLIB_DEBUG > 0)
170     (void) ip_get_adjacency (lm, adj_index);
171
172   hash = fib->adj_index_by_dst_address[dst_address_length];
173
174   hash = _hash_set3 (hash, dst_address_u32,
175                      fib->new_hash_values,
176                      fib->old_hash_values);
177
178   fib->adj_index_by_dst_address[dst_address_length] = hash;
179
180   if (vec_len (im->add_del_route_callbacks) > 0)
181     {
182       ip4_add_del_route_callback_t * cb;
183       ip4_address_t d;
184       uword * p;
185
186       d.data_u32 = dst_address_u32;
187       vec_foreach (cb, im->add_del_route_callbacks)
188         if ((flags & cb->required_flags) == cb->required_flags)
189           cb->function (im, cb->function_opaque,
190                         fib, flags,
191                         &d, dst_address_length,
192                         fib->old_hash_values,
193                         fib->new_hash_values);
194
195       p = hash_get (hash, dst_address_u32);
196       /* hash_get should never return NULL here */
197       if (p)
198           clib_memcpy (p, fib->new_hash_values, 
199                        vec_bytes (fib->new_hash_values));
200       else
201           ASSERT(0);
202     }
203 }
204
205 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
206 {
207   ip_lookup_main_t * lm = &im->lookup_main;
208   ip4_fib_t * fib;
209   u32 dst_address, dst_address_length, adj_index, old_adj_index;
210   uword * hash, is_del;
211   ip4_add_del_route_callback_t * cb;
212
213   /* Either create new adjacency or use given one depending on arguments. */
214   if (a->n_add_adj > 0)
215     {
216       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
217       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
218     }
219   else
220     adj_index = a->adj_index;
221
222   dst_address = a->dst_address.data_u32;
223   dst_address_length = a->dst_address_length;
224   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
225
226   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
227   dst_address &= im->fib_masks[dst_address_length];
228
229   if (! fib->adj_index_by_dst_address[dst_address_length])
230     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
231
232   hash = fib->adj_index_by_dst_address[dst_address_length];
233
234   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
235
236   if (is_del)
237     {
238       fib->old_hash_values[0] = ~0;
239       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
240       fib->adj_index_by_dst_address[dst_address_length] = hash;
241
242       if (vec_len (im->add_del_route_callbacks) > 0
243           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
244         {
245           fib->new_hash_values[0] = ~0;
246           vec_foreach (cb, im->add_del_route_callbacks)
247             if ((a->flags & cb->required_flags) == cb->required_flags)
248               cb->function (im, cb->function_opaque,
249                             fib, a->flags,
250                             &a->dst_address, dst_address_length,
251                             fib->old_hash_values,
252                             fib->new_hash_values);
253         }
254     }
255   else
256     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
257                            adj_index);
258
259   old_adj_index = fib->old_hash_values[0];
260
261   /* Avoid spurious reference count increments */
262   if (old_adj_index == adj_index
263       && adj_index != ~0
264       && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
265     {
266       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
267       if (adj->share_count > 0)
268         adj->share_count --;
269     }
270
271   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
272                                is_del ? old_adj_index : adj_index,
273                                is_del);
274
275   /* Delete old adjacency index if present and changed. */
276   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
277       && old_adj_index != ~0
278       && old_adj_index != adj_index)
279     ip_del_adjacency (lm, old_adj_index);
280 }
281
282
283 u32
284 ip4_route_get_next_hop_adj (ip4_main_t * im,
285                             u32 fib_index,
286                             ip4_address_t *next_hop,
287                             u32 next_hop_sw_if_index,
288                             u32 explicit_fib_index)
289 {
290   ip_lookup_main_t * lm = &im->lookup_main;
291   vnet_main_t * vnm = vnet_get_main();
292   uword * nh_hash, * nh_result;
293   int is_interface_next_hop;
294   u32 nh_adj_index;
295   ip4_fib_t * fib;
296
297   fib = vec_elt_at_index (im->fibs, fib_index);
298
299   is_interface_next_hop = next_hop->data_u32 == 0;
300   if (is_interface_next_hop)
301     {
302       nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
303       if (nh_result)
304           nh_adj_index = *nh_result;
305       else
306         {
307            ip_adjacency_t * adj;
308            adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
309                                    &nh_adj_index);
310            ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
311            ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
312            hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
313         }
314     }
315   else if (next_hop_sw_if_index == ~0)
316     {
317       /* next-hop is recursive. we always need a indirect adj
318        * for recursive paths. Any LPM we perform now will give
319        * us a valid adj, but without tracking the next-hop we
320        * have no way to keep it valid.
321        */
322       ip_adjacency_t add_adj;
323       memset (&add_adj, 0, sizeof(add_adj));
324       add_adj.n_adj = 1;
325       add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
326       add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
327       add_adj.explicit_fib_index = explicit_fib_index;
328       ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
329     }
330   else
331     {
332       nh_hash = fib->adj_index_by_dst_address[32];
333       nh_result = hash_get (nh_hash, next_hop->data_u32);
334
335       /* Next hop must be known. */
336       if (! nh_result)
337         {
338           ip_adjacency_t * adj;
339
340           /* no /32 exists, get the longest prefix match */
341           nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
342                                                     next_hop, 0);
343           adj = ip_get_adjacency (lm, nh_adj_index);
344           /* if ARP interface adjacency is present, we need to
345              install ARP adjaceny for specific next hop */
346           if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
347               adj->arp.next_hop.ip4.as_u32 == 0)
348             {
349               nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
350             }
351         }
352       else
353         {
354           nh_adj_index = *nh_result;
355         }
356     }
357
358   return (nh_adj_index);
359 }
360
361 void
362 ip4_add_del_route_next_hop (ip4_main_t * im,
363                             u32 flags,
364                             ip4_address_t * dst_address,
365                             u32 dst_address_length,
366                             ip4_address_t * next_hop,
367                             u32 next_hop_sw_if_index,
368                             u32 next_hop_weight, u32 adj_index, 
369                             u32 explicit_fib_index)
370 {
371   vnet_main_t * vnm = vnet_get_main();
372   ip_lookup_main_t * lm = &im->lookup_main;
373   u32 fib_index;
374   ip4_fib_t * fib;
375   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
376   u32 dst_adj_index, nh_adj_index;
377   uword * dst_hash, * dst_result;
378   ip_adjacency_t * dst_adj;
379   ip_multipath_adjacency_t * old_mp, * new_mp;
380   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
381   clib_error_t * error = 0;
382
383   if (explicit_fib_index == (u32)~0)
384       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
385   else
386       fib_index = explicit_fib_index;
387
388   fib = vec_elt_at_index (im->fibs, fib_index);
389
390   /* Lookup next hop to be added or deleted. */
391   if (adj_index == (u32)~0)
392     {
393         nh_adj_index = ip4_route_get_next_hop_adj(im, fib_index,
394                                                   next_hop,
395                                                   next_hop_sw_if_index,
396                                                   explicit_fib_index);
397     }
398   else
399     {
400       nh_adj_index = adj_index;
401     }
402   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
403   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
404
405   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
406   dst_result = hash_get (dst_hash, dst_address_u32);
407   if (dst_result)
408     {
409       dst_adj_index = dst_result[0];
410       dst_adj = ip_get_adjacency (lm, dst_adj_index);
411     }
412   else
413     {
414       /* For deletes destination must be known. */
415       if (is_del)
416         {
417           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
418           error = clib_error_return (0, "unknown destination %U/%d",
419                                      format_ip4_address, dst_address,
420                                      dst_address_length);
421           goto done;
422         }
423
424       dst_adj_index = ~0;
425       dst_adj = 0;
426     }
427
428   /* Ignore adds of X/32 with next hop of X. */
429   if (! is_del
430       && dst_address_length == 32
431       && dst_address->data_u32 == next_hop->data_u32 
432       && adj_index != (u32)~0)
433     {
434       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
435       error = clib_error_return (0, "prefix matches next hop %U/%d",
436                                  format_ip4_address, dst_address,
437                                  dst_address_length);
438       goto done;
439     }
440
441   /* Destination is not known and default weight is set so add route
442      to existing non-multipath adjacency */
443   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
444     {
445       /* create / delete additional mapping of existing adjacency */
446       ip4_add_del_route_args_t a;
447       ip_adjacency_t * nh_adj = ip_get_adjacency (lm, nh_adj_index);
448
449       a.table_index_or_table_id = fib_index;
450       a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
451                  | IP4_ROUTE_FLAG_FIB_INDEX
452                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
453                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
454                              | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
455       a.dst_address = dst_address[0];
456       a.dst_address_length = dst_address_length;
457       a.adj_index = nh_adj_index;
458       a.add_adj = 0;
459       a.n_add_adj = 0;
460
461       ip4_add_del_route (im, &a);
462
463       /* adjust share count. This cannot be the only use of the adjacency 
464          unless next hop is an indiect adj where share count is already
465          incremented */
466       if (next_hop_sw_if_index != ~0) 
467         nh_adj->share_count += is_del ? -1 : 1;
468         
469       goto done;
470     }
471
472   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
473
474   if (! ip_multipath_adjacency_add_del_next_hop
475       (lm, is_del,
476        old_mp_adj_index,
477        nh_adj_index,
478        next_hop_weight,
479        &new_mp_adj_index))
480     {
481       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
482       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
483                                  format_ip4_address, next_hop);
484       goto done;
485     }
486   
487   old_mp = new_mp = 0;
488   if (old_mp_adj_index != ~0)
489     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
490   if (new_mp_adj_index != ~0)
491     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
492
493   if (old_mp != new_mp)
494     {
495       ip4_add_del_route_args_t a;
496       ip_adjacency_t * adj;
497
498       a.table_index_or_table_id = fib_index;
499       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
500                  | IP4_ROUTE_FLAG_FIB_INDEX
501                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
502                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
503       a.dst_address = dst_address[0];
504       a.dst_address_length = dst_address_length;
505       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
506       a.add_adj = 0;
507       a.n_add_adj = 0;
508
509       ip4_add_del_route (im, &a);
510
511       adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index);
512       if (adj->n_adj == 1)
513         adj->share_count += is_del ? -1 : 1;
514     }
515
516  done:
517   if (error)
518     clib_error_report (error);
519 }
520
521 void *
522 ip4_get_route (ip4_main_t * im,
523                u32 table_index_or_table_id,
524                u32 flags,
525                u8 * address,
526                u32 address_length)
527 {
528   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
529   u32 dst_address = * (u32 *) address;
530   uword * hash, * p;
531
532   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
533   dst_address &= im->fib_masks[address_length];
534
535   hash = fib->adj_index_by_dst_address[address_length];
536   p = hash_get (hash, dst_address);
537   return (void *) p;
538 }
539
540 void
541 ip4_foreach_matching_route (ip4_main_t * im,
542                             u32 table_index_or_table_id,
543                             u32 flags,
544                             ip4_address_t * address,
545                             u32 address_length,
546                             ip4_address_t ** results,
547                             u8 ** result_lengths)
548 {
549   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
550   u32 dst_address = address->data_u32;
551   u32 this_length = address_length;
552   
553   if (*results)
554     _vec_len (*results) = 0;
555   if (*result_lengths)
556     _vec_len (*result_lengths) = 0;
557
558   while (this_length <= 32 && vec_len (results) == 0)
559     {
560       uword k, v;
561       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
562         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
563           {
564             ip4_address_t a;
565             a.data_u32 = k;
566             vec_add1 (*results, a);
567             vec_add1 (*result_lengths, this_length);
568           }
569       }));
570
571       this_length++;
572     }
573 }
574
575 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
576                                   u32 table_index_or_table_id,
577                                   u32 flags)
578 {
579   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
580   ip_lookup_main_t * lm = &im->lookup_main;
581   u32 i, l;
582   ip4_address_t a;
583   ip4_add_del_route_callback_t * cb;
584   static ip4_address_t * to_delete;
585
586   if (lm->n_adjacency_remaps == 0)
587     return;
588
589   for (l = 0; l <= 32; l++)
590     {
591       hash_pair_t * p;
592       uword * hash = fib->adj_index_by_dst_address[l];
593
594       if (hash_elts (hash) == 0)
595         continue;
596
597       if (to_delete)
598         _vec_len (to_delete) = 0;
599
600       hash_foreach_pair (p, hash, ({
601         u32 adj_index = p->value[0];
602         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
603
604         if (m)
605           {
606             /* Record destination address from hash key. */
607             a.data_u32 = p->key;
608
609             /* New adjacency points to nothing: so delete prefix. */
610             if (m == ~0)
611               vec_add1 (to_delete, a);
612             else
613               {
614                 /* Remap to new adjacency. */
615                 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
616
617                 /* Set new adjacency value. */
618                 fib->new_hash_values[0] = p->value[0] = m - 1;
619
620                 vec_foreach (cb, im->add_del_route_callbacks)
621                   if ((flags & cb->required_flags) == cb->required_flags)
622                     cb->function (im, cb->function_opaque,
623                                   fib, flags | IP4_ROUTE_FLAG_ADD,
624                                   &a, l,
625                                   fib->old_hash_values,
626                                   fib->new_hash_values);
627               }
628           }
629       }));
630
631       fib->new_hash_values[0] = ~0;
632       for (i = 0; i < vec_len (to_delete); i++)
633         {
634           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
635           vec_foreach (cb, im->add_del_route_callbacks)
636             if ((flags & cb->required_flags) == cb->required_flags)
637               cb->function (im, cb->function_opaque,
638                             fib, flags | IP4_ROUTE_FLAG_DEL,
639                             &a, l,
640                             fib->old_hash_values,
641                             fib->new_hash_values);
642         }
643     }
644
645   /* Also remap adjacencies in mtrie. */
646   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
647
648   /* Reset mapping table. */
649   vec_zero (lm->adjacency_remap_table);
650
651   /* All remaps have been performed. */
652   lm->n_adjacency_remaps = 0;
653 }
654
655 void ip4_delete_matching_routes (ip4_main_t * im,
656                                  u32 table_index_or_table_id,
657                                  u32 flags,
658                                  ip4_address_t * address,
659                                  u32 address_length)
660 {
661   static ip4_address_t * matching_addresses;
662   static u8 * matching_address_lengths;
663   u32 l, i;
664   ip4_add_del_route_args_t a;
665
666   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
667   a.table_index_or_table_id = table_index_or_table_id;
668   a.adj_index = ~0;
669   a.add_adj = 0;
670   a.n_add_adj = 0;
671
672   for (l = address_length + 1; l <= 32; l++)
673     {
674       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
675                                   address,
676                                   l,
677                                   &matching_addresses,
678                                   &matching_address_lengths);
679       for (i = 0; i < vec_len (matching_addresses); i++)
680         {
681           a.dst_address = matching_addresses[i];
682           a.dst_address_length = matching_address_lengths[i];
683           ip4_add_del_route (im, &a);
684         }
685     }
686
687   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
688 }
689
690 void
691 ip4_forward_next_trace (vlib_main_t * vm,
692                         vlib_node_runtime_t * node,
693                         vlib_frame_t * frame,
694                         vlib_rx_or_tx_t which_adj_index);
695
696 always_inline uword
697 ip4_lookup_inline (vlib_main_t * vm,
698                    vlib_node_runtime_t * node,
699                    vlib_frame_t * frame,
700                    int lookup_for_responses_to_locally_received_packets,
701                    int is_indirect)
702 {
703   ip4_main_t * im = &ip4_main;
704   ip_lookup_main_t * lm = &im->lookup_main;
705   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
706   u32 n_left_from, n_left_to_next, * from, * to_next;
707   ip_lookup_next_t next;
708   u32 cpu_index = os_get_cpu_number();
709
710   from = vlib_frame_vector_args (frame);
711   n_left_from = frame->n_vectors;
712   next = node->cached_next_index;
713
714   while (n_left_from > 0)
715     {
716       vlib_get_next_frame (vm, node, next,
717                            to_next, n_left_to_next);
718
719       while (n_left_from >= 4 && n_left_to_next >= 2)
720         {
721           vlib_buffer_t * p0, * p1;
722           ip4_header_t * ip0, * ip1;
723           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
724           ip_lookup_next_t next0, next1;
725           ip_adjacency_t * adj0, * adj1;
726           ip4_fib_mtrie_t * mtrie0, * mtrie1;
727           ip4_fib_mtrie_leaf_t leaf0, leaf1;
728           ip4_address_t * dst_addr0, *dst_addr1;
729           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
730           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
731           u32 flow_hash_config0, flow_hash_config1;
732           u32 hash_c0, hash_c1;
733           u32 wrong_next;
734
735           /* Prefetch next iteration. */
736           {
737             vlib_buffer_t * p2, * p3;
738
739             p2 = vlib_get_buffer (vm, from[2]);
740             p3 = vlib_get_buffer (vm, from[3]);
741
742             vlib_prefetch_buffer_header (p2, LOAD);
743             vlib_prefetch_buffer_header (p3, LOAD);
744
745             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
746             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
747           }
748
749           pi0 = to_next[0] = from[0];
750           pi1 = to_next[1] = from[1];
751
752           p0 = vlib_get_buffer (vm, pi0);
753           p1 = vlib_get_buffer (vm, pi1);
754
755           ip0 = vlib_buffer_get_current (p0);
756           ip1 = vlib_buffer_get_current (p1);
757
758           if (is_indirect)
759             {
760               ip_adjacency_t * iadj0, * iadj1;
761               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
762               iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
763               dst_addr0 = &iadj0->indirect.next_hop.ip4;
764               dst_addr1 = &iadj1->indirect.next_hop.ip4;
765             }
766           else
767             {
768               dst_addr0 = &ip0->dst_address;
769               dst_addr1 = &ip1->dst_address;
770             }
771
772           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
773           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
774           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
775             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
776           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
777             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
778
779
780           if (! lookup_for_responses_to_locally_received_packets)
781             {
782               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
783               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
784
785               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
786
787               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
788               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
789             }
790
791           tcp0 = (void *) (ip0 + 1);
792           tcp1 = (void *) (ip1 + 1);
793
794           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
795                          || ip0->protocol == IP_PROTOCOL_UDP);
796           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
797                          || ip1->protocol == IP_PROTOCOL_UDP);
798
799           if (! lookup_for_responses_to_locally_received_packets)
800             {
801               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
802               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
803             }
804
805           if (! lookup_for_responses_to_locally_received_packets)
806             {
807               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
808               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
809             }
810
811           if (! lookup_for_responses_to_locally_received_packets)
812             {
813               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
814               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
815             }
816
817           if (lookup_for_responses_to_locally_received_packets)
818             {
819               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
820               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
821             }
822           else
823             {
824               /* Handle default route. */
825               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
826               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
827
828               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
829               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
830             }
831
832           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
833                                                            dst_addr0,
834                                                            /* no_default_route */ 0));
835           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
836                                                            dst_addr1,
837                                                            /* no_default_route */ 0));
838           adj0 = ip_get_adjacency (lm, adj_index0);
839           adj1 = ip_get_adjacency (lm, adj_index1);
840
841           next0 = adj0->lookup_next_index;
842           next1 = adj1->lookup_next_index;
843
844           /* Use flow hash to compute multipath adjacency. */
845           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
846           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
847           if (PREDICT_FALSE (adj0->n_adj > 1))
848             {
849               flow_hash_config0 = 
850                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
851               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
852                 ip4_compute_flow_hash (ip0, flow_hash_config0);
853             }
854           if (PREDICT_FALSE(adj1->n_adj > 1))
855             {
856               flow_hash_config1 = 
857                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
858               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
859                 ip4_compute_flow_hash (ip1, flow_hash_config1);
860             }
861
862           ASSERT (adj0->n_adj > 0);
863           ASSERT (adj1->n_adj > 0);
864           ASSERT (is_pow2 (adj0->n_adj));
865           ASSERT (is_pow2 (adj1->n_adj));
866           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
867           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
868
869           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
870           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
871
872           vlib_increment_combined_counter 
873               (cm, cpu_index, adj_index0, 1,
874                vlib_buffer_length_in_chain (vm, p0) 
875                + sizeof(ethernet_header_t));
876           vlib_increment_combined_counter 
877               (cm, cpu_index, adj_index1, 1,
878                vlib_buffer_length_in_chain (vm, p1)
879                + sizeof(ethernet_header_t));
880
881           from += 2;
882           to_next += 2;
883           n_left_to_next -= 2;
884           n_left_from -= 2;
885
886           wrong_next = (next0 != next) + 2*(next1 != next);
887           if (PREDICT_FALSE (wrong_next != 0))
888             {
889               switch (wrong_next)
890                 {
891                 case 1:
892                   /* A B A */
893                   to_next[-2] = pi1;
894                   to_next -= 1;
895                   n_left_to_next += 1;
896                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
897                   break;
898
899                 case 2:
900                   /* A A B */
901                   to_next -= 1;
902                   n_left_to_next += 1;
903                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
904                   break;
905
906                 case 3:
907                   /* A B C */
908                   to_next -= 2;
909                   n_left_to_next += 2;
910                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
911                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
912                   if (next0 == next1)
913                     {
914                       /* A B B */
915                       vlib_put_next_frame (vm, node, next, n_left_to_next);
916                       next = next1;
917                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
918                     }
919                 }
920             }
921         }
922     
923       while (n_left_from > 0 && n_left_to_next > 0)
924         {
925           vlib_buffer_t * p0;
926           ip4_header_t * ip0;
927           __attribute__((unused)) tcp_header_t * tcp0;
928           ip_lookup_next_t next0;
929           ip_adjacency_t * adj0;
930           ip4_fib_mtrie_t * mtrie0;
931           ip4_fib_mtrie_leaf_t leaf0;
932           ip4_address_t * dst_addr0;
933           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
934           u32 flow_hash_config0, hash_c0;
935
936           pi0 = from[0];
937           to_next[0] = pi0;
938
939           p0 = vlib_get_buffer (vm, pi0);
940
941           ip0 = vlib_buffer_get_current (p0);
942
943           if (is_indirect)
944             {
945               ip_adjacency_t * iadj0;
946               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
947               dst_addr0 = &iadj0->indirect.next_hop.ip4;
948             }
949           else
950             {
951               dst_addr0 = &ip0->dst_address;
952             }
953
954           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
955           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
956             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
957
958           if (! lookup_for_responses_to_locally_received_packets)
959             {
960               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
961
962               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
963
964               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
965             }
966
967           tcp0 = (void *) (ip0 + 1);
968
969           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
970                          || ip0->protocol == IP_PROTOCOL_UDP);
971
972           if (! lookup_for_responses_to_locally_received_packets)
973             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
974
975           if (! lookup_for_responses_to_locally_received_packets)
976             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
977
978           if (! lookup_for_responses_to_locally_received_packets)
979             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
980
981           if (lookup_for_responses_to_locally_received_packets)
982             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
983           else
984             {
985               /* Handle default route. */
986               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
987               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
988             }
989
990           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
991                                                            dst_addr0,
992                                                            /* no_default_route */ 0));
993
994           adj0 = ip_get_adjacency (lm, adj_index0);
995
996           next0 = adj0->lookup_next_index;
997
998           /* Use flow hash to compute multipath adjacency. */
999           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
1000           if (PREDICT_FALSE(adj0->n_adj > 1))
1001             {
1002               flow_hash_config0 = 
1003                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
1004
1005               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
1006                 ip4_compute_flow_hash (ip0, flow_hash_config0);
1007             }
1008
1009           ASSERT (adj0->n_adj > 0);
1010           ASSERT (is_pow2 (adj0->n_adj));
1011           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
1012
1013           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1014
1015           vlib_increment_combined_counter 
1016               (cm, cpu_index, adj_index0, 1,
1017                vlib_buffer_length_in_chain (vm, p0)
1018                + sizeof(ethernet_header_t));
1019
1020           from += 1;
1021           to_next += 1;
1022           n_left_to_next -= 1;
1023           n_left_from -= 1;
1024
1025           if (PREDICT_FALSE (next0 != next))
1026             {
1027               n_left_to_next += 1;
1028               vlib_put_next_frame (vm, node, next, n_left_to_next);
1029               next = next0;
1030               vlib_get_next_frame (vm, node, next,
1031                                    to_next, n_left_to_next);
1032               to_next[0] = pi0;
1033               to_next += 1;
1034               n_left_to_next -= 1;
1035             }
1036         }
1037
1038       vlib_put_next_frame (vm, node, next, n_left_to_next);
1039     }
1040
1041   if (node->flags & VLIB_NODE_FLAG_TRACE)
1042     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
1043
1044   return frame->n_vectors;
1045 }
1046
1047 /** @brief IPv4 lookup node.
1048     @node ip4-lookup
1049
1050     This is the main IPv4 lookup dispatch node.
1051
1052     @param vm vlib_main_t corresponding to the current thread
1053     @param node vlib_node_runtime_t
1054     @param frame vlib_frame_t whose contents should be dispatched
1055
1056     @par Graph mechanics: buffer metadata, next index usage
1057
1058     @em Uses:
1059     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
1060         - Indicates the @c sw_if_index value of the interface that the
1061           packet was received on.
1062     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
1063         - When the value is @c ~0 then the node performs a longest prefix
1064           match (LPM) for the packet destination address in the FIB attached
1065           to the receive interface.
1066         - Otherwise perform LPM for the packet destination address in the
1067           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
1068           value (0, 1, ...) and not a VRF id.
1069
1070     @em Sets:
1071     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
1072         - The lookup result adjacency index.
1073
1074     <em>Next Index:</em>
1075     - Dispatches the packet to the node index found in
1076       ip_adjacency_t @c adj->lookup_next_index
1077       (where @c adj is the lookup result adjacency).
1078 */
1079 static uword
1080 ip4_lookup (vlib_main_t * vm,
1081             vlib_node_runtime_t * node,
1082             vlib_frame_t * frame)
1083 {
1084   return ip4_lookup_inline (vm, node, frame,
1085                             /* lookup_for_responses_to_locally_received_packets */ 0,
1086                             /* is_indirect */ 0);
1087
1088 }
1089
1090 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
1091                                         ip_adjacency_t * adj,
1092                                         u32 sw_if_index,
1093                                         u32 if_address_index)
1094 {
1095   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1096   ip_lookup_next_t n;
1097   vnet_l3_packet_type_t packet_type;
1098   u32 node_index;
1099
1100   if (hw->hw_class_index == ethernet_hw_interface_class.index
1101       || hw->hw_class_index == srp_hw_interface_class.index)
1102     {
1103       /* 
1104        * We have a bit of a problem in this case. ip4-arp uses
1105        * the rewrite_header.next_index to hand pkts to the
1106        * indicated inteface output node. We can end up in
1107        * ip4_rewrite_local, too, which also pays attention to 
1108        * rewrite_header.next index. Net result: a hack in
1109        * ip4_rewrite_local...
1110        */
1111       n = IP_LOOKUP_NEXT_ARP;
1112       node_index = ip4_arp_node.index;
1113       adj->if_address_index = if_address_index;
1114       adj->arp.next_hop.ip4.as_u32 = 0;
1115       ip46_address_reset(&adj->arp.next_hop);
1116       packet_type = VNET_L3_PACKET_TYPE_ARP;
1117     }
1118   else
1119     {
1120       n = IP_LOOKUP_NEXT_REWRITE;
1121       node_index = ip4_rewrite_node.index;
1122       packet_type = VNET_L3_PACKET_TYPE_IP4;
1123     }
1124
1125   adj->lookup_next_index = n;
1126   vnet_rewrite_for_sw_interface
1127     (vnm,
1128      packet_type,
1129      sw_if_index,
1130      node_index,
1131      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1132      &adj->rewrite_header,
1133      sizeof (adj->rewrite_data));
1134 }
1135
1136 static void
1137 ip4_add_interface_routes (u32 sw_if_index,
1138                           ip4_main_t * im, u32 fib_index,
1139                           ip_interface_address_t * a)
1140 {
1141   vnet_main_t * vnm = vnet_get_main();
1142   ip_lookup_main_t * lm = &im->lookup_main;
1143   ip_adjacency_t * adj;
1144   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1145   ip4_add_del_route_args_t x;
1146   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1147   u32 classify_table_index;
1148
1149   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1150   x.table_index_or_table_id = fib_index;
1151   x.flags = (IP4_ROUTE_FLAG_ADD
1152              | IP4_ROUTE_FLAG_FIB_INDEX
1153              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1154   x.dst_address = address[0];
1155   x.dst_address_length = a->address_length;
1156   x.n_add_adj = 0;
1157   x.add_adj = 0;
1158
1159   a->neighbor_probe_adj_index = ~0;
1160   if (a->address_length < 32)
1161     {
1162       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1163                               &x.adj_index);
1164       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1165       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1166       ip4_add_del_route (im, &x);
1167       a->neighbor_probe_adj_index = x.adj_index;
1168     }
1169   
1170   /* Add e.g. 1.1.1.1/32 as local to this host. */
1171   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1172                           &x.adj_index);
1173   
1174   classify_table_index = ~0;
1175   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1176     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1177   if (classify_table_index != (u32) ~0)
1178     {
1179       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1180       adj->classify.table_index = classify_table_index;
1181     }
1182   else
1183     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1184   
1185   adj->if_address_index = a - lm->if_address_pool;
1186   adj->rewrite_header.sw_if_index = sw_if_index;
1187   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1188   /* 
1189    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1190    * fail an RPF-ish check, but still go thru the rewrite code...
1191    */
1192   adj->rewrite_header.data_bytes = 0;
1193
1194   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1195   x.dst_address_length = 32;
1196   ip4_add_del_route (im, &x);
1197 }
1198
1199 static void
1200 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1201 {
1202   ip4_add_del_route_args_t x;
1203
1204   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1205   x.table_index_or_table_id = fib_index;
1206   x.flags = (IP4_ROUTE_FLAG_DEL
1207              | IP4_ROUTE_FLAG_FIB_INDEX
1208              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1209   x.dst_address = address[0];
1210   x.dst_address_length = address_length;
1211   x.adj_index = ~0;
1212   x.n_add_adj = 0;
1213   x.add_adj = 0;
1214
1215   if (address_length < 32)
1216     ip4_add_del_route (im, &x);
1217
1218   x.dst_address_length = 32;
1219   ip4_add_del_route (im, &x);
1220
1221   ip4_delete_matching_routes (im,
1222                               fib_index,
1223                               IP4_ROUTE_FLAG_FIB_INDEX,
1224                               address,
1225                               address_length);
1226 }
1227
1228 typedef struct {
1229     u32 sw_if_index;
1230     ip4_address_t address;
1231     u32 length;
1232 } ip4_interface_address_t;
1233
1234 static clib_error_t *
1235 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1236                                         u32 sw_if_index,
1237                                         ip4_address_t * new_address,
1238                                         u32 new_length,
1239                                         u32 redistribute,
1240                                         u32 insert_routes,
1241                                         u32 is_del);
1242
1243 static clib_error_t *
1244 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1245                                         u32 sw_if_index,
1246                                         ip4_address_t * address,
1247                                         u32 address_length,
1248                                         u32 redistribute,
1249                                         u32 insert_routes,
1250                                         u32 is_del)
1251 {
1252   vnet_main_t * vnm = vnet_get_main();
1253   ip4_main_t * im = &ip4_main;
1254   ip_lookup_main_t * lm = &im->lookup_main;
1255   clib_error_t * error = 0;
1256   u32 if_address_index, elts_before;
1257   ip4_address_fib_t ip4_af, * addr_fib = 0;
1258
1259   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1260   ip4_addr_fib_init (&ip4_af, address,
1261                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1262   vec_add1 (addr_fib, ip4_af);
1263
1264   /* When adding an address check that it does not conflict with an existing address. */
1265   if (! is_del)
1266     {
1267       ip_interface_address_t * ia;
1268       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1269                                     0 /* honor unnumbered */,
1270       ({
1271         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1272
1273         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1274             || ip4_destination_matches_route (im, x, address, address_length))
1275           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1276                                     format_ip4_address_and_length, address, address_length,
1277                                     format_ip4_address_and_length, x, ia->address_length,
1278                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1279       }));
1280     }
1281
1282   elts_before = pool_elts (lm->if_address_pool);
1283
1284   error = ip_interface_address_add_del
1285     (lm,
1286      sw_if_index,
1287      addr_fib,
1288      address_length,
1289      is_del,
1290      &if_address_index);
1291   if (error)
1292     goto done;
1293   
1294   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1295     {
1296       if (is_del)
1297         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1298                                   address_length);
1299       
1300       else
1301           ip4_add_interface_routes (sw_if_index,
1302                                     im, ip4_af.fib_index,
1303                                     pool_elt_at_index 
1304                                     (lm->if_address_pool, if_address_index));
1305     }
1306
1307   /* If pool did not grow/shrink: add duplicate address. */
1308   if (elts_before != pool_elts (lm->if_address_pool))
1309     {
1310       ip4_add_del_interface_address_callback_t * cb;
1311       vec_foreach (cb, im->add_del_interface_address_callbacks)
1312         cb->function (im, cb->function_opaque, sw_if_index,
1313                       address, address_length,
1314                       if_address_index,
1315                       is_del);
1316     }
1317
1318  done:
1319   vec_free (addr_fib);
1320   return error;
1321 }
1322
1323 clib_error_t *
1324 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1325                                ip4_address_t * address, u32 address_length,
1326                                u32 is_del)
1327 {
1328   return ip4_add_del_interface_address_internal
1329     (vm, sw_if_index, address, address_length,
1330      /* redistribute */ 1,
1331      /* insert_routes */ 1,
1332      is_del);
1333 }
1334
1335 static clib_error_t *
1336 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1337                                 u32 sw_if_index,
1338                                 u32 flags)
1339 {
1340   ip4_main_t * im = &ip4_main;
1341   ip_interface_address_t * ia;
1342   ip4_address_t * a;
1343   u32 is_admin_up, fib_index;
1344   
1345   /* Fill in lookup tables with default table (0). */
1346   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1347   
1348   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1349   
1350   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1351   
1352   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1353
1354   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1355                                 0 /* honor unnumbered */,
1356   ({
1357     a = ip_interface_address_get_address (&im->lookup_main, ia);
1358     if (is_admin_up)
1359       ip4_add_interface_routes (sw_if_index,
1360                                 im, fib_index,
1361                                 ia);
1362     else
1363       ip4_del_interface_routes (im, fib_index,
1364                                 a, ia->address_length);
1365   }));
1366
1367   return 0;
1368 }
1369  
1370 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1371
1372 /* Built-in ip4 unicast rx feature path definition */
1373 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
1374   .node_name = "ip4-inacl", 
1375   .runs_before = {"ip4-source-check-via-rx", 0}, 
1376   .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
1377 };
1378
1379 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
1380   .node_name = "ip4-source-check-via-rx",
1381   .runs_before = {"ip4-source-check-via-any", 0},
1382   .feature_index = 
1383   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
1384 };
1385
1386 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
1387   .node_name = "ip4-source-check-via-any",
1388   .runs_before = {"ip4-policer-classify", 0},
1389   .feature_index = 
1390   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
1391 };
1392
1393 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
1394   .node_name = "ip4-policer-classify",
1395   .runs_before = {"ipsec-input-ip4", 0},
1396   .feature_index =
1397   &ip4_main.ip4_unicast_rx_feature_policer_classify,
1398 };
1399
1400 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
1401   .node_name = "ipsec-input-ip4",
1402   .runs_before = {"vpath-input-ip4", 0},
1403   .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
1404 };
1405
1406 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
1407   .node_name = "vpath-input-ip4",
1408   .runs_before = {"ip4-lookup", 0},
1409   .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
1410 };
1411
1412 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
1413   .node_name = "ip4-lookup",
1414   .runs_before = {0}, /* not before any other features */
1415   .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
1416 };
1417
1418 /* Built-in ip4 multicast rx feature path definition */
1419 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
1420   .node_name = "vpath-input-ip4",
1421   .runs_before = {"ip4-lookup-multicast", 0},
1422   .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
1423 };
1424
1425 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
1426   .node_name = "ip4-lookup-multicast",
1427   .runs_before = {0}, /* not before any other features */
1428   .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
1429 };
1430
1431 static char * feature_start_nodes[] = 
1432   { "ip4-input", "ip4-input-no-checksum"};
1433
1434 static clib_error_t *
1435 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
1436 {
1437   ip_lookup_main_t * lm = &im->lookup_main;
1438   clib_error_t * error;
1439   vnet_cast_t cast;
1440
1441   for (cast = 0; cast < VNET_N_CAST; cast++)
1442     {
1443       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1444       vnet_config_main_t * vcm = &cm->config_main;
1445
1446       if ((error = ip_feature_init_cast (vm, cm, vcm, 
1447                                          feature_start_nodes,
1448                                          ARRAY_LEN(feature_start_nodes),
1449                                          cast,
1450                                          1 /* is_ip4 */)))
1451         return error;
1452     }
1453   return 0;
1454 }
1455
1456 static clib_error_t *
1457 ip4_sw_interface_add_del (vnet_main_t * vnm,
1458                           u32 sw_if_index,
1459                           u32 is_add)
1460 {
1461   vlib_main_t * vm = vnm->vlib_main;
1462   ip4_main_t * im = &ip4_main;
1463   ip_lookup_main_t * lm = &im->lookup_main;
1464   u32 ci, cast;
1465   u32 feature_index;
1466
1467   for (cast = 0; cast < VNET_N_CAST; cast++)
1468     {
1469       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1470       vnet_config_main_t * vcm = &cm->config_main;
1471
1472       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1473       ci = cm->config_index_by_sw_if_index[sw_if_index];
1474
1475       if (cast == VNET_UNICAST)
1476         feature_index = im->ip4_unicast_rx_feature_lookup;
1477       else
1478         feature_index = im->ip4_multicast_rx_feature_lookup;
1479
1480       if (is_add)
1481         ci = vnet_config_add_feature (vm, vcm,
1482                                       ci,
1483                                       feature_index,
1484                                       /* config data */ 0,
1485                                       /* # bytes of config data */ 0);
1486       else
1487         ci = vnet_config_del_feature (vm, vcm,
1488                                       ci,
1489                                       feature_index,
1490                                       /* config data */ 0,
1491                                       /* # bytes of config data */ 0);
1492
1493       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1494     }
1495
1496   return /* no error */ 0;
1497 }
1498
1499 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1500
1501 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
1502
1503 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1504   .function = ip4_lookup,
1505   .name = "ip4-lookup",
1506   .vector_size = sizeof (u32),
1507
1508   .format_trace = format_ip4_lookup_trace,
1509
1510   .n_next_nodes = IP4_LOOKUP_N_NEXT,
1511   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1512 };
1513
1514 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
1515
1516 static uword
1517 ip4_indirect (vlib_main_t * vm,
1518                vlib_node_runtime_t * node,
1519                vlib_frame_t * frame)
1520 {
1521   return ip4_lookup_inline (vm, node, frame,
1522                             /* lookup_for_responses_to_locally_received_packets */ 0,
1523                             /* is_indirect */ 1);
1524 }
1525
1526 VLIB_REGISTER_NODE (ip4_indirect_node) = {
1527   .function = ip4_indirect,
1528   .name = "ip4-indirect",
1529   .vector_size = sizeof (u32),
1530   .sibling_of = "ip4-lookup",
1531   .format_trace = format_ip4_lookup_trace,
1532
1533   .n_next_nodes = 0,
1534 };
1535
1536 VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect)
1537
1538
1539 /* Global IP4 main. */
1540 ip4_main_t ip4_main;
1541
1542 clib_error_t *
1543 ip4_lookup_init (vlib_main_t * vm)
1544 {
1545   ip4_main_t * im = &ip4_main;
1546   clib_error_t * error;
1547   uword i;
1548
1549   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1550     {
1551       u32 m;
1552
1553       if (i < 32)
1554         m = pow2_mask (i) << (32 - i);
1555       else 
1556         m = ~0;
1557       im->fib_masks[i] = clib_host_to_net_u32 (m);
1558     }
1559
1560   /* Create FIB with index 0 and table id of 0. */
1561   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1562
1563   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1564
1565   {
1566     pg_node_t * pn;
1567     pn = pg_get_node (ip4_lookup_node.index);
1568     pn->unformat_edit = unformat_pg_ip4_header;
1569   }
1570
1571   {
1572     ethernet_arp_header_t h;
1573
1574     memset (&h, 0, sizeof (h));
1575
1576     /* Set target ethernet address to all zeros. */
1577     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1578
1579 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1580 #define _8(f,v) h.f = v;
1581     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1582     _16 (l3_type, ETHERNET_TYPE_IP4);
1583     _8 (n_l2_address_bytes, 6);
1584     _8 (n_l3_address_bytes, 4);
1585     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1586 #undef _16
1587 #undef _8
1588
1589     vlib_packet_template_init (vm,
1590                                &im->ip4_arp_request_packet_template,
1591                                /* data */ &h,
1592                                sizeof (h),
1593                                /* alloc chunk size */ 8,
1594                                "ip4 arp");
1595   }
1596
1597   error = ip4_feature_init (vm, im);
1598
1599   return error;
1600 }
1601
1602 VLIB_INIT_FUNCTION (ip4_lookup_init);
1603
1604 typedef struct {
1605   /* Adjacency taken. */
1606   u32 adj_index;
1607   u32 flow_hash;
1608   u32 fib_index;
1609
1610   /* Packet data, possibly *after* rewrite. */
1611   u8 packet_data[64 - 1*sizeof(u32)];
1612 } ip4_forward_next_trace_t;
1613
1614 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1615 {
1616   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1617   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1618   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1619   uword indent = format_get_indent (s);
1620   s = format (s, "%U%U",
1621                 format_white_space, indent,
1622                 format_ip4_header, t->packet_data);
1623   return s;
1624 }
1625
1626 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1627 {
1628   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1629   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1630   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1631   vnet_main_t * vnm = vnet_get_main();
1632   ip4_main_t * im = &ip4_main;
1633   uword indent = format_get_indent (s);
1634
1635   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1636               t->fib_index, t->adj_index, format_ip_adjacency,
1637               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1638   s = format (s, "\n%U%U",
1639               format_white_space, indent,
1640               format_ip4_header, t->packet_data);
1641   return s;
1642 }
1643
1644 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1645 {
1646   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1647   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1648   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1649   vnet_main_t * vnm = vnet_get_main();
1650   ip4_main_t * im = &ip4_main;
1651   uword indent = format_get_indent (s);
1652
1653   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
1654               t->fib_index, t->adj_index, format_ip_adjacency,
1655               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1656   s = format (s, "\n%U%U",
1657               format_white_space, indent,
1658               format_ip_adjacency_packet_data,
1659               vnm, &im->lookup_main, t->adj_index,
1660               t->packet_data, sizeof (t->packet_data));
1661   return s;
1662 }
1663
1664 /* Common trace function for all ip4-forward next nodes. */
1665 void
1666 ip4_forward_next_trace (vlib_main_t * vm,
1667                         vlib_node_runtime_t * node,
1668                         vlib_frame_t * frame,
1669                         vlib_rx_or_tx_t which_adj_index)
1670 {
1671   u32 * from, n_left;
1672   ip4_main_t * im = &ip4_main;
1673
1674   n_left = frame->n_vectors;
1675   from = vlib_frame_vector_args (frame);
1676   
1677   while (n_left >= 4)
1678     {
1679       u32 bi0, bi1;
1680       vlib_buffer_t * b0, * b1;
1681       ip4_forward_next_trace_t * t0, * t1;
1682
1683       /* Prefetch next iteration. */
1684       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1685       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1686
1687       bi0 = from[0];
1688       bi1 = from[1];
1689
1690       b0 = vlib_get_buffer (vm, bi0);
1691       b1 = vlib_get_buffer (vm, bi1);
1692
1693       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1694         {
1695           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1696           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1697           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1698           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1699               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1700               vec_elt (im->fib_index_by_sw_if_index,
1701                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1702
1703           clib_memcpy (t0->packet_data,
1704                   vlib_buffer_get_current (b0),
1705                   sizeof (t0->packet_data));
1706         }
1707       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1708         {
1709           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1710           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1711           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1712           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1713               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1714               vec_elt (im->fib_index_by_sw_if_index,
1715                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1716           clib_memcpy (t1->packet_data,
1717                   vlib_buffer_get_current (b1),
1718                   sizeof (t1->packet_data));
1719         }
1720       from += 2;
1721       n_left -= 2;
1722     }
1723
1724   while (n_left >= 1)
1725     {
1726       u32 bi0;
1727       vlib_buffer_t * b0;
1728       ip4_forward_next_trace_t * t0;
1729
1730       bi0 = from[0];
1731
1732       b0 = vlib_get_buffer (vm, bi0);
1733
1734       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1735         {
1736           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1737           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1738           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1739           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1740               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1741               vec_elt (im->fib_index_by_sw_if_index,
1742                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1743           clib_memcpy (t0->packet_data,
1744                   vlib_buffer_get_current (b0),
1745                   sizeof (t0->packet_data));
1746         }
1747       from += 1;
1748       n_left -= 1;
1749     }
1750 }
1751
1752 static uword
1753 ip4_drop_or_punt (vlib_main_t * vm,
1754                   vlib_node_runtime_t * node,
1755                   vlib_frame_t * frame,
1756                   ip4_error_t error_code)
1757 {
1758   u32 * buffers = vlib_frame_vector_args (frame);
1759   uword n_packets = frame->n_vectors;
1760
1761   vlib_error_drop_buffers (vm, node,
1762                            buffers,
1763                            /* stride */ 1,
1764                            n_packets,
1765                            /* next */ 0,
1766                            ip4_input_node.index,
1767                            error_code);
1768
1769   if (node->flags & VLIB_NODE_FLAG_TRACE)
1770     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1771
1772   return n_packets;
1773 }
1774
1775 static uword
1776 ip4_drop (vlib_main_t * vm,
1777           vlib_node_runtime_t * node,
1778           vlib_frame_t * frame)
1779 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1780
1781 static uword
1782 ip4_punt (vlib_main_t * vm,
1783           vlib_node_runtime_t * node,
1784           vlib_frame_t * frame)
1785 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1786
1787 static uword
1788 ip4_miss (vlib_main_t * vm,
1789           vlib_node_runtime_t * node,
1790           vlib_frame_t * frame)
1791 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1792
1793 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1794   .function = ip4_drop,
1795   .name = "ip4-drop",
1796   .vector_size = sizeof (u32),
1797
1798   .format_trace = format_ip4_forward_next_trace,
1799
1800   .n_next_nodes = 1,
1801   .next_nodes = {
1802     [0] = "error-drop",
1803   },
1804 };
1805
1806 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1807
1808 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1809   .function = ip4_punt,
1810   .name = "ip4-punt",
1811   .vector_size = sizeof (u32),
1812
1813   .format_trace = format_ip4_forward_next_trace,
1814
1815   .n_next_nodes = 1,
1816   .next_nodes = {
1817     [0] = "error-punt",
1818   },
1819 };
1820
1821 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1822
1823 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1824   .function = ip4_miss,
1825   .name = "ip4-miss",
1826   .vector_size = sizeof (u32),
1827
1828   .format_trace = format_ip4_forward_next_trace,
1829
1830   .n_next_nodes = 1,
1831   .next_nodes = {
1832     [0] = "error-drop",
1833   },
1834 };
1835
1836 VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss)
1837
1838 /* Compute TCP/UDP/ICMP4 checksum in software. */
1839 u16
1840 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1841                               ip4_header_t * ip0)
1842 {
1843   ip_csum_t sum0;
1844   u32 ip_header_length, payload_length_host_byte_order;
1845   u32 n_this_buffer, n_bytes_left;
1846   u16 sum16;
1847   void * data_this_buffer;
1848   
1849   /* Initialize checksum with ip header. */
1850   ip_header_length = ip4_header_bytes (ip0);
1851   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1852   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1853
1854   if (BITS (uword) == 32)
1855     {
1856       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1857       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1858     }
1859   else
1860     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1861
1862   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1863   data_this_buffer = (void *) ip0 + ip_header_length;
1864   if (n_this_buffer + ip_header_length > p0->current_length)
1865     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1866   while (1)
1867     {
1868       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1869       n_bytes_left -= n_this_buffer;
1870       if (n_bytes_left == 0)
1871         break;
1872
1873       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1874       p0 = vlib_get_buffer (vm, p0->next_buffer);
1875       data_this_buffer = vlib_buffer_get_current (p0);
1876       n_this_buffer = p0->current_length;
1877     }
1878
1879   sum16 = ~ ip_csum_fold (sum0);
1880
1881   return sum16;
1882 }
1883
1884 static u32
1885 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1886 {
1887   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1888   udp_header_t * udp0;
1889   u16 sum16;
1890
1891   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1892           || ip0->protocol == IP_PROTOCOL_UDP);
1893
1894   udp0 = (void *) (ip0 + 1);
1895   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1896     {
1897       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1898                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1899       return p0->flags;
1900     }
1901
1902   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1903
1904   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1905                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1906
1907   return p0->flags;
1908 }
1909
1910 static uword
1911 ip4_local (vlib_main_t * vm,
1912            vlib_node_runtime_t * node,
1913            vlib_frame_t * frame)
1914 {
1915   ip4_main_t * im = &ip4_main;
1916   ip_lookup_main_t * lm = &im->lookup_main;
1917   ip_local_next_t next_index;
1918   u32 * from, * to_next, n_left_from, n_left_to_next;
1919   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1920
1921   from = vlib_frame_vector_args (frame);
1922   n_left_from = frame->n_vectors;
1923   next_index = node->cached_next_index;
1924   
1925   if (node->flags & VLIB_NODE_FLAG_TRACE)
1926     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1927
1928   while (n_left_from > 0)
1929     {
1930       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1931
1932       while (n_left_from >= 4 && n_left_to_next >= 2)
1933         {
1934           vlib_buffer_t * p0, * p1;
1935           ip4_header_t * ip0, * ip1;
1936           udp_header_t * udp0, * udp1;
1937           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1938           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1939           ip_adjacency_t * adj0, * adj1;
1940           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
1941           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
1942           i32 len_diff0, len_diff1;
1943           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1944           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1945           u8 enqueue_code;
1946       
1947           pi0 = to_next[0] = from[0];
1948           pi1 = to_next[1] = from[1];
1949           from += 2;
1950           n_left_from -= 2;
1951           to_next += 2;
1952           n_left_to_next -= 2;
1953       
1954           p0 = vlib_get_buffer (vm, pi0);
1955           p1 = vlib_get_buffer (vm, pi1);
1956
1957           ip0 = vlib_buffer_get_current (p0);
1958           ip1 = vlib_buffer_get_current (p1);
1959
1960           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1961                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1962           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1963                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1964
1965           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1966           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
1967
1968           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1969
1970           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1971           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1972
1973           /* Treat IP frag packets as "experimental" protocol for now
1974              until support of IP frag reassembly is implemented */
1975           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1976           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1977           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1978           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1979           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1980           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1981
1982           flags0 = p0->flags;
1983           flags1 = p1->flags;
1984
1985           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1986           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1987
1988           udp0 = ip4_next_header (ip0);
1989           udp1 = ip4_next_header (ip1);
1990
1991           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1992           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1993           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1994
1995           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1996           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1997
1998           /* Verify UDP length. */
1999           ip_len0 = clib_net_to_host_u16 (ip0->length);
2000           ip_len1 = clib_net_to_host_u16 (ip1->length);
2001           udp_len0 = clib_net_to_host_u16 (udp0->length);
2002           udp_len1 = clib_net_to_host_u16 (udp1->length);
2003
2004           len_diff0 = ip_len0 - udp_len0;
2005           len_diff1 = ip_len1 - udp_len1;
2006
2007           len_diff0 = is_udp0 ? len_diff0 : 0;
2008           len_diff1 = is_udp1 ? len_diff1 : 0;
2009
2010           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
2011                                 & good_tcp_udp0 & good_tcp_udp1)))
2012             {
2013               if (is_tcp_udp0)
2014                 {
2015                   if (is_tcp_udp0
2016                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2017                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2018                   good_tcp_udp0 =
2019                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2020                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2021                 }
2022               if (is_tcp_udp1)
2023                 {
2024                   if (is_tcp_udp1
2025                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2026                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
2027                   good_tcp_udp1 =
2028                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2029                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2030                 }
2031             }
2032
2033           good_tcp_udp0 &= len_diff0 >= 0;
2034           good_tcp_udp1 &= len_diff1 >= 0;
2035
2036           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2037           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
2038
2039           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
2040
2041           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2042           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
2043
2044           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2045           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2046                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2047                     : error0);
2048           error1 = (is_tcp_udp1 && ! good_tcp_udp1
2049                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
2050                     : error1);
2051
2052           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2053           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
2054
2055           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2056           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2057
2058           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
2059           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2060
2061           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2062                                                            &ip0->src_address,
2063                                                            /* no_default_route */ 1));
2064           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
2065                                                            &ip1->src_address,
2066                                                            /* no_default_route */ 1));
2067
2068           adj0 = ip_get_adjacency (lm, adj_index0);
2069           adj1 = ip_get_adjacency (lm, adj_index1);
2070
2071           /* 
2072            * Must have a route to source otherwise we drop the packet.
2073            * ip4 broadcasts are accepted, e.g. to make dhcp client work
2074            */
2075           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2076                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2077                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2078                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2079                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2080                     ? IP4_ERROR_SRC_LOOKUP_MISS
2081                     : error0);
2082           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
2083                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2084                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
2085                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2086                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2087                     ? IP4_ERROR_SRC_LOOKUP_MISS
2088                     : error1);
2089
2090           next0 = lm->local_next_by_ip_protocol[proto0];
2091           next1 = lm->local_next_by_ip_protocol[proto1];
2092
2093           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2094           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
2095
2096           p0->error = error0 ? error_node->errors[error0] : 0;
2097           p1->error = error1 ? error_node->errors[error1] : 0;
2098
2099           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
2100
2101           if (PREDICT_FALSE (enqueue_code != 0))
2102             {
2103               switch (enqueue_code)
2104                 {
2105                 case 1:
2106                   /* A B A */
2107                   to_next[-2] = pi1;
2108                   to_next -= 1;
2109                   n_left_to_next += 1;
2110                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2111                   break;
2112
2113                 case 2:
2114                   /* A A B */
2115                   to_next -= 1;
2116                   n_left_to_next += 1;
2117                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2118                   break;
2119
2120                 case 3:
2121                   /* A B B or A B C */
2122                   to_next -= 2;
2123                   n_left_to_next += 2;
2124                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2125                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2126                   if (next0 == next1)
2127                     {
2128                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2129                       next_index = next1;
2130                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2131                     }
2132                   break;
2133                 }
2134             }
2135         }
2136
2137       while (n_left_from > 0 && n_left_to_next > 0)
2138         {
2139           vlib_buffer_t * p0;
2140           ip4_header_t * ip0;
2141           udp_header_t * udp0;
2142           ip4_fib_mtrie_t * mtrie0;
2143           ip4_fib_mtrie_leaf_t leaf0;
2144           ip_adjacency_t * adj0;
2145           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
2146           i32 len_diff0;
2147           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2148       
2149           pi0 = to_next[0] = from[0];
2150           from += 1;
2151           n_left_from -= 1;
2152           to_next += 1;
2153           n_left_to_next -= 1;
2154       
2155           p0 = vlib_get_buffer (vm, pi0);
2156
2157           ip0 = vlib_buffer_get_current (p0);
2158
2159           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2160                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2161
2162           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2163
2164           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2165
2166           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2167
2168           /* Treat IP frag packets as "experimental" protocol for now
2169              until support of IP frag reassembly is implemented */
2170           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
2171           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2172           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2173
2174           flags0 = p0->flags;
2175
2176           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2177
2178           udp0 = ip4_next_header (ip0);
2179
2180           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2181           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2182
2183           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2184
2185           /* Verify UDP length. */
2186           ip_len0 = clib_net_to_host_u16 (ip0->length);
2187           udp_len0 = clib_net_to_host_u16 (udp0->length);
2188
2189           len_diff0 = ip_len0 - udp_len0;
2190
2191           len_diff0 = is_udp0 ? len_diff0 : 0;
2192
2193           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
2194             {
2195               if (is_tcp_udp0)
2196                 {
2197                   if (is_tcp_udp0
2198                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2199                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2200                   good_tcp_udp0 =
2201                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2202                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2203                 }
2204             }
2205
2206           good_tcp_udp0 &= len_diff0 >= 0;
2207
2208           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2209
2210           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2211
2212           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2213
2214           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2215           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2216                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2217                     : error0);
2218
2219           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2220
2221           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2222           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2223
2224           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2225                                                            &ip0->src_address,
2226                                                            /* no_default_route */ 1));
2227
2228           adj0 = ip_get_adjacency (lm, adj_index0);
2229
2230           /* Must have a route to source otherwise we drop the packet. */
2231           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2232                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2233                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2234                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2235                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2236                     ? IP4_ERROR_SRC_LOOKUP_MISS
2237                     : error0);
2238
2239           next0 = lm->local_next_by_ip_protocol[proto0];
2240
2241           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2242
2243           p0->error = error0? error_node->errors[error0] : 0;
2244
2245           if (PREDICT_FALSE (next0 != next_index))
2246             {
2247               n_left_to_next += 1;
2248               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2249
2250               next_index = next0;
2251               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2252               to_next[0] = pi0;
2253               to_next += 1;
2254               n_left_to_next -= 1;
2255             }
2256         }
2257   
2258       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2259     }
2260
2261   return frame->n_vectors;
2262 }
2263
2264 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2265   .function = ip4_local,
2266   .name = "ip4-local",
2267   .vector_size = sizeof (u32),
2268
2269   .format_trace = format_ip4_forward_next_trace,
2270
2271   .n_next_nodes = IP_LOCAL_N_NEXT,
2272   .next_nodes = {
2273     [IP_LOCAL_NEXT_DROP] = "error-drop",
2274     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2275     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2276     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2277   },
2278 };
2279
2280 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
2281
2282 void ip4_register_protocol (u32 protocol, u32 node_index)
2283 {
2284   vlib_main_t * vm = vlib_get_main();
2285   ip4_main_t * im = &ip4_main;
2286   ip_lookup_main_t * lm = &im->lookup_main;
2287
2288   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2289   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2290 }
2291
2292 static clib_error_t *
2293 show_ip_local_command_fn (vlib_main_t * vm,
2294                           unformat_input_t * input,
2295                          vlib_cli_command_t * cmd)
2296 {
2297   ip4_main_t * im = &ip4_main;
2298   ip_lookup_main_t * lm = &im->lookup_main;
2299   int i;
2300
2301   vlib_cli_output (vm, "Protocols handled by ip4_local");
2302   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2303     {
2304       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2305         vlib_cli_output (vm, "%d", i);
2306     }
2307   return 0;
2308 }
2309
2310
2311
2312 VLIB_CLI_COMMAND (show_ip_local, static) = {
2313   .path = "show ip local",
2314   .function = show_ip_local_command_fn,
2315   .short_help = "Show ip local protocol table",
2316 };
2317
2318 static uword
2319 ip4_arp (vlib_main_t * vm,
2320          vlib_node_runtime_t * node,
2321          vlib_frame_t * frame)
2322 {
2323   vnet_main_t * vnm = vnet_get_main();
2324   ip4_main_t * im = &ip4_main;
2325   ip_lookup_main_t * lm = &im->lookup_main;
2326   u32 * from, * to_next_drop;
2327   uword n_left_from, n_left_to_next_drop, next_index;
2328   static f64 time_last_seed_change = -1e100;
2329   static u32 hash_seeds[3];
2330   static uword hash_bitmap[256 / BITS (uword)]; 
2331   f64 time_now;
2332
2333   if (node->flags & VLIB_NODE_FLAG_TRACE)
2334     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2335
2336   time_now = vlib_time_now (vm);
2337   if (time_now - time_last_seed_change > 1e-3)
2338     {
2339       uword i;
2340       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2341                                              sizeof (hash_seeds));
2342       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2343         hash_seeds[i] = r[i];
2344
2345       /* Mark all hash keys as been no-seen before. */
2346       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2347         hash_bitmap[i] = 0;
2348
2349       time_last_seed_change = time_now;
2350     }
2351
2352   from = vlib_frame_vector_args (frame);
2353   n_left_from = frame->n_vectors;
2354   next_index = node->cached_next_index;
2355   if (next_index == IP4_ARP_NEXT_DROP)
2356     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2357
2358   while (n_left_from > 0)
2359     {
2360       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2361                            to_next_drop, n_left_to_next_drop);
2362
2363       while (n_left_from > 0 && n_left_to_next_drop > 0)
2364         {
2365           vlib_buffer_t * p0;
2366           ip4_header_t * ip0;
2367           ethernet_header_t * eh0;
2368           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2369           uword bm0;
2370           ip_adjacency_t * adj0;
2371
2372           pi0 = from[0];
2373
2374           p0 = vlib_get_buffer (vm, pi0);
2375
2376           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2377           adj0 = ip_get_adjacency (lm, adj_index0);
2378           ip0 = vlib_buffer_get_current (p0);
2379
2380           /* If packet destination is not local, send ARP to next hop */
2381           if (adj0->arp.next_hop.ip4.as_u32)
2382             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2383
2384           /* 
2385            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2386            * rewrite to this packet, we need to skip it here.
2387            * Note, to distinguish from src IP addr *.8.6.*, we
2388            * check for a bcast eth dest instead of IPv4 version.
2389            */
2390           eh0 = (ethernet_header_t*)ip0;
2391           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2392             {
2393               u32 vlan_num = 0;
2394               u16 * etype = &eh0->type;
2395               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2396                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2397                 {
2398                   vlan_num += 1;
2399                   etype += 2; //vlan tag also 16 bits, same as etype
2400                 }
2401               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2402                 {
2403                   vlib_buffer_advance (
2404                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2405                   ip0 = vlib_buffer_get_current (p0);
2406                 }
2407             }
2408
2409           a0 = hash_seeds[0];
2410           b0 = hash_seeds[1];
2411           c0 = hash_seeds[2];
2412
2413           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2414           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2415
2416           a0 ^= ip0->dst_address.data_u32;
2417           b0 ^= sw_if_index0;
2418
2419           hash_v3_finalize32 (a0, b0, c0);
2420
2421           c0 &= BITS (hash_bitmap) - 1;
2422           c0 = c0 / BITS (uword);
2423           m0 = (uword) 1 << (c0 % BITS (uword));
2424
2425           bm0 = hash_bitmap[c0];
2426           drop0 = (bm0 & m0) != 0;
2427
2428           /* Mark it as seen. */
2429           hash_bitmap[c0] = bm0 | m0;
2430
2431           from += 1;
2432           n_left_from -= 1;
2433           to_next_drop[0] = pi0;
2434           to_next_drop += 1;
2435           n_left_to_next_drop -= 1;
2436
2437           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2438
2439           if (drop0)
2440             continue;
2441
2442           /* 
2443            * Can happen if the control-plane is programming tables
2444            * with traffic flowing; at least that's today's lame excuse.
2445            */
2446           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2447             {
2448               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2449             }
2450           else
2451           /* Send ARP request. */
2452           {
2453             u32 bi0 = 0;
2454             vlib_buffer_t * b0;
2455             ethernet_arp_header_t * h0;
2456             vnet_hw_interface_t * hw_if0;
2457
2458             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2459
2460             /* Add rewrite/encap string for ARP packet. */
2461             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2462
2463             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2464
2465             /* Src ethernet address in ARP header. */
2466             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2467                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2468
2469             if (ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0)) {
2470                 //No source address available
2471                 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2472                 vlib_buffer_free(vm, &bi0, 1);
2473                 continue;
2474             }
2475
2476             /* Copy in destination address we are requesting. */
2477             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2478
2479             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2480             b0 = vlib_get_buffer (vm, bi0);
2481             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2482
2483             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2484
2485             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2486           }
2487         }
2488
2489       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2490     }
2491
2492   return frame->n_vectors;
2493 }
2494
2495 static char * ip4_arp_error_strings[] = {
2496   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2497   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2498   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2499   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2500   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2501   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2502 };
2503
2504 VLIB_REGISTER_NODE (ip4_arp_node) = {
2505   .function = ip4_arp,
2506   .name = "ip4-arp",
2507   .vector_size = sizeof (u32),
2508
2509   .format_trace = format_ip4_forward_next_trace,
2510
2511   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2512   .error_strings = ip4_arp_error_strings,
2513
2514   .n_next_nodes = IP4_ARP_N_NEXT,
2515   .next_nodes = {
2516     [IP4_ARP_NEXT_DROP] = "error-drop",
2517   },
2518 };
2519
2520 #define foreach_notrace_ip4_arp_error           \
2521 _(DROP)                                         \
2522 _(REQUEST_SENT)                                 \
2523 _(REPLICATE_DROP)                               \
2524 _(REPLICATE_FAIL)
2525
2526 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2527 {
2528   vlib_node_runtime_t *rt = 
2529     vlib_node_get_runtime (vm, ip4_arp_node.index);
2530
2531   /* don't trace ARP request packets */
2532 #define _(a)                                    \
2533     vnet_pcap_drop_trace_filter_add_del         \
2534         (rt->errors[IP4_ARP_ERROR_##a],         \
2535          1 /* is_add */);
2536     foreach_notrace_ip4_arp_error;
2537 #undef _
2538   return 0;
2539 }
2540
2541 VLIB_INIT_FUNCTION(arp_notrace_init);
2542
2543
2544 /* Send an ARP request to see if given destination is reachable on given interface. */
2545 clib_error_t *
2546 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2547 {
2548   vnet_main_t * vnm = vnet_get_main();
2549   ip4_main_t * im = &ip4_main;
2550   ethernet_arp_header_t * h;
2551   ip4_address_t * src;
2552   ip_interface_address_t * ia;
2553   ip_adjacency_t * adj;
2554   vnet_hw_interface_t * hi;
2555   vnet_sw_interface_t * si;
2556   vlib_buffer_t * b;
2557   u32 bi = 0;
2558
2559   si = vnet_get_sw_interface (vnm, sw_if_index);
2560
2561   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2562     {
2563       return clib_error_return (0, "%U: interface %U down",
2564                                 format_ip4_address, dst, 
2565                                 format_vnet_sw_if_index_name, vnm, 
2566                                 sw_if_index);
2567     }
2568
2569   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2570   if (! src)
2571     {
2572       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2573       return clib_error_return 
2574         (0, "no matching interface address for destination %U (interface %U)",
2575          format_ip4_address, dst,
2576          format_vnet_sw_if_index_name, vnm, sw_if_index);
2577     }
2578
2579   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2580
2581   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2582
2583   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2584
2585   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2586
2587   h->ip4_over_ethernet[0].ip4 = src[0];
2588   h->ip4_over_ethernet[1].ip4 = dst[0];
2589
2590   b = vlib_get_buffer (vm, bi);
2591   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2592
2593   /* Add encapsulation string for software interface (e.g. ethernet header). */
2594   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2595   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2596
2597   {
2598     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2599     u32 * to_next = vlib_frame_vector_args (f);
2600     to_next[0] = bi;
2601     f->n_vectors = 1;
2602     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2603   }
2604
2605   return /* no error */ 0;
2606 }
2607
/*
 * Next-node slots of the ip4 rewrite nodes.  The values index the
 * next_nodes[] table of the ip4-rewrite-transit registration.
 */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,        /* "error-drop" */
  IP4_REWRITE_NEXT_ARP = 1,         /* "ip4-arp" */
  IP4_REWRITE_NEXT_ICMP_ERROR = 2,  /* "ip4-icmp-error" */
} ip4_rewrite_next_t;
2613
2614 always_inline uword
2615 ip4_rewrite_inline (vlib_main_t * vm,
2616                     vlib_node_runtime_t * node,
2617                     vlib_frame_t * frame,
2618                     int rewrite_for_locally_received_packets)
2619 {
2620   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2621   u32 * from = vlib_frame_vector_args (frame);
2622   u32 n_left_from, n_left_to_next, * to_next, next_index;
2623   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2624   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2625
2626   n_left_from = frame->n_vectors;
2627   next_index = node->cached_next_index;
2628   u32 cpu_index = os_get_cpu_number();
2629   
2630   while (n_left_from > 0)
2631     {
2632       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2633
2634       while (n_left_from >= 4 && n_left_to_next >= 2)
2635         {
2636           ip_adjacency_t * adj0, * adj1;
2637           vlib_buffer_t * p0, * p1;
2638           ip4_header_t * ip0, * ip1;
2639           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2640           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2641           u32 next0_override, next1_override;
2642       
2643           if (rewrite_for_locally_received_packets)
2644               next0_override = next1_override = 0;
2645
2646           /* Prefetch next iteration. */
2647           {
2648             vlib_buffer_t * p2, * p3;
2649
2650             p2 = vlib_get_buffer (vm, from[2]);
2651             p3 = vlib_get_buffer (vm, from[3]);
2652
2653             vlib_prefetch_buffer_header (p2, STORE);
2654             vlib_prefetch_buffer_header (p3, STORE);
2655
2656             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2657             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2658           }
2659
2660           pi0 = to_next[0] = from[0];
2661           pi1 = to_next[1] = from[1];
2662
2663           from += 2;
2664           n_left_from -= 2;
2665           to_next += 2;
2666           n_left_to_next -= 2;
2667       
2668           p0 = vlib_get_buffer (vm, pi0);
2669           p1 = vlib_get_buffer (vm, pi1);
2670
2671           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2672           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2673
2674           /* We should never rewrite a pkt using the MISS adjacency */
2675           ASSERT(adj_index0 && adj_index1);
2676
2677           ip0 = vlib_buffer_get_current (p0);
2678           ip1 = vlib_buffer_get_current (p1);
2679
2680           error0 = error1 = IP4_ERROR_NONE;
2681           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2682
2683           /* Decrement TTL & update checksum.
2684              Works either endian, so no need for byte swap. */
2685           if (! rewrite_for_locally_received_packets)
2686             {
2687               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2688
2689               /* Input node should have reject packets with ttl 0. */
2690               ASSERT (ip0->ttl > 0);
2691               ASSERT (ip1->ttl > 0);
2692
2693               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2694               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2695
2696               checksum0 += checksum0 >= 0xffff;
2697               checksum1 += checksum1 >= 0xffff;
2698
2699               ip0->checksum = checksum0;
2700               ip1->checksum = checksum1;
2701
2702               ttl0 -= 1;
2703               ttl1 -= 1;
2704
2705               ip0->ttl = ttl0;
2706               ip1->ttl = ttl1;
2707
2708               /*
2709                * If the ttl drops below 1 when forwarding, generate
2710                * an ICMP response.
2711                */
2712               if (PREDICT_FALSE(ttl0 <= 0))
2713                 {
2714                   error0 = IP4_ERROR_TIME_EXPIRED;
2715                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2716                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2717                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2718                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2719                 }
2720               if (PREDICT_FALSE(ttl1 <= 0))
2721                 {
2722                   error1 = IP4_ERROR_TIME_EXPIRED;
2723                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2724                   icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2725                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2726                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2727                 }
2728
2729               /* Verify checksum. */
2730               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2731               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2732             }
2733
2734           /* Rewrite packet header and updates lengths. */
2735           adj0 = ip_get_adjacency (lm, adj_index0);
2736           adj1 = ip_get_adjacency (lm, adj_index1);
2737       
2738           if (rewrite_for_locally_received_packets)
2739             {
2740               /*
2741                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2742                * we end up here with a local adjacency in hand
2743                * The local adj rewrite data is 0xfefe on purpose.
2744                * Bad engineer, no donut for you.
2745                */
2746               if (PREDICT_FALSE(adj0->lookup_next_index 
2747                                 == IP_LOOKUP_NEXT_LOCAL))
2748                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2749               if (PREDICT_FALSE(adj0->lookup_next_index
2750                                 == IP_LOOKUP_NEXT_ARP))
2751                 next0_override = IP4_REWRITE_NEXT_ARP;
2752               if (PREDICT_FALSE(adj1->lookup_next_index 
2753                                 == IP_LOOKUP_NEXT_LOCAL))
2754                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2755               if (PREDICT_FALSE(adj1->lookup_next_index
2756                                 == IP_LOOKUP_NEXT_ARP))
2757                 next1_override = IP4_REWRITE_NEXT_ARP;
2758             }
2759
2760           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2761           rw_len0 = adj0[0].rewrite_header.data_bytes;
2762           rw_len1 = adj1[0].rewrite_header.data_bytes;
2763
2764           /* Check MTU of outgoing interface. */
2765           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2766                     ? IP4_ERROR_MTU_EXCEEDED
2767                     : error0);
2768           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2769                     ? IP4_ERROR_MTU_EXCEEDED
2770                     : error1);
2771
2772           next0 = (error0 == IP4_ERROR_NONE)
2773             ? adj0[0].rewrite_header.next_index : next0;
2774
2775           if (rewrite_for_locally_received_packets)
2776               next0 = next0 && next0_override ? next0_override : next0;
2777
2778           next1 = (error1 == IP4_ERROR_NONE)
2779             ? adj1[0].rewrite_header.next_index : next1;
2780
2781           if (rewrite_for_locally_received_packets)
2782               next1 = next1 && next1_override ? next1_override : next1;
2783
2784           /* 
2785            * We've already accounted for an ethernet_header_t elsewhere
2786            */
2787           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2788               vlib_increment_combined_counter 
2789                   (&lm->adjacency_counters,
2790                    cpu_index, adj_index0, 
2791                    /* packet increment */ 0,
2792                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2793
2794           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2795               vlib_increment_combined_counter 
2796                   (&lm->adjacency_counters,
2797                    cpu_index, adj_index1, 
2798                    /* packet increment */ 0,
2799                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2800
2801           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2802            * to see the IP headerr */
2803           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2804             {
2805               p0->current_data -= rw_len0;
2806               p0->current_length += rw_len0;
2807               p0->error = error_node->errors[error0];
2808               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2809                   adj0[0].rewrite_header.sw_if_index;
2810             }
2811           if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2812             {
2813               p1->current_data -= rw_len1;
2814               p1->current_length += rw_len1;
2815               p1->error = error_node->errors[error1];
2816               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2817                   adj1[0].rewrite_header.sw_if_index;
2818             }
2819
2820           /* Guess we are only writing on simple Ethernet header. */
2821           vnet_rewrite_two_headers (adj0[0], adj1[0],
2822                                     ip0, ip1,
2823                                     sizeof (ethernet_header_t));
2824       
2825           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2826                                            to_next, n_left_to_next,
2827                                            pi0, pi1, next0, next1);
2828         }
2829
2830       while (n_left_from > 0 && n_left_to_next > 0)
2831         {
2832           ip_adjacency_t * adj0;
2833           vlib_buffer_t * p0;
2834           ip4_header_t * ip0;
2835           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2836           u32 next0_override;
2837       
2838           if (rewrite_for_locally_received_packets)
2839               next0_override = 0;
2840
2841           pi0 = to_next[0] = from[0];
2842
2843           p0 = vlib_get_buffer (vm, pi0);
2844
2845           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2846
2847           /* We should never rewrite a pkt using the MISS adjacency */
2848           ASSERT(adj_index0);
2849
2850           adj0 = ip_get_adjacency (lm, adj_index0);
2851       
2852           ip0 = vlib_buffer_get_current (p0);
2853
2854           error0 = IP4_ERROR_NONE;
2855           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
2856
2857           /* Decrement TTL & update checksum. */
2858           if (! rewrite_for_locally_received_packets)
2859             {
2860               i32 ttl0 = ip0->ttl;
2861
2862               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2863
2864               checksum0 += checksum0 >= 0xffff;
2865
2866               ip0->checksum = checksum0;
2867
2868               ASSERT (ip0->ttl > 0);
2869
2870               ttl0 -= 1;
2871
2872               ip0->ttl = ttl0;
2873
2874               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2875
2876               if (PREDICT_FALSE(ttl0 <= 0))
2877                 {
2878                   /*
2879                    * If the ttl drops below 1 when forwarding, generate
2880                    * an ICMP response.
2881                    */
2882                   error0 = IP4_ERROR_TIME_EXPIRED;
2883                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2884                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2885                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2886                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2887                 }
2888             }
2889
2890           if (rewrite_for_locally_received_packets)
2891             {
2892               /*
2893                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2894                * we end up here with a local adjacency in hand
2895                * The local adj rewrite data is 0xfefe on purpose.
2896                * Bad engineer, no donut for you.
2897                */
2898               if (PREDICT_FALSE(adj0->lookup_next_index 
2899                                 == IP_LOOKUP_NEXT_LOCAL))
2900                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2901               /* 
2902                * We have to override the next_index in ARP adjacencies,
2903                * because they're set up for ip4-arp, not this node...
2904                */
2905               if (PREDICT_FALSE(adj0->lookup_next_index
2906                                 == IP_LOOKUP_NEXT_ARP))
2907                 next0_override = IP4_REWRITE_NEXT_ARP;
2908             }
2909
2910           /* Guess we are only writing on simple Ethernet header. */
2911           vnet_rewrite_one_header (adj0[0], ip0, 
2912                                    sizeof (ethernet_header_t));
2913           
2914           /* Update packet buffer attributes/set output interface. */
2915           rw_len0 = adj0[0].rewrite_header.data_bytes;
2916           
2917           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2918               vlib_increment_combined_counter 
2919                   (&lm->adjacency_counters,
2920                    cpu_index, adj_index0, 
2921                    /* packet increment */ 0,
2922                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2923           
2924           /* Check MTU of outgoing interface. */
2925           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2926                     > adj0[0].rewrite_header.max_l3_packet_bytes
2927                     ? IP4_ERROR_MTU_EXCEEDED
2928                     : error0);
2929
2930           p0->error = error_node->errors[error0];
2931
2932           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2933            * to see the IP headerr */
2934           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2935             {
2936               p0->current_data -= rw_len0;
2937               p0->current_length += rw_len0;
2938
2939               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2940                   adj0[0].rewrite_header.sw_if_index;
2941               next0 = adj0[0].rewrite_header.next_index;
2942             }
2943
2944           if (rewrite_for_locally_received_packets)
2945               next0 = next0 && next0_override ? next0_override : next0;
2946
2947           from += 1;
2948           n_left_from -= 1;
2949           to_next += 1;
2950           n_left_to_next -= 1;
2951       
2952           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2953                                            to_next, n_left_to_next,
2954                                            pi0, next0);
2955         }
2956   
2957       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2958     }
2959
2960   /* Need to do trace after rewrites to pick up new packet data. */
2961   if (node->flags & VLIB_NODE_FLAG_TRACE)
2962     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2963
2964   return frame->n_vectors;
2965 }
2966
2967
2968 /** @brief IPv4 transit rewrite node.
2969     @node ip4-rewrite-transit
2970
2971     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2972     header checksum, fetch the ip adjacency, check the outbound mtu,
2973     apply the adjacency rewrite, and send pkts to the adjacency
2974     rewrite header's rewrite_next_index.
2975
2976     @param vm vlib_main_t corresponding to the current thread
2977     @param node vlib_node_runtime_t
2978     @param frame vlib_frame_t whose contents should be dispatched
2979
2980     @par Graph mechanics: buffer metadata, next index usage
2981
2982     @em Uses:
2983     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2984         - the rewrite adjacency index
2985     - <code>adj->lookup_next_index</code>
2986         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2987           the packet will be dropped. 
2988     - <code>adj->rewrite_header</code>
2989         - Rewrite string length, rewrite string, next_index
2990
2991     @em Sets:
2992     - <code>b->current_data, b->current_length</code>
2993         - Updated net of applying the rewrite string
2994
2995     <em>Next Indices:</em>
2996     - <code> adj->rewrite_header.next_index </code>
2997       or @c error-drop 
2998 */
2999 static uword
3000 ip4_rewrite_transit (vlib_main_t * vm,
3001                      vlib_node_runtime_t * node,
3002                      vlib_frame_t * frame)
3003 {
3004   return ip4_rewrite_inline (vm, node, frame,
3005                              /* rewrite_for_locally_received_packets */ 0);
3006 }
3007
3008 /** @brief IPv4 local rewrite node.
3009     @node ip4-rewrite-local
3010
3011     This is the IPv4 local rewrite node. Fetch the ip adjacency, check
3012     the outbound interface mtu, apply the adjacency rewrite, and send
3013     pkts to the adjacency rewrite header's rewrite_next_index. Deal
3014     with hemorrhoids of the form "some clown sends an icmp4 w/ src =
3015     dst = interface addr."
3016
3017     @param vm vlib_main_t corresponding to the current thread
3018     @param node vlib_node_runtime_t
3019     @param frame vlib_frame_t whose contents should be dispatched
3020
3021     @par Graph mechanics: buffer metadata, next index usage
3022
3023     @em Uses:
3024     - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
3025         - the rewrite adjacency index
3026     - <code>adj->lookup_next_index</code>
3027         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
3028           the packet will be dropped. 
3029     - <code>adj->rewrite_header</code>
3030         - Rewrite string length, rewrite string, next_index
3031
3032     @em Sets:
3033     - <code>b->current_data, b->current_length</code>
3034         - Updated net of applying the rewrite string
3035
3036     <em>Next Indices:</em>
3037     - <code> adj->rewrite_header.next_index </code>
3038       or @c error-drop 
3039 */
3040
3041 static uword
3042 ip4_rewrite_local (vlib_main_t * vm,
3043                    vlib_node_runtime_t * node,
3044                    vlib_frame_t * frame)
3045 {
3046   return ip4_rewrite_inline (vm, node, frame,
3047                              /* rewrite_for_locally_received_packets */ 1);
3048 }
3049
3050 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
3051   .function = ip4_rewrite_transit,
3052   .name = "ip4-rewrite-transit",
3053   .vector_size = sizeof (u32),
3054
3055   .format_trace = format_ip4_rewrite_trace,
3056
3057   .n_next_nodes = 3,
3058   .next_nodes = {
3059     [IP4_REWRITE_NEXT_DROP] = "error-drop",
3060     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
3061     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3062   },
3063 };
3064
3065 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
3066
3067 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
3068   .function = ip4_rewrite_local,
3069   .name = "ip4-rewrite-local",
3070   .vector_size = sizeof (u32),
3071
3072   .sibling_of = "ip4-rewrite-transit",
3073
3074   .format_trace = format_ip4_rewrite_trace,
3075
3076   .n_next_nodes = 0,
3077 };
3078
3079 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
3080
3081 static clib_error_t *
3082 add_del_interface_table (vlib_main_t * vm,
3083                          unformat_input_t * input,
3084                          vlib_cli_command_t * cmd)
3085 {
3086   vnet_main_t * vnm = vnet_get_main();
3087   clib_error_t * error = 0;
3088   u32 sw_if_index, table_id;
3089
3090   sw_if_index = ~0;
3091
3092   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
3093     {
3094       error = clib_error_return (0, "unknown interface `%U'",
3095                                  format_unformat_error, input);
3096       goto done;
3097     }
3098
3099   if (unformat (input, "%d", &table_id))
3100     ;
3101   else
3102     {
3103       error = clib_error_return (0, "expected table id `%U'",
3104                                  format_unformat_error, input);
3105       goto done;
3106     }
3107
3108   {
3109     ip4_main_t * im = &ip4_main;
3110     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
3111
3112     if (fib) 
3113       {
3114         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
3115         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
3116     }
3117   }
3118
3119  done:
3120   return error;
3121 }
3122
3123 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
3124   .path = "set interface ip table",
3125   .function = add_del_interface_table,
3126   .short_help = "Add/delete FIB table id for interface",
3127 };
3128
3129
3130 static uword
3131 ip4_lookup_multicast (vlib_main_t * vm,
3132                       vlib_node_runtime_t * node,
3133                       vlib_frame_t * frame)
3134 {
3135   ip4_main_t * im = &ip4_main;
3136   ip_lookup_main_t * lm = &im->lookup_main;
3137   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
3138   u32 n_left_from, n_left_to_next, * from, * to_next;
3139   ip_lookup_next_t next;
3140   u32 cpu_index = os_get_cpu_number();
3141
3142   from = vlib_frame_vector_args (frame);
3143   n_left_from = frame->n_vectors;
3144   next = node->cached_next_index;
3145
3146   while (n_left_from > 0)
3147     {
3148       vlib_get_next_frame (vm, node, next,
3149                            to_next, n_left_to_next);
3150
3151       while (n_left_from >= 4 && n_left_to_next >= 2)
3152         {
3153           vlib_buffer_t * p0, * p1;
3154           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
3155           ip_lookup_next_t next0, next1;
3156           ip4_header_t * ip0, * ip1;
3157           ip_adjacency_t * adj0, * adj1;
3158           u32 fib_index0, fib_index1;
3159           u32 flow_hash_config0, flow_hash_config1;
3160
3161           /* Prefetch next iteration. */
3162           {
3163             vlib_buffer_t * p2, * p3;
3164
3165             p2 = vlib_get_buffer (vm, from[2]);
3166             p3 = vlib_get_buffer (vm, from[3]);
3167
3168             vlib_prefetch_buffer_header (p2, LOAD);
3169             vlib_prefetch_buffer_header (p3, LOAD);
3170
3171             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
3172             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
3173           }
3174
3175           pi0 = to_next[0] = from[0];
3176           pi1 = to_next[1] = from[1];
3177
3178           p0 = vlib_get_buffer (vm, pi0);
3179           p1 = vlib_get_buffer (vm, pi1);
3180
3181           ip0 = vlib_buffer_get_current (p0);
3182           ip1 = vlib_buffer_get_current (p1);
3183
3184           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3185           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
3186           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3187             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3188           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
3189             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
3190
3191           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3192                                               &ip0->dst_address, p0);
3193           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
3194                                               &ip1->dst_address, p1);
3195
3196           adj0 = ip_get_adjacency (lm, adj_index0);
3197           adj1 = ip_get_adjacency (lm, adj_index1);
3198
3199           next0 = adj0->lookup_next_index;
3200           next1 = adj1->lookup_next_index;
3201
3202           flow_hash_config0 = 
3203               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3204
3205           flow_hash_config1 = 
3206               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
3207
3208           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
3209               (ip0, flow_hash_config0);
3210                                                                   
3211           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
3212               (ip1, flow_hash_config1);
3213
3214           ASSERT (adj0->n_adj > 0);
3215           ASSERT (adj1->n_adj > 0);
3216           ASSERT (is_pow2 (adj0->n_adj));
3217           ASSERT (is_pow2 (adj1->n_adj));
3218           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3219           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
3220
3221           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3222           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
3223
3224           if (1) /* $$$$$$ HACK FIXME */
3225           vlib_increment_combined_counter 
3226               (cm, cpu_index, adj_index0, 1,
3227                vlib_buffer_length_in_chain (vm, p0));
3228           if (1) /* $$$$$$ HACK FIXME */
3229           vlib_increment_combined_counter 
3230               (cm, cpu_index, adj_index1, 1,
3231                vlib_buffer_length_in_chain (vm, p1));
3232
3233           from += 2;
3234           to_next += 2;
3235           n_left_to_next -= 2;
3236           n_left_from -= 2;
3237
3238           wrong_next = (next0 != next) + 2*(next1 != next);
3239           if (PREDICT_FALSE (wrong_next != 0))
3240             {
3241               switch (wrong_next)
3242                 {
3243                 case 1:
3244                   /* A B A */
3245                   to_next[-2] = pi1;
3246                   to_next -= 1;
3247                   n_left_to_next += 1;
3248                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3249                   break;
3250
3251                 case 2:
3252                   /* A A B */
3253                   to_next -= 1;
3254                   n_left_to_next += 1;
3255                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3256                   break;
3257
3258                 case 3:
3259                   /* A B C */
3260                   to_next -= 2;
3261                   n_left_to_next += 2;
3262                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3263                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3264                   if (next0 == next1)
3265                     {
3266                       /* A B B */
3267                       vlib_put_next_frame (vm, node, next, n_left_to_next);
3268                       next = next1;
3269                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
3270                     }
3271                 }
3272             }
3273         }
3274     
3275       while (n_left_from > 0 && n_left_to_next > 0)
3276         {
3277           vlib_buffer_t * p0;
3278           ip4_header_t * ip0;
3279           u32 pi0, adj_index0;
3280           ip_lookup_next_t next0;
3281           ip_adjacency_t * adj0;
3282           u32 fib_index0;
3283           u32 flow_hash_config0;
3284
3285           pi0 = from[0];
3286           to_next[0] = pi0;
3287
3288           p0 = vlib_get_buffer (vm, pi0);
3289
3290           ip0 = vlib_buffer_get_current (p0);
3291
3292           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
3293                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3294           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3295               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3296           
3297           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3298                                               &ip0->dst_address, p0);
3299
3300           adj0 = ip_get_adjacency (lm, adj_index0);
3301
3302           next0 = adj0->lookup_next_index;
3303
3304           flow_hash_config0 = 
3305               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3306
3307           vnet_buffer (p0)->ip.flow_hash = 
3308             ip4_compute_flow_hash (ip0, flow_hash_config0);
3309
3310           ASSERT (adj0->n_adj > 0);
3311           ASSERT (is_pow2 (adj0->n_adj));
3312           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3313
3314           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3315
3316           if (1) /* $$$$$$ HACK FIXME */
3317               vlib_increment_combined_counter 
3318                   (cm, cpu_index, adj_index0, 1,
3319                    vlib_buffer_length_in_chain (vm, p0));
3320
3321           from += 1;
3322           to_next += 1;
3323           n_left_to_next -= 1;
3324           n_left_from -= 1;
3325
3326           if (PREDICT_FALSE (next0 != next))
3327             {
3328               n_left_to_next += 1;
3329               vlib_put_next_frame (vm, node, next, n_left_to_next);
3330               next = next0;
3331               vlib_get_next_frame (vm, node, next,
3332                                    to_next, n_left_to_next);
3333               to_next[0] = pi0;
3334               to_next += 1;
3335               n_left_to_next -= 1;
3336             }
3337         }
3338
3339       vlib_put_next_frame (vm, node, next, n_left_to_next);
3340     }
3341
3342   if (node->flags & VLIB_NODE_FLAG_TRACE)
3343       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
3344
3345   return frame->n_vectors;
3346 }
3347
3348 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3349   .function = ip4_lookup_multicast,
3350   .name = "ip4-lookup-multicast",
3351   .vector_size = sizeof (u32),
3352   .sibling_of = "ip4-lookup",
3353   .format_trace = format_ip4_lookup_trace,
3354
3355   .n_next_nodes = 0,
3356 };
3357
3358 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
3359
3360 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3361   .function = ip4_drop,
3362   .name = "ip4-multicast",
3363   .vector_size = sizeof (u32),
3364
3365   .format_trace = format_ip4_forward_next_trace,
3366
3367   .n_next_nodes = 1,
3368   .next_nodes = {
3369     [0] = "error-drop",
3370   },
3371 };
3372
3373 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3374 {
3375   ip4_main_t * im = &ip4_main;
3376   ip4_fib_mtrie_t * mtrie0;
3377   ip4_fib_mtrie_leaf_t leaf0;
3378   u32 adj_index0;
3379     
3380   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3381
3382   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3383   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3384   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3385   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3386   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3387   
3388   /* Handle default route. */
3389   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3390   
3391   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3392   
3393   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3394                                                   a, 
3395                                                   /* no_default_route */ 0);
3396 }
3397  
3398 static clib_error_t *
3399 test_lookup_command_fn (vlib_main_t * vm,
3400                         unformat_input_t * input,
3401                         vlib_cli_command_t * cmd)
3402 {
3403   u32 table_id = 0;
3404   f64 count = 1;
3405   u32 n;
3406   int i;
3407   ip4_address_t ip4_base_address;
3408   u64 errors = 0;
3409
3410   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3411       if (unformat (input, "table %d", &table_id))
3412         ;
3413       else if (unformat (input, "count %f", &count))
3414         ;
3415
3416       else if (unformat (input, "%U",
3417                          unformat_ip4_address, &ip4_base_address))
3418         ;
3419       else
3420         return clib_error_return (0, "unknown input `%U'",
3421                                   format_unformat_error, input);
3422   }
3423
3424   n = count;
3425
3426   for (i = 0; i < n; i++)
3427     {
3428       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3429         errors++;
3430
3431       ip4_base_address.as_u32 = 
3432         clib_host_to_net_u32 (1 + 
3433                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3434     }
3435
3436   if (errors) 
3437     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3438   else
3439     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3440
3441   return 0;
3442 }
3443
3444 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3445     .path = "test lookup",
3446     .short_help = "test lookup",
3447     .function = test_lookup_command_fn,
3448 };
3449
3450 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3451 {
3452   ip4_main_t * im4 = &ip4_main;
3453   ip4_fib_t * fib;
3454   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3455
3456   if (p == 0)
3457     return VNET_API_ERROR_NO_SUCH_FIB;
3458
3459   fib = vec_elt_at_index (im4->fibs, p[0]);
3460
3461   fib->flow_hash_config = flow_hash_config;
3462   return 0;
3463 }
3464  
3465 static clib_error_t *
3466 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3467                              unformat_input_t * input,
3468                              vlib_cli_command_t * cmd)
3469 {
3470   int matched = 0;
3471   u32 table_id = 0;
3472   u32 flow_hash_config = 0;
3473   int rv;
3474
3475   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3476     if (unformat (input, "table %d", &table_id))
3477       matched = 1;
3478 #define _(a,v) \
3479     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3480     foreach_flow_hash_bit
3481 #undef _
3482     else break;
3483   }
3484   
3485   if (matched == 0)
3486     return clib_error_return (0, "unknown input `%U'",
3487                               format_unformat_error, input);
3488   
3489   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3490   switch (rv)
3491     {
3492     case 0:
3493       break;
3494       
3495     case VNET_API_ERROR_NO_SUCH_FIB:
3496       return clib_error_return (0, "no such FIB table %d", table_id);
3497       
3498     default:
3499       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3500       break;
3501     }
3502   
3503   return 0;
3504 }
3505  
3506 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3507   .path = "set ip flow-hash",
3508   .short_help = 
3509   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3510   .function = set_ip_flow_hash_command_fn,
3511 };
3512  
3513 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3514                                  u32 table_index)
3515 {
3516   vnet_main_t * vnm = vnet_get_main();
3517   vnet_interface_main_t * im = &vnm->interface_main;
3518   ip4_main_t * ipm = &ip4_main;
3519   ip_lookup_main_t * lm = &ipm->lookup_main;
3520   vnet_classify_main_t * cm = &vnet_classify_main;
3521
3522   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3523     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3524
3525   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3526     return VNET_API_ERROR_NO_SUCH_ENTRY;
3527
3528   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3529   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3530
3531   return 0;
3532 }
3533
3534 static clib_error_t *
3535 set_ip_classify_command_fn (vlib_main_t * vm,
3536                             unformat_input_t * input,
3537                             vlib_cli_command_t * cmd)
3538 {
3539   u32 table_index = ~0;
3540   int table_index_set = 0;
3541   u32 sw_if_index = ~0;
3542   int rv;
3543   
3544   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3545     if (unformat (input, "table-index %d", &table_index))
3546       table_index_set = 1;
3547     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3548                        vnet_get_main(), &sw_if_index))
3549       ;
3550     else
3551       break;
3552   }
3553       
3554   if (table_index_set == 0)
3555     return clib_error_return (0, "classify table-index must be specified");
3556
3557   if (sw_if_index == ~0)
3558     return clib_error_return (0, "interface / subif must be specified");
3559
3560   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3561
3562   switch (rv)
3563     {
3564     case 0:
3565       break;
3566
3567     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3568       return clib_error_return (0, "No such interface");
3569
3570     case VNET_API_ERROR_NO_SUCH_ENTRY:
3571       return clib_error_return (0, "No such classifier table");
3572     }
3573   return 0;
3574 }
3575
3576 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3577     .path = "set ip classify",
3578     .short_help = 
3579     "set ip classify intfc <int> table-index <index>",
3580     .function = set_ip_classify_command_fn,
3581 };
3582