VPP-142 Recursive route sending traffic to wrong interface
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47
48 /** \file
49     vnet ip4 forwarding 
50 */
51
52 /* This is really, really simple but stupid fib. */
53 u32
54 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
55                            ip4_address_t * dst,
56                            u32 disable_default_route)
57 {
58   ip_lookup_main_t * lm = &im->lookup_main;
59   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
60   uword * p, * hash, key;
61   i32 i, i_min, dst_address, ai;
62
63   i_min = disable_default_route ? 1 : 0;
64   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
65   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
66     {
67       hash = fib->adj_index_by_dst_address[i];
68       if (! hash)
69         continue;
70
71       key = dst_address & im->fib_masks[i];
72       if ((p = hash_get (hash, key)) != 0)
73         {
74           ai = p[0];
75           goto done;
76         }
77     }
78     
79   /* Nothing matches in table. */
80   ai = lm->miss_adj_index;
81
82  done:
83   return ai;
84 }
85
86 static ip4_fib_t *
87 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
88 {
89   ip4_fib_t * fib;
90   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
91   vec_add2 (im->fibs, fib, 1);
92   fib->table_id = table_id;
93   fib->index = fib - im->fibs;
94   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
95   fib->fwd_classify_table_index = ~0;
96   fib->rev_classify_table_index = ~0;
97   ip4_mtrie_init (&fib->mtrie);
98   return fib;
99 }
100
101 ip4_fib_t *
102 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
103                                    u32 table_index_or_id, u32 flags)
104 {
105   uword * p, fib_index;
106
107   fib_index = table_index_or_id;
108   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
109     {
110       if (table_index_or_id == ~0) {
111         table_index_or_id = 0;
112         while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
113           table_index_or_id++;
114         }
115         return create_fib_with_table_id (im, table_index_or_id);
116       }
117
118       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
119       if (! p)
120         return create_fib_with_table_id (im, table_index_or_id);
121       fib_index = p[0];
122     }
123   return vec_elt_at_index (im->fibs, fib_index);
124 }
125
126 static void
127 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
128                                        ip4_fib_t * fib,
129                                        u32 address_length)
130 {
131   hash_t * h;
132   uword max_index;
133
134   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
135   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
136
137   fib->adj_index_by_dst_address[address_length] =
138     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
139
140   hash_set_flags (fib->adj_index_by_dst_address[address_length],
141                   HASH_FLAG_NO_AUTO_SHRINK);
142
143   h = hash_header (fib->adj_index_by_dst_address[address_length]);
144   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
145
146   /* Initialize new/old hash value vectors. */
147   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
148   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
149 }
150
151 static void
152 ip4_fib_set_adj_index (ip4_main_t * im,
153                        ip4_fib_t * fib,
154                        u32 flags,
155                        u32 dst_address_u32,
156                        u32 dst_address_length,
157                        u32 adj_index)
158 {
159   ip_lookup_main_t * lm = &im->lookup_main;
160   uword * hash;
161
162   if (vec_bytes(fib->old_hash_values))
163     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
164   if (vec_bytes(fib->new_hash_values))
165     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
166   fib->new_hash_values[0] = adj_index;
167
168   /* Make sure adj index is valid. */
169   if (CLIB_DEBUG > 0)
170     (void) ip_get_adjacency (lm, adj_index);
171
172   hash = fib->adj_index_by_dst_address[dst_address_length];
173
174   hash = _hash_set3 (hash, dst_address_u32,
175                      fib->new_hash_values,
176                      fib->old_hash_values);
177
178   fib->adj_index_by_dst_address[dst_address_length] = hash;
179
180   if (vec_len (im->add_del_route_callbacks) > 0)
181     {
182       ip4_add_del_route_callback_t * cb;
183       ip4_address_t d;
184       uword * p;
185
186       d.data_u32 = dst_address_u32;
187       vec_foreach (cb, im->add_del_route_callbacks)
188         if ((flags & cb->required_flags) == cb->required_flags)
189           cb->function (im, cb->function_opaque,
190                         fib, flags,
191                         &d, dst_address_length,
192                         fib->old_hash_values,
193                         fib->new_hash_values);
194
195       p = hash_get (hash, dst_address_u32);
196       clib_memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
197     }
198 }
199
200 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
201 {
202   ip_lookup_main_t * lm = &im->lookup_main;
203   ip4_fib_t * fib;
204   u32 dst_address, dst_address_length, adj_index, old_adj_index;
205   uword * hash, is_del;
206   ip4_add_del_route_callback_t * cb;
207
208   /* Either create new adjacency or use given one depending on arguments. */
209   if (a->n_add_adj > 0)
210     {
211       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
212       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
213     }
214   else
215     adj_index = a->adj_index;
216
217   dst_address = a->dst_address.data_u32;
218   dst_address_length = a->dst_address_length;
219   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
220
221   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
222   dst_address &= im->fib_masks[dst_address_length];
223
224   if (! fib->adj_index_by_dst_address[dst_address_length])
225     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
226
227   hash = fib->adj_index_by_dst_address[dst_address_length];
228
229   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
230
231   if (is_del)
232     {
233       fib->old_hash_values[0] = ~0;
234       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
235       fib->adj_index_by_dst_address[dst_address_length] = hash;
236
237       if (vec_len (im->add_del_route_callbacks) > 0
238           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
239         {
240           fib->new_hash_values[0] = ~0;
241           vec_foreach (cb, im->add_del_route_callbacks)
242             if ((a->flags & cb->required_flags) == cb->required_flags)
243               cb->function (im, cb->function_opaque,
244                             fib, a->flags,
245                             &a->dst_address, dst_address_length,
246                             fib->old_hash_values,
247                             fib->new_hash_values);
248         }
249     }
250   else
251     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
252                            adj_index);
253
254   old_adj_index = fib->old_hash_values[0];
255
256   /* Avoid spurious reference count increments */
257   if (old_adj_index == adj_index
258       && adj_index != ~0
259       && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
260     {
261       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
262       if (adj->share_count > 0)
263         adj->share_count --;
264     }
265
266   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
267                                is_del ? old_adj_index : adj_index,
268                                is_del);
269
270   /* Delete old adjacency index if present and changed. */
271   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
272       && old_adj_index != ~0
273       && old_adj_index != adj_index)
274     ip_del_adjacency (lm, old_adj_index);
275 }
276
277
278 u32
279 ip4_route_get_next_hop_adj (ip4_main_t * im,
280                             u32 fib_index,
281                             ip4_address_t *next_hop,
282                             u32 next_hop_sw_if_index,
283                             u32 explicit_fib_index)
284 {
285   ip_lookup_main_t * lm = &im->lookup_main;
286   vnet_main_t * vnm = vnet_get_main();
287   uword * nh_hash, * nh_result;
288   int is_interface_next_hop;
289   u32 nh_adj_index;
290   ip4_fib_t * fib;
291
292   fib = vec_elt_at_index (im->fibs, fib_index);
293
294   is_interface_next_hop = next_hop->data_u32 == 0;
295   if (is_interface_next_hop)
296     {
297       nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
298       if (nh_result)
299           nh_adj_index = *nh_result;
300       else
301         {
302            ip_adjacency_t * adj;
303            adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
304                                    &nh_adj_index);
305            ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
306            ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
307            hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
308         }
309     }
310   else if (next_hop_sw_if_index == ~0)
311     {
312       /* next-hop is recursive. we always need a indirect adj
313        * for recursive paths. Any LPM we perform now will give
314        * us a valid adj, but without tracking the next-hop we
315        * have no way to keep it valid.
316        */
317       ip_adjacency_t add_adj;
318       memset (&add_adj, 0, sizeof(add_adj));
319       add_adj.n_adj = 1;
320       add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
321       add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
322       add_adj.explicit_fib_index = explicit_fib_index;
323       ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
324     }
325   else
326     {
327       nh_hash = fib->adj_index_by_dst_address[32];
328       nh_result = hash_get (nh_hash, next_hop->data_u32);
329
330       /* Next hop must be known. */
331       if (! nh_result)
332         {
333           ip_adjacency_t * adj;
334
335           /* no /32 exists, get the longest prefix match */
336           nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
337                                                     next_hop, 0);
338           adj = ip_get_adjacency (lm, nh_adj_index);
339           /* if ARP interface adjacency is present, we need to
340              install ARP adjaceny for specific next hop */
341           if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
342               adj->arp.next_hop.ip4.as_u32 == 0)
343             {
344               nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
345             }
346         }
347       else
348         {
349           nh_adj_index = *nh_result;
350         }
351     }
352
353   return (nh_adj_index);
354 }
355
356 void
357 ip4_add_del_route_next_hop (ip4_main_t * im,
358                             u32 flags,
359                             ip4_address_t * dst_address,
360                             u32 dst_address_length,
361                             ip4_address_t * next_hop,
362                             u32 next_hop_sw_if_index,
363                             u32 next_hop_weight, u32 adj_index, 
364                             u32 explicit_fib_index)
365 {
366   vnet_main_t * vnm = vnet_get_main();
367   ip_lookup_main_t * lm = &im->lookup_main;
368   u32 fib_index;
369   ip4_fib_t * fib;
370   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
371   u32 dst_adj_index, nh_adj_index;
372   uword * dst_hash, * dst_result;
373   ip_adjacency_t * dst_adj;
374   ip_multipath_adjacency_t * old_mp, * new_mp;
375   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
376   clib_error_t * error = 0;
377
378   if (explicit_fib_index == (u32)~0)
379       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
380   else
381       fib_index = explicit_fib_index;
382
383   fib = vec_elt_at_index (im->fibs, fib_index);
384
385   /* Lookup next hop to be added or deleted. */
386   if (adj_index == (u32)~0)
387     {
388         nh_adj_index = ip4_route_get_next_hop_adj(im, fib_index,
389                                                   next_hop,
390                                                   next_hop_sw_if_index,
391                                                   explicit_fib_index);
392     }
393   else
394     {
395       nh_adj_index = adj_index;
396     }
397   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
398   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
399
400   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
401   dst_result = hash_get (dst_hash, dst_address_u32);
402   if (dst_result)
403     {
404       dst_adj_index = dst_result[0];
405       dst_adj = ip_get_adjacency (lm, dst_adj_index);
406     }
407   else
408     {
409       /* For deletes destination must be known. */
410       if (is_del)
411         {
412           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
413           error = clib_error_return (0, "unknown destination %U/%d",
414                                      format_ip4_address, dst_address,
415                                      dst_address_length);
416           goto done;
417         }
418
419       dst_adj_index = ~0;
420       dst_adj = 0;
421     }
422
423   /* Ignore adds of X/32 with next hop of X. */
424   if (! is_del
425       && dst_address_length == 32
426       && dst_address->data_u32 == next_hop->data_u32 
427       && adj_index != (u32)~0)
428     {
429       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
430       error = clib_error_return (0, "prefix matches next hop %U/%d",
431                                  format_ip4_address, dst_address,
432                                  dst_address_length);
433       goto done;
434     }
435
436   /* Destination is not known and default weight is set so add route
437      to existing non-multipath adjacency */
438   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
439     {
440       /* create / delete additional mapping of existing adjacency */
441       ip4_add_del_route_args_t a;
442       ip_adjacency_t * nh_adj = ip_get_adjacency (lm, nh_adj_index);
443
444       a.table_index_or_table_id = fib_index;
445       a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
446                  | IP4_ROUTE_FLAG_FIB_INDEX
447                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
448                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
449                              | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
450       a.dst_address = dst_address[0];
451       a.dst_address_length = dst_address_length;
452       a.adj_index = nh_adj_index;
453       a.add_adj = 0;
454       a.n_add_adj = 0;
455
456       ip4_add_del_route (im, &a);
457
458       /* adjust share count. This cannot be the only use of the adjacency */
459       nh_adj->share_count += is_del ? -1 : 1;
460         
461       goto done;
462     }
463
464   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
465
466   if (! ip_multipath_adjacency_add_del_next_hop
467       (lm, is_del,
468        old_mp_adj_index,
469        nh_adj_index,
470        next_hop_weight,
471        &new_mp_adj_index))
472     {
473       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
474       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
475                                  format_ip4_address, next_hop);
476       goto done;
477     }
478   
479   old_mp = new_mp = 0;
480   if (old_mp_adj_index != ~0)
481     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
482   if (new_mp_adj_index != ~0)
483     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
484
485   if (old_mp != new_mp)
486     {
487       ip4_add_del_route_args_t a;
488       ip_adjacency_t * adj;
489
490       a.table_index_or_table_id = fib_index;
491       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
492                  | IP4_ROUTE_FLAG_FIB_INDEX
493                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
494                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
495       a.dst_address = dst_address[0];
496       a.dst_address_length = dst_address_length;
497       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
498       a.add_adj = 0;
499       a.n_add_adj = 0;
500
501       ip4_add_del_route (im, &a);
502
503       adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index);
504       if (adj->n_adj == 1)
505         adj->share_count += is_del ? -1 : 1;
506     }
507
508  done:
509   if (error)
510     clib_error_report (error);
511 }
512
513 void *
514 ip4_get_route (ip4_main_t * im,
515                u32 table_index_or_table_id,
516                u32 flags,
517                u8 * address,
518                u32 address_length)
519 {
520   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
521   u32 dst_address = * (u32 *) address;
522   uword * hash, * p;
523
524   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
525   dst_address &= im->fib_masks[address_length];
526
527   hash = fib->adj_index_by_dst_address[address_length];
528   p = hash_get (hash, dst_address);
529   return (void *) p;
530 }
531
532 void
533 ip4_foreach_matching_route (ip4_main_t * im,
534                             u32 table_index_or_table_id,
535                             u32 flags,
536                             ip4_address_t * address,
537                             u32 address_length,
538                             ip4_address_t ** results,
539                             u8 ** result_lengths)
540 {
541   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
542   u32 dst_address = address->data_u32;
543   u32 this_length = address_length;
544   
545   if (*results)
546     _vec_len (*results) = 0;
547   if (*result_lengths)
548     _vec_len (*result_lengths) = 0;
549
550   while (this_length <= 32 && vec_len (results) == 0)
551     {
552       uword k, v;
553       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
554         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
555           {
556             ip4_address_t a;
557             a.data_u32 = k;
558             vec_add1 (*results, a);
559             vec_add1 (*result_lengths, this_length);
560           }
561       }));
562
563       this_length++;
564     }
565 }
566
567 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
568                                   u32 table_index_or_table_id,
569                                   u32 flags)
570 {
571   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
572   ip_lookup_main_t * lm = &im->lookup_main;
573   u32 i, l;
574   ip4_address_t a;
575   ip4_add_del_route_callback_t * cb;
576   static ip4_address_t * to_delete;
577
578   if (lm->n_adjacency_remaps == 0)
579     return;
580
581   for (l = 0; l <= 32; l++)
582     {
583       hash_pair_t * p;
584       uword * hash = fib->adj_index_by_dst_address[l];
585
586       if (hash_elts (hash) == 0)
587         continue;
588
589       if (to_delete)
590         _vec_len (to_delete) = 0;
591
592       hash_foreach_pair (p, hash, ({
593         u32 adj_index = p->value[0];
594         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
595
596         if (m)
597           {
598             /* Record destination address from hash key. */
599             a.data_u32 = p->key;
600
601             /* New adjacency points to nothing: so delete prefix. */
602             if (m == ~0)
603               vec_add1 (to_delete, a);
604             else
605               {
606                 /* Remap to new adjacency. */
607                 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
608
609                 /* Set new adjacency value. */
610                 fib->new_hash_values[0] = p->value[0] = m - 1;
611
612                 vec_foreach (cb, im->add_del_route_callbacks)
613                   if ((flags & cb->required_flags) == cb->required_flags)
614                     cb->function (im, cb->function_opaque,
615                                   fib, flags | IP4_ROUTE_FLAG_ADD,
616                                   &a, l,
617                                   fib->old_hash_values,
618                                   fib->new_hash_values);
619               }
620           }
621       }));
622
623       fib->new_hash_values[0] = ~0;
624       for (i = 0; i < vec_len (to_delete); i++)
625         {
626           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
627           vec_foreach (cb, im->add_del_route_callbacks)
628             if ((flags & cb->required_flags) == cb->required_flags)
629               cb->function (im, cb->function_opaque,
630                             fib, flags | IP4_ROUTE_FLAG_DEL,
631                             &a, l,
632                             fib->old_hash_values,
633                             fib->new_hash_values);
634         }
635     }
636
637   /* Also remap adjacencies in mtrie. */
638   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
639
640   /* Reset mapping table. */
641   vec_zero (lm->adjacency_remap_table);
642
643   /* All remaps have been performed. */
644   lm->n_adjacency_remaps = 0;
645 }
646
647 void ip4_delete_matching_routes (ip4_main_t * im,
648                                  u32 table_index_or_table_id,
649                                  u32 flags,
650                                  ip4_address_t * address,
651                                  u32 address_length)
652 {
653   static ip4_address_t * matching_addresses;
654   static u8 * matching_address_lengths;
655   u32 l, i;
656   ip4_add_del_route_args_t a;
657
658   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
659   a.table_index_or_table_id = table_index_or_table_id;
660   a.adj_index = ~0;
661   a.add_adj = 0;
662   a.n_add_adj = 0;
663
664   for (l = address_length + 1; l <= 32; l++)
665     {
666       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
667                                   address,
668                                   l,
669                                   &matching_addresses,
670                                   &matching_address_lengths);
671       for (i = 0; i < vec_len (matching_addresses); i++)
672         {
673           a.dst_address = matching_addresses[i];
674           a.dst_address_length = matching_address_lengths[i];
675           ip4_add_del_route (im, &a);
676         }
677     }
678
679   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
680 }
681
682 void
683 ip4_forward_next_trace (vlib_main_t * vm,
684                         vlib_node_runtime_t * node,
685                         vlib_frame_t * frame,
686                         vlib_rx_or_tx_t which_adj_index);
687
688 always_inline uword
689 ip4_lookup_inline (vlib_main_t * vm,
690                    vlib_node_runtime_t * node,
691                    vlib_frame_t * frame,
692                    int lookup_for_responses_to_locally_received_packets,
693                    int is_indirect)
694 {
695   ip4_main_t * im = &ip4_main;
696   ip_lookup_main_t * lm = &im->lookup_main;
697   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
698   u32 n_left_from, n_left_to_next, * from, * to_next;
699   ip_lookup_next_t next;
700   u32 cpu_index = os_get_cpu_number();
701
702   from = vlib_frame_vector_args (frame);
703   n_left_from = frame->n_vectors;
704   next = node->cached_next_index;
705
706   while (n_left_from > 0)
707     {
708       vlib_get_next_frame (vm, node, next,
709                            to_next, n_left_to_next);
710
711       while (n_left_from >= 4 && n_left_to_next >= 2)
712         {
713           vlib_buffer_t * p0, * p1;
714           ip4_header_t * ip0, * ip1;
715           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
716           ip_lookup_next_t next0, next1;
717           ip_adjacency_t * adj0, * adj1;
718           ip4_fib_mtrie_t * mtrie0, * mtrie1;
719           ip4_fib_mtrie_leaf_t leaf0, leaf1;
720           ip4_address_t * dst_addr0, *dst_addr1;
721           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
722           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
723           u32 flow_hash_config0, flow_hash_config1;
724           u32 hash_c0, hash_c1;
725           u32 wrong_next;
726
727           /* Prefetch next iteration. */
728           {
729             vlib_buffer_t * p2, * p3;
730
731             p2 = vlib_get_buffer (vm, from[2]);
732             p3 = vlib_get_buffer (vm, from[3]);
733
734             vlib_prefetch_buffer_header (p2, LOAD);
735             vlib_prefetch_buffer_header (p3, LOAD);
736
737             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
738             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
739           }
740
741           pi0 = to_next[0] = from[0];
742           pi1 = to_next[1] = from[1];
743
744           p0 = vlib_get_buffer (vm, pi0);
745           p1 = vlib_get_buffer (vm, pi1);
746
747           ip0 = vlib_buffer_get_current (p0);
748           ip1 = vlib_buffer_get_current (p1);
749
750           if (is_indirect)
751             {
752               ip_adjacency_t * iadj0, * iadj1;
753               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
754               iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
755               dst_addr0 = &iadj0->indirect.next_hop.ip4;
756               dst_addr1 = &iadj1->indirect.next_hop.ip4;
757             }
758           else
759             {
760               dst_addr0 = &ip0->dst_address;
761               dst_addr1 = &ip1->dst_address;
762             }
763
764           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
765           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
766           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
767             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
768           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
769             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
770
771
772           if (! lookup_for_responses_to_locally_received_packets)
773             {
774               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
775               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
776
777               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
778
779               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
780               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
781             }
782
783           tcp0 = (void *) (ip0 + 1);
784           tcp1 = (void *) (ip1 + 1);
785
786           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
787                          || ip0->protocol == IP_PROTOCOL_UDP);
788           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
789                          || ip1->protocol == IP_PROTOCOL_UDP);
790
791           if (! lookup_for_responses_to_locally_received_packets)
792             {
793               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
794               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
795             }
796
797           if (! lookup_for_responses_to_locally_received_packets)
798             {
799               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
800               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
801             }
802
803           if (! lookup_for_responses_to_locally_received_packets)
804             {
805               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
806               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
807             }
808
809           if (lookup_for_responses_to_locally_received_packets)
810             {
811               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
812               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
813             }
814           else
815             {
816               /* Handle default route. */
817               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
818               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
819
820               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
821               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
822             }
823
824           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
825                                                            dst_addr0,
826                                                            /* no_default_route */ 0));
827           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
828                                                            dst_addr1,
829                                                            /* no_default_route */ 0));
830           adj0 = ip_get_adjacency (lm, adj_index0);
831           adj1 = ip_get_adjacency (lm, adj_index1);
832
833           next0 = adj0->lookup_next_index;
834           next1 = adj1->lookup_next_index;
835
836           /* Use flow hash to compute multipath adjacency. */
837           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
838           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
839           if (PREDICT_FALSE (adj0->n_adj > 1))
840             {
841               flow_hash_config0 = 
842                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
843               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
844                 ip4_compute_flow_hash (ip0, flow_hash_config0);
845             }
846           if (PREDICT_FALSE(adj1->n_adj > 1))
847             {
848               flow_hash_config1 = 
849                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
850               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
851                 ip4_compute_flow_hash (ip1, flow_hash_config1);
852             }
853
854           ASSERT (adj0->n_adj > 0);
855           ASSERT (adj1->n_adj > 0);
856           ASSERT (is_pow2 (adj0->n_adj));
857           ASSERT (is_pow2 (adj1->n_adj));
858           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
859           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
860
861           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
862           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
863
864           vlib_increment_combined_counter 
865               (cm, cpu_index, adj_index0, 1,
866                vlib_buffer_length_in_chain (vm, p0) 
867                + sizeof(ethernet_header_t));
868           vlib_increment_combined_counter 
869               (cm, cpu_index, adj_index1, 1,
870                vlib_buffer_length_in_chain (vm, p1)
871                + sizeof(ethernet_header_t));
872
873           from += 2;
874           to_next += 2;
875           n_left_to_next -= 2;
876           n_left_from -= 2;
877
878           wrong_next = (next0 != next) + 2*(next1 != next);
879           if (PREDICT_FALSE (wrong_next != 0))
880             {
881               switch (wrong_next)
882                 {
883                 case 1:
884                   /* A B A */
885                   to_next[-2] = pi1;
886                   to_next -= 1;
887                   n_left_to_next += 1;
888                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
889                   break;
890
891                 case 2:
892                   /* A A B */
893                   to_next -= 1;
894                   n_left_to_next += 1;
895                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
896                   break;
897
898                 case 3:
899                   /* A B C */
900                   to_next -= 2;
901                   n_left_to_next += 2;
902                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
903                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
904                   if (next0 == next1)
905                     {
906                       /* A B B */
907                       vlib_put_next_frame (vm, node, next, n_left_to_next);
908                       next = next1;
909                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
910                     }
911                 }
912             }
913         }
914     
915       while (n_left_from > 0 && n_left_to_next > 0)
916         {
917           vlib_buffer_t * p0;
918           ip4_header_t * ip0;
919           __attribute__((unused)) tcp_header_t * tcp0;
920           ip_lookup_next_t next0;
921           ip_adjacency_t * adj0;
922           ip4_fib_mtrie_t * mtrie0;
923           ip4_fib_mtrie_leaf_t leaf0;
924           ip4_address_t * dst_addr0;
925           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
926           u32 flow_hash_config0, hash_c0;
927
928           pi0 = from[0];
929           to_next[0] = pi0;
930
931           p0 = vlib_get_buffer (vm, pi0);
932
933           ip0 = vlib_buffer_get_current (p0);
934
935           if (is_indirect)
936             {
937               ip_adjacency_t * iadj0;
938               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
939               dst_addr0 = &iadj0->indirect.next_hop.ip4;
940             }
941           else
942             {
943               dst_addr0 = &ip0->dst_address;
944             }
945
946           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
947           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
948             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
949
950           if (! lookup_for_responses_to_locally_received_packets)
951             {
952               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
953
954               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
955
956               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
957             }
958
959           tcp0 = (void *) (ip0 + 1);
960
961           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
962                          || ip0->protocol == IP_PROTOCOL_UDP);
963
964           if (! lookup_for_responses_to_locally_received_packets)
965             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
966
967           if (! lookup_for_responses_to_locally_received_packets)
968             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
969
970           if (! lookup_for_responses_to_locally_received_packets)
971             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
972
973           if (lookup_for_responses_to_locally_received_packets)
974             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
975           else
976             {
977               /* Handle default route. */
978               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
979               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
980             }
981
982           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
983                                                            dst_addr0,
984                                                            /* no_default_route */ 0));
985
986           adj0 = ip_get_adjacency (lm, adj_index0);
987
988           next0 = adj0->lookup_next_index;
989
990           /* Use flow hash to compute multipath adjacency. */
991           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
992           if (PREDICT_FALSE(adj0->n_adj > 1))
993             {
994               flow_hash_config0 = 
995                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
996
997               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
998                 ip4_compute_flow_hash (ip0, flow_hash_config0);
999             }
1000
1001           ASSERT (adj0->n_adj > 0);
1002           ASSERT (is_pow2 (adj0->n_adj));
1003           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
1004
1005           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1006
1007           vlib_increment_combined_counter 
1008               (cm, cpu_index, adj_index0, 1,
1009                vlib_buffer_length_in_chain (vm, p0)
1010                + sizeof(ethernet_header_t));
1011
1012           from += 1;
1013           to_next += 1;
1014           n_left_to_next -= 1;
1015           n_left_from -= 1;
1016
1017           if (PREDICT_FALSE (next0 != next))
1018             {
1019               n_left_to_next += 1;
1020               vlib_put_next_frame (vm, node, next, n_left_to_next);
1021               next = next0;
1022               vlib_get_next_frame (vm, node, next,
1023                                    to_next, n_left_to_next);
1024               to_next[0] = pi0;
1025               to_next += 1;
1026               n_left_to_next -= 1;
1027             }
1028         }
1029
1030       vlib_put_next_frame (vm, node, next, n_left_to_next);
1031     }
1032
1033   if (node->flags & VLIB_NODE_FLAG_TRACE)
1034     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
1035
1036   return frame->n_vectors;
1037 }
1038
1039 /** \brief IPv4 lookup node.
1040     @node ip4-lookup
1041
1042     This is the main IPv4 lookup dispatch node.
1043
1044     @param vm vlib_main_t corresponding to the current thread
1045     @param node vlib_node_runtime_t
1046     @param frame vlib_frame_t whose contents should be dispatched
1047
1048     @par Graph mechanics: buffer metadata, next index usage
1049
1050     @em Uses:
1051     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
1052         - Indicates the @c sw_if_index value of the interface that the
1053           packet was received on.
1054     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
1055         - When the value is @c ~0 then the node performs a longest prefix
1056           match (LPM) for the packet destination address in the FIB attached
1057           to the receive interface.
1058         - Otherwise perform LPM for the packet destination address in the
1059           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
1060           value (0, 1, ...) and not a VRF id.
1061
1062     @em Sets:
1063     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
1064         - The lookup result adjacency index.
1065
1066     <em>Next Index:</em>
1067     - Dispatches the packet to the node index found in
1068       ip_adjacency_t @c adj->lookup_next_index
1069       (where @c adj is the lookup result adjacency).
1070 */
1071 static uword
1072 ip4_lookup (vlib_main_t * vm,
1073             vlib_node_runtime_t * node,
1074             vlib_frame_t * frame)
1075 {
1076   return ip4_lookup_inline (vm, node, frame,
1077                             /* lookup_for_responses_to_locally_received_packets */ 0,
1078                             /* is_indirect */ 0);
1079
1080 }
1081
1082 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
1083                                         ip_adjacency_t * adj,
1084                                         u32 sw_if_index,
1085                                         u32 if_address_index)
1086 {
1087   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1088   ip_lookup_next_t n;
1089   vnet_l3_packet_type_t packet_type;
1090   u32 node_index;
1091
1092   if (hw->hw_class_index == ethernet_hw_interface_class.index
1093       || hw->hw_class_index == srp_hw_interface_class.index)
1094     {
1095       /* 
1096        * We have a bit of a problem in this case. ip4-arp uses
1097        * the rewrite_header.next_index to hand pkts to the
1098        * indicated inteface output node. We can end up in
1099        * ip4_rewrite_local, too, which also pays attention to 
1100        * rewrite_header.next index. Net result: a hack in
1101        * ip4_rewrite_local...
1102        */
1103       n = IP_LOOKUP_NEXT_ARP;
1104       node_index = ip4_arp_node.index;
1105       adj->if_address_index = if_address_index;
1106       adj->arp.next_hop.ip4.as_u32 = 0;
1107       ip46_address_reset(&adj->arp.next_hop);
1108       packet_type = VNET_L3_PACKET_TYPE_ARP;
1109     }
1110   else
1111     {
1112       n = IP_LOOKUP_NEXT_REWRITE;
1113       node_index = ip4_rewrite_node.index;
1114       packet_type = VNET_L3_PACKET_TYPE_IP4;
1115     }
1116
1117   adj->lookup_next_index = n;
1118   vnet_rewrite_for_sw_interface
1119     (vnm,
1120      packet_type,
1121      sw_if_index,
1122      node_index,
1123      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1124      &adj->rewrite_header,
1125      sizeof (adj->rewrite_data));
1126 }
1127
1128 static void
1129 ip4_add_interface_routes (u32 sw_if_index,
1130                           ip4_main_t * im, u32 fib_index,
1131                           ip_interface_address_t * a)
1132 {
1133   vnet_main_t * vnm = vnet_get_main();
1134   ip_lookup_main_t * lm = &im->lookup_main;
1135   ip_adjacency_t * adj;
1136   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1137   ip4_add_del_route_args_t x;
1138   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1139   u32 classify_table_index;
1140
1141   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1142   x.table_index_or_table_id = fib_index;
1143   x.flags = (IP4_ROUTE_FLAG_ADD
1144              | IP4_ROUTE_FLAG_FIB_INDEX
1145              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1146   x.dst_address = address[0];
1147   x.dst_address_length = a->address_length;
1148   x.n_add_adj = 0;
1149   x.add_adj = 0;
1150
1151   a->neighbor_probe_adj_index = ~0;
1152   if (a->address_length < 32)
1153     {
1154       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1155                               &x.adj_index);
1156       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1157       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1158       ip4_add_del_route (im, &x);
1159       a->neighbor_probe_adj_index = x.adj_index;
1160     }
1161   
1162   /* Add e.g. 1.1.1.1/32 as local to this host. */
1163   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1164                           &x.adj_index);
1165   
1166   classify_table_index = ~0;
1167   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1168     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1169   if (classify_table_index != (u32) ~0)
1170     {
1171       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1172       adj->classify.table_index = classify_table_index;
1173     }
1174   else
1175     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1176   
1177   adj->if_address_index = a - lm->if_address_pool;
1178   adj->rewrite_header.sw_if_index = sw_if_index;
1179   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1180   /* 
1181    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1182    * fail an RPF-ish check, but still go thru the rewrite code...
1183    */
1184   adj->rewrite_header.data_bytes = 0;
1185
1186   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1187   x.dst_address_length = 32;
1188   ip4_add_del_route (im, &x);
1189 }
1190
1191 static void
1192 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1193 {
1194   ip4_add_del_route_args_t x;
1195
1196   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1197   x.table_index_or_table_id = fib_index;
1198   x.flags = (IP4_ROUTE_FLAG_DEL
1199              | IP4_ROUTE_FLAG_FIB_INDEX
1200              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1201   x.dst_address = address[0];
1202   x.dst_address_length = address_length;
1203   x.adj_index = ~0;
1204   x.n_add_adj = 0;
1205   x.add_adj = 0;
1206
1207   if (address_length < 32)
1208     ip4_add_del_route (im, &x);
1209
1210   x.dst_address_length = 32;
1211   ip4_add_del_route (im, &x);
1212
1213   ip4_delete_matching_routes (im,
1214                               fib_index,
1215                               IP4_ROUTE_FLAG_FIB_INDEX,
1216                               address,
1217                               address_length);
1218 }
1219
1220 typedef struct {
1221     u32 sw_if_index;
1222     ip4_address_t address;
1223     u32 length;
1224 } ip4_interface_address_t;
1225
1226 static clib_error_t *
1227 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1228                                         u32 sw_if_index,
1229                                         ip4_address_t * new_address,
1230                                         u32 new_length,
1231                                         u32 redistribute,
1232                                         u32 insert_routes,
1233                                         u32 is_del);
1234
1235 static clib_error_t *
1236 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1237                                         u32 sw_if_index,
1238                                         ip4_address_t * address,
1239                                         u32 address_length,
1240                                         u32 redistribute,
1241                                         u32 insert_routes,
1242                                         u32 is_del)
1243 {
1244   vnet_main_t * vnm = vnet_get_main();
1245   ip4_main_t * im = &ip4_main;
1246   ip_lookup_main_t * lm = &im->lookup_main;
1247   clib_error_t * error = 0;
1248   u32 if_address_index, elts_before;
1249   ip4_address_fib_t ip4_af, * addr_fib = 0;
1250
1251   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1252   ip4_addr_fib_init (&ip4_af, address,
1253                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1254   vec_add1 (addr_fib, ip4_af);
1255
1256   /* When adding an address check that it does not conflict with an existing address. */
1257   if (! is_del)
1258     {
1259       ip_interface_address_t * ia;
1260       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1261                                     0 /* honor unnumbered */,
1262       ({
1263         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1264
1265         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1266             || ip4_destination_matches_route (im, x, address, address_length))
1267           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1268                                     format_ip4_address_and_length, address, address_length,
1269                                     format_ip4_address_and_length, x, ia->address_length,
1270                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1271       }));
1272     }
1273
1274   elts_before = pool_elts (lm->if_address_pool);
1275
1276   error = ip_interface_address_add_del
1277     (lm,
1278      sw_if_index,
1279      addr_fib,
1280      address_length,
1281      is_del,
1282      &if_address_index);
1283   if (error)
1284     goto done;
1285   
1286   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1287     {
1288       if (is_del)
1289         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1290                                   address_length);
1291       
1292       else
1293           ip4_add_interface_routes (sw_if_index,
1294                                     im, ip4_af.fib_index,
1295                                     pool_elt_at_index 
1296                                     (lm->if_address_pool, if_address_index));
1297     }
1298
1299   /* If pool did not grow/shrink: add duplicate address. */
1300   if (elts_before != pool_elts (lm->if_address_pool))
1301     {
1302       ip4_add_del_interface_address_callback_t * cb;
1303       vec_foreach (cb, im->add_del_interface_address_callbacks)
1304         cb->function (im, cb->function_opaque, sw_if_index,
1305                       address, address_length,
1306                       if_address_index,
1307                       is_del);
1308     }
1309
1310  done:
1311   vec_free (addr_fib);
1312   return error;
1313 }
1314
1315 clib_error_t *
1316 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1317                                ip4_address_t * address, u32 address_length,
1318                                u32 is_del)
1319 {
1320   return ip4_add_del_interface_address_internal
1321     (vm, sw_if_index, address, address_length,
1322      /* redistribute */ 1,
1323      /* insert_routes */ 1,
1324      is_del);
1325 }
1326
1327 static clib_error_t *
1328 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1329                                 u32 sw_if_index,
1330                                 u32 flags)
1331 {
1332   ip4_main_t * im = &ip4_main;
1333   ip_interface_address_t * ia;
1334   ip4_address_t * a;
1335   u32 is_admin_up, fib_index;
1336   
1337   /* Fill in lookup tables with default table (0). */
1338   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1339   
1340   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1341   
1342   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1343   
1344   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1345
1346   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1347                                 0 /* honor unnumbered */,
1348   ({
1349     a = ip_interface_address_get_address (&im->lookup_main, ia);
1350     if (is_admin_up)
1351       ip4_add_interface_routes (sw_if_index,
1352                                 im, fib_index,
1353                                 ia);
1354     else
1355       ip4_del_interface_routes (im, fib_index,
1356                                 a, ia->address_length);
1357   }));
1358
1359   return 0;
1360 }
1361  
1362 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1363
1364 /* Built-in ip4 unicast rx feature path definition */
1365 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
1366   .node_name = "ip4-inacl", 
1367   .runs_before = {"ip4-source-check-via-rx", 0}, 
1368   .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
1369 };
1370
1371 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
1372   .node_name = "ip4-source-check-via-rx",
1373   .runs_before = {"ip4-source-check-via-any", 0},
1374   .feature_index = 
1375   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
1376 };
1377
1378 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
1379   .node_name = "ip4-source-check-via-any",
1380   .runs_before = {"ipsec-input-ip4", 0},
1381   .feature_index = 
1382   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
1383 };
1384
1385 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
1386   .node_name = "ipsec-input-ip4",
1387   .runs_before = {"vpath-input-ip4", 0},
1388   .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
1389 };
1390
1391 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
1392   .node_name = "vpath-input-ip4",
1393   .runs_before = {"ip4-lookup", 0},
1394   .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
1395 };
1396
1397 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
1398   .node_name = "ip4-lookup",
1399   .runs_before = {0}, /* not before any other features */
1400   .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
1401 };
1402
1403 /* Built-in ip4 multicast rx feature path definition */
1404 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
1405   .node_name = "vpath-input-ip4",
1406   .runs_before = {"ip4-lookup-multicast", 0},
1407   .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
1408 };
1409
1410 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
1411   .node_name = "ip4-lookup-multicast",
1412   .runs_before = {0}, /* not before any other features */
1413   .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
1414 };
1415
1416 static char * feature_start_nodes[] = 
1417   { "ip4-input", "ip4-input-no-checksum"};
1418
1419 static clib_error_t *
1420 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
1421 {
1422   ip_lookup_main_t * lm = &im->lookup_main;
1423   clib_error_t * error;
1424   vnet_cast_t cast;
1425
1426   for (cast = 0; cast < VNET_N_CAST; cast++)
1427     {
1428       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1429       vnet_config_main_t * vcm = &cm->config_main;
1430
1431       if ((error = ip_feature_init_cast (vm, cm, vcm, 
1432                                          feature_start_nodes,
1433                                          ARRAY_LEN(feature_start_nodes),
1434                                          cast,
1435                                          1 /* is_ip4 */)))
1436         return error;
1437     }
1438   return 0;
1439 }
1440
1441 static clib_error_t *
1442 ip4_sw_interface_add_del (vnet_main_t * vnm,
1443                           u32 sw_if_index,
1444                           u32 is_add)
1445 {
1446   vlib_main_t * vm = vnm->vlib_main;
1447   ip4_main_t * im = &ip4_main;
1448   ip_lookup_main_t * lm = &im->lookup_main;
1449   u32 ci, cast;
1450   u32 feature_index;
1451
1452   for (cast = 0; cast < VNET_N_CAST; cast++)
1453     {
1454       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1455       vnet_config_main_t * vcm = &cm->config_main;
1456
1457       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1458       ci = cm->config_index_by_sw_if_index[sw_if_index];
1459
1460       if (cast == VNET_UNICAST)
1461         feature_index = im->ip4_unicast_rx_feature_lookup;
1462       else
1463         feature_index = im->ip4_multicast_rx_feature_lookup;
1464
1465       if (is_add)
1466         ci = vnet_config_add_feature (vm, vcm,
1467                                       ci,
1468                                       feature_index,
1469                                       /* config data */ 0,
1470                                       /* # bytes of config data */ 0);
1471       else
1472         ci = vnet_config_del_feature (vm, vcm,
1473                                       ci,
1474                                       feature_index,
1475                                       /* config data */ 0,
1476                                       /* # bytes of config data */ 0);
1477
1478       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1479     }
1480
1481   return /* no error */ 0;
1482 }
1483
1484 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1485
1486 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
1487
1488 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1489   .function = ip4_lookup,
1490   .name = "ip4-lookup",
1491   .vector_size = sizeof (u32),
1492
1493   .format_trace = format_ip4_lookup_trace,
1494
1495   .n_next_nodes = IP4_LOOKUP_N_NEXT,
1496   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1497 };
1498
1499 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
1500
1501 static uword
1502 ip4_indirect (vlib_main_t * vm,
1503                vlib_node_runtime_t * node,
1504                vlib_frame_t * frame)
1505 {
1506   return ip4_lookup_inline (vm, node, frame,
1507                             /* lookup_for_responses_to_locally_received_packets */ 0,
1508                             /* is_indirect */ 1);
1509 }
1510
1511 VLIB_REGISTER_NODE (ip4_indirect_node) = {
1512   .function = ip4_indirect,
1513   .name = "ip4-indirect",
1514   .vector_size = sizeof (u32),
1515   .sibling_of = "ip4-lookup",
1516   .format_trace = format_ip4_lookup_trace,
1517
1518   .n_next_nodes = 0,
1519 };
1520
1521 VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect)
1522
1523
1524 /* Global IP4 main. */
1525 ip4_main_t ip4_main;
1526
1527 clib_error_t *
1528 ip4_lookup_init (vlib_main_t * vm)
1529 {
1530   ip4_main_t * im = &ip4_main;
1531   clib_error_t * error;
1532   uword i;
1533
1534   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1535     {
1536       u32 m;
1537
1538       if (i < 32)
1539         m = pow2_mask (i) << (32 - i);
1540       else 
1541         m = ~0;
1542       im->fib_masks[i] = clib_host_to_net_u32 (m);
1543     }
1544
1545   /* Create FIB with index 0 and table id of 0. */
1546   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1547
1548   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1549
1550   {
1551     pg_node_t * pn;
1552     pn = pg_get_node (ip4_lookup_node.index);
1553     pn->unformat_edit = unformat_pg_ip4_header;
1554   }
1555
1556   {
1557     ethernet_arp_header_t h;
1558
1559     memset (&h, 0, sizeof (h));
1560
1561     /* Set target ethernet address to all zeros. */
1562     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1563
1564 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1565 #define _8(f,v) h.f = v;
1566     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1567     _16 (l3_type, ETHERNET_TYPE_IP4);
1568     _8 (n_l2_address_bytes, 6);
1569     _8 (n_l3_address_bytes, 4);
1570     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1571 #undef _16
1572 #undef _8
1573
1574     vlib_packet_template_init (vm,
1575                                &im->ip4_arp_request_packet_template,
1576                                /* data */ &h,
1577                                sizeof (h),
1578                                /* alloc chunk size */ 8,
1579                                "ip4 arp");
1580   }
1581
1582   error = ip4_feature_init (vm, im);
1583
1584   return error;
1585 }
1586
1587 VLIB_INIT_FUNCTION (ip4_lookup_init);
1588
1589 typedef struct {
1590   /* Adjacency taken. */
1591   u32 adj_index;
1592   u32 flow_hash;
1593   u32 fib_index;
1594
1595   /* Packet data, possibly *after* rewrite. */
1596   u8 packet_data[64 - 1*sizeof(u32)];
1597 } ip4_forward_next_trace_t;
1598
1599 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1600 {
1601   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1602   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1603   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1604   uword indent = format_get_indent (s);
1605   s = format (s, "%U%U",
1606                 format_white_space, indent,
1607                 format_ip4_header, t->packet_data);
1608   return s;
1609 }
1610
1611 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1612 {
1613   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1614   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1615   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1616   vnet_main_t * vnm = vnet_get_main();
1617   ip4_main_t * im = &ip4_main;
1618   uword indent = format_get_indent (s);
1619
1620   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1621               t->fib_index, t->adj_index, format_ip_adjacency,
1622               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1623   s = format (s, "\n%U%U",
1624               format_white_space, indent,
1625               format_ip4_header, t->packet_data);
1626   return s;
1627 }
1628
1629 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1630 {
1631   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1632   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1633   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1634   vnet_main_t * vnm = vnet_get_main();
1635   ip4_main_t * im = &ip4_main;
1636   uword indent = format_get_indent (s);
1637
1638   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
1639               t->fib_index, t->adj_index, format_ip_adjacency,
1640               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1641   s = format (s, "\n%U%U",
1642               format_white_space, indent,
1643               format_ip_adjacency_packet_data,
1644               vnm, &im->lookup_main, t->adj_index,
1645               t->packet_data, sizeof (t->packet_data));
1646   return s;
1647 }
1648
1649 /* Common trace function for all ip4-forward next nodes. */
1650 void
1651 ip4_forward_next_trace (vlib_main_t * vm,
1652                         vlib_node_runtime_t * node,
1653                         vlib_frame_t * frame,
1654                         vlib_rx_or_tx_t which_adj_index)
1655 {
1656   u32 * from, n_left;
1657   ip4_main_t * im = &ip4_main;
1658
1659   n_left = frame->n_vectors;
1660   from = vlib_frame_vector_args (frame);
1661   
1662   while (n_left >= 4)
1663     {
1664       u32 bi0, bi1;
1665       vlib_buffer_t * b0, * b1;
1666       ip4_forward_next_trace_t * t0, * t1;
1667
1668       /* Prefetch next iteration. */
1669       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1670       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1671
1672       bi0 = from[0];
1673       bi1 = from[1];
1674
1675       b0 = vlib_get_buffer (vm, bi0);
1676       b1 = vlib_get_buffer (vm, bi1);
1677
1678       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1679         {
1680           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1681           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1682           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1683           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1684               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1685               vec_elt (im->fib_index_by_sw_if_index,
1686                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1687
1688           clib_memcpy (t0->packet_data,
1689                   vlib_buffer_get_current (b0),
1690                   sizeof (t0->packet_data));
1691         }
1692       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1693         {
1694           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1695           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1696           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1697           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1698               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1699               vec_elt (im->fib_index_by_sw_if_index,
1700                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1701           clib_memcpy (t1->packet_data,
1702                   vlib_buffer_get_current (b1),
1703                   sizeof (t1->packet_data));
1704         }
1705       from += 2;
1706       n_left -= 2;
1707     }
1708
1709   while (n_left >= 1)
1710     {
1711       u32 bi0;
1712       vlib_buffer_t * b0;
1713       ip4_forward_next_trace_t * t0;
1714
1715       bi0 = from[0];
1716
1717       b0 = vlib_get_buffer (vm, bi0);
1718
1719       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1720         {
1721           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1722           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1723           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1724           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1725               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1726               vec_elt (im->fib_index_by_sw_if_index,
1727                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1728           clib_memcpy (t0->packet_data,
1729                   vlib_buffer_get_current (b0),
1730                   sizeof (t0->packet_data));
1731         }
1732       from += 1;
1733       n_left -= 1;
1734     }
1735 }
1736
1737 static uword
1738 ip4_drop_or_punt (vlib_main_t * vm,
1739                   vlib_node_runtime_t * node,
1740                   vlib_frame_t * frame,
1741                   ip4_error_t error_code)
1742 {
1743   u32 * buffers = vlib_frame_vector_args (frame);
1744   uword n_packets = frame->n_vectors;
1745
1746   vlib_error_drop_buffers (vm, node,
1747                            buffers,
1748                            /* stride */ 1,
1749                            n_packets,
1750                            /* next */ 0,
1751                            ip4_input_node.index,
1752                            error_code);
1753
1754   if (node->flags & VLIB_NODE_FLAG_TRACE)
1755     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1756
1757   return n_packets;
1758 }
1759
1760 static uword
1761 ip4_drop (vlib_main_t * vm,
1762           vlib_node_runtime_t * node,
1763           vlib_frame_t * frame)
1764 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1765
1766 static uword
1767 ip4_punt (vlib_main_t * vm,
1768           vlib_node_runtime_t * node,
1769           vlib_frame_t * frame)
1770 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1771
1772 static uword
1773 ip4_miss (vlib_main_t * vm,
1774           vlib_node_runtime_t * node,
1775           vlib_frame_t * frame)
1776 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1777
1778 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1779   .function = ip4_drop,
1780   .name = "ip4-drop",
1781   .vector_size = sizeof (u32),
1782
1783   .format_trace = format_ip4_forward_next_trace,
1784
1785   .n_next_nodes = 1,
1786   .next_nodes = {
1787     [0] = "error-drop",
1788   },
1789 };
1790
1791 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1792
1793 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1794   .function = ip4_punt,
1795   .name = "ip4-punt",
1796   .vector_size = sizeof (u32),
1797
1798   .format_trace = format_ip4_forward_next_trace,
1799
1800   .n_next_nodes = 1,
1801   .next_nodes = {
1802     [0] = "error-punt",
1803   },
1804 };
1805
1806 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1807
1808 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1809   .function = ip4_miss,
1810   .name = "ip4-miss",
1811   .vector_size = sizeof (u32),
1812
1813   .format_trace = format_ip4_forward_next_trace,
1814
1815   .n_next_nodes = 1,
1816   .next_nodes = {
1817     [0] = "error-drop",
1818   },
1819 };
1820
1821 VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss)
1822
1823 /* Compute TCP/UDP/ICMP4 checksum in software. */
1824 u16
1825 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1826                               ip4_header_t * ip0)
1827 {
1828   ip_csum_t sum0;
1829   u32 ip_header_length, payload_length_host_byte_order;
1830   u32 n_this_buffer, n_bytes_left;
1831   u16 sum16;
1832   void * data_this_buffer;
1833   
1834   /* Initialize checksum with ip header. */
1835   ip_header_length = ip4_header_bytes (ip0);
1836   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1837   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1838
1839   if (BITS (uword) == 32)
1840     {
1841       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1842       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1843     }
1844   else
1845     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1846
1847   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1848   data_this_buffer = (void *) ip0 + ip_header_length;
1849   if (n_this_buffer + ip_header_length > p0->current_length)
1850     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1851   while (1)
1852     {
1853       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1854       n_bytes_left -= n_this_buffer;
1855       if (n_bytes_left == 0)
1856         break;
1857
1858       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1859       p0 = vlib_get_buffer (vm, p0->next_buffer);
1860       data_this_buffer = vlib_buffer_get_current (p0);
1861       n_this_buffer = p0->current_length;
1862     }
1863
1864   sum16 = ~ ip_csum_fold (sum0);
1865
1866   return sum16;
1867 }
1868
1869 static u32
1870 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1871 {
1872   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1873   udp_header_t * udp0;
1874   u16 sum16;
1875
1876   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1877           || ip0->protocol == IP_PROTOCOL_UDP);
1878
1879   udp0 = (void *) (ip0 + 1);
1880   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1881     {
1882       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1883                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1884       return p0->flags;
1885     }
1886
1887   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1888
1889   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1890                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1891
1892   return p0->flags;
1893 }
1894
1895 static uword
1896 ip4_local (vlib_main_t * vm,
1897            vlib_node_runtime_t * node,
1898            vlib_frame_t * frame)
1899 {
1900   ip4_main_t * im = &ip4_main;
1901   ip_lookup_main_t * lm = &im->lookup_main;
1902   ip_local_next_t next_index;
1903   u32 * from, * to_next, n_left_from, n_left_to_next;
1904   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1905
1906   from = vlib_frame_vector_args (frame);
1907   n_left_from = frame->n_vectors;
1908   next_index = node->cached_next_index;
1909   
1910   if (node->flags & VLIB_NODE_FLAG_TRACE)
1911     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1912
1913   while (n_left_from > 0)
1914     {
1915       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1916
1917       while (n_left_from >= 4 && n_left_to_next >= 2)
1918         {
1919           vlib_buffer_t * p0, * p1;
1920           ip4_header_t * ip0, * ip1;
1921           udp_header_t * udp0, * udp1;
1922           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1923           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1924           ip_adjacency_t * adj0, * adj1;
1925           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
1926           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
1927           i32 len_diff0, len_diff1;
1928           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1929           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1930           u8 enqueue_code;
1931       
1932           pi0 = to_next[0] = from[0];
1933           pi1 = to_next[1] = from[1];
1934           from += 2;
1935           n_left_from -= 2;
1936           to_next += 2;
1937           n_left_to_next -= 2;
1938       
1939           p0 = vlib_get_buffer (vm, pi0);
1940           p1 = vlib_get_buffer (vm, pi1);
1941
1942           ip0 = vlib_buffer_get_current (p0);
1943           ip1 = vlib_buffer_get_current (p1);
1944
1945           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1946                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1947           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1948                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1949
1950           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1951           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
1952
1953           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1954
1955           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1956           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1957
1958           /* Treat IP frag packets as "experimental" protocol for now
1959              until support of IP frag reassembly is implemented */
1960           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1961           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1962           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1963           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1964           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1965           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1966
1967           flags0 = p0->flags;
1968           flags1 = p1->flags;
1969
1970           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1971           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1972
1973           udp0 = ip4_next_header (ip0);
1974           udp1 = ip4_next_header (ip1);
1975
1976           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1977           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1978           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1979
1980           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1981           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1982
1983           /* Verify UDP length. */
1984           ip_len0 = clib_net_to_host_u16 (ip0->length);
1985           ip_len1 = clib_net_to_host_u16 (ip1->length);
1986           udp_len0 = clib_net_to_host_u16 (udp0->length);
1987           udp_len1 = clib_net_to_host_u16 (udp1->length);
1988
1989           len_diff0 = ip_len0 - udp_len0;
1990           len_diff1 = ip_len1 - udp_len1;
1991
1992           len_diff0 = is_udp0 ? len_diff0 : 0;
1993           len_diff1 = is_udp1 ? len_diff1 : 0;
1994
1995           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1996                                 & good_tcp_udp0 & good_tcp_udp1)))
1997             {
1998               if (is_tcp_udp0)
1999                 {
2000                   if (is_tcp_udp0
2001                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2002                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2003                   good_tcp_udp0 =
2004                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2005                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2006                 }
2007               if (is_tcp_udp1)
2008                 {
2009                   if (is_tcp_udp1
2010                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2011                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
2012                   good_tcp_udp1 =
2013                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2014                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2015                 }
2016             }
2017
2018           good_tcp_udp0 &= len_diff0 >= 0;
2019           good_tcp_udp1 &= len_diff1 >= 0;
2020
2021           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2022           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
2023
2024           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
2025
2026           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2027           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
2028
2029           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2030           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2031                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2032                     : error0);
2033           error1 = (is_tcp_udp1 && ! good_tcp_udp1
2034                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
2035                     : error1);
2036
2037           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2038           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
2039
2040           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2041           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2042
2043           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
2044           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2045
2046           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2047                                                            &ip0->src_address,
2048                                                            /* no_default_route */ 1));
2049           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
2050                                                            &ip1->src_address,
2051                                                            /* no_default_route */ 1));
2052
2053           adj0 = ip_get_adjacency (lm, adj_index0);
2054           adj1 = ip_get_adjacency (lm, adj_index1);
2055
2056           /* 
2057            * Must have a route to source otherwise we drop the packet.
2058            * ip4 broadcasts are accepted, e.g. to make dhcp client work
2059            */
2060           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2061                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2062                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2063                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2064                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2065                     ? IP4_ERROR_SRC_LOOKUP_MISS
2066                     : error0);
2067           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
2068                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2069                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
2070                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2071                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2072                     ? IP4_ERROR_SRC_LOOKUP_MISS
2073                     : error1);
2074
2075           next0 = lm->local_next_by_ip_protocol[proto0];
2076           next1 = lm->local_next_by_ip_protocol[proto1];
2077
2078           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2079           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
2080
2081           p0->error = error0 ? error_node->errors[error0] : 0;
2082           p1->error = error1 ? error_node->errors[error1] : 0;
2083
2084           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
2085
2086           if (PREDICT_FALSE (enqueue_code != 0))
2087             {
2088               switch (enqueue_code)
2089                 {
2090                 case 1:
2091                   /* A B A */
2092                   to_next[-2] = pi1;
2093                   to_next -= 1;
2094                   n_left_to_next += 1;
2095                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2096                   break;
2097
2098                 case 2:
2099                   /* A A B */
2100                   to_next -= 1;
2101                   n_left_to_next += 1;
2102                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2103                   break;
2104
2105                 case 3:
2106                   /* A B B or A B C */
2107                   to_next -= 2;
2108                   n_left_to_next += 2;
2109                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2110                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2111                   if (next0 == next1)
2112                     {
2113                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2114                       next_index = next1;
2115                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2116                     }
2117                   break;
2118                 }
2119             }
2120         }
2121
2122       while (n_left_from > 0 && n_left_to_next > 0)
2123         {
2124           vlib_buffer_t * p0;
2125           ip4_header_t * ip0;
2126           udp_header_t * udp0;
2127           ip4_fib_mtrie_t * mtrie0;
2128           ip4_fib_mtrie_leaf_t leaf0;
2129           ip_adjacency_t * adj0;
2130           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
2131           i32 len_diff0;
2132           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2133       
2134           pi0 = to_next[0] = from[0];
2135           from += 1;
2136           n_left_from -= 1;
2137           to_next += 1;
2138           n_left_to_next -= 1;
2139       
2140           p0 = vlib_get_buffer (vm, pi0);
2141
2142           ip0 = vlib_buffer_get_current (p0);
2143
2144           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2145                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2146
2147           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2148
2149           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2150
2151           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2152
2153           /* Treat IP frag packets as "experimental" protocol for now
2154              until support of IP frag reassembly is implemented */
2155           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
2156           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2157           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2158
2159           flags0 = p0->flags;
2160
2161           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2162
2163           udp0 = ip4_next_header (ip0);
2164
2165           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2166           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2167
2168           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2169
2170           /* Verify UDP length. */
2171           ip_len0 = clib_net_to_host_u16 (ip0->length);
2172           udp_len0 = clib_net_to_host_u16 (udp0->length);
2173
2174           len_diff0 = ip_len0 - udp_len0;
2175
2176           len_diff0 = is_udp0 ? len_diff0 : 0;
2177
2178           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
2179             {
2180               if (is_tcp_udp0)
2181                 {
2182                   if (is_tcp_udp0
2183                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2184                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2185                   good_tcp_udp0 =
2186                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2187                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2188                 }
2189             }
2190
2191           good_tcp_udp0 &= len_diff0 >= 0;
2192
2193           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2194
2195           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2196
2197           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2198
2199           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2200           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2201                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2202                     : error0);
2203
2204           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2205
2206           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2207           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2208
2209           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2210                                                            &ip0->src_address,
2211                                                            /* no_default_route */ 1));
2212
2213           adj0 = ip_get_adjacency (lm, adj_index0);
2214
2215           /* Must have a route to source otherwise we drop the packet. */
2216           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2217                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2218                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2219                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2220                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2221                     ? IP4_ERROR_SRC_LOOKUP_MISS
2222                     : error0);
2223
2224           next0 = lm->local_next_by_ip_protocol[proto0];
2225
2226           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2227
2228           p0->error = error0? error_node->errors[error0] : 0;
2229
2230           if (PREDICT_FALSE (next0 != next_index))
2231             {
2232               n_left_to_next += 1;
2233               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2234
2235               next_index = next0;
2236               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2237               to_next[0] = pi0;
2238               to_next += 1;
2239               n_left_to_next -= 1;
2240             }
2241         }
2242   
2243       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2244     }
2245
2246   return frame->n_vectors;
2247 }
2248
2249 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2250   .function = ip4_local,
2251   .name = "ip4-local",
2252   .vector_size = sizeof (u32),
2253
2254   .format_trace = format_ip4_forward_next_trace,
2255
2256   .n_next_nodes = IP_LOCAL_N_NEXT,
2257   .next_nodes = {
2258     [IP_LOCAL_NEXT_DROP] = "error-drop",
2259     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2260     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2261     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2262   },
2263 };
2264
2265 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
2266
2267 void ip4_register_protocol (u32 protocol, u32 node_index)
2268 {
2269   vlib_main_t * vm = vlib_get_main();
2270   ip4_main_t * im = &ip4_main;
2271   ip_lookup_main_t * lm = &im->lookup_main;
2272
2273   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2274   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2275 }
2276
2277 static clib_error_t *
2278 show_ip_local_command_fn (vlib_main_t * vm,
2279                           unformat_input_t * input,
2280                          vlib_cli_command_t * cmd)
2281 {
2282   ip4_main_t * im = &ip4_main;
2283   ip_lookup_main_t * lm = &im->lookup_main;
2284   int i;
2285
2286   vlib_cli_output (vm, "Protocols handled by ip4_local");
2287   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2288     {
2289       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2290         vlib_cli_output (vm, "%d", i);
2291     }
2292   return 0;
2293 }
2294
2295
2296
2297 VLIB_CLI_COMMAND (show_ip_local, static) = {
2298   .path = "show ip local",
2299   .function = show_ip_local_command_fn,
2300   .short_help = "Show ip local protocol table",
2301 };
2302
2303 static uword
2304 ip4_arp (vlib_main_t * vm,
2305          vlib_node_runtime_t * node,
2306          vlib_frame_t * frame)
2307 {
2308   vnet_main_t * vnm = vnet_get_main();
2309   ip4_main_t * im = &ip4_main;
2310   ip_lookup_main_t * lm = &im->lookup_main;
2311   u32 * from, * to_next_drop;
2312   uword n_left_from, n_left_to_next_drop, next_index;
2313   static f64 time_last_seed_change = -1e100;
2314   static u32 hash_seeds[3];
2315   static uword hash_bitmap[256 / BITS (uword)]; 
2316   f64 time_now;
2317
2318   if (node->flags & VLIB_NODE_FLAG_TRACE)
2319     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2320
2321   time_now = vlib_time_now (vm);
2322   if (time_now - time_last_seed_change > 1e-3)
2323     {
2324       uword i;
2325       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2326                                              sizeof (hash_seeds));
2327       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2328         hash_seeds[i] = r[i];
2329
2330       /* Mark all hash keys as been no-seen before. */
2331       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2332         hash_bitmap[i] = 0;
2333
2334       time_last_seed_change = time_now;
2335     }
2336
2337   from = vlib_frame_vector_args (frame);
2338   n_left_from = frame->n_vectors;
2339   next_index = node->cached_next_index;
2340   if (next_index == IP4_ARP_NEXT_DROP)
2341     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2342
2343   while (n_left_from > 0)
2344     {
2345       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2346                            to_next_drop, n_left_to_next_drop);
2347
2348       while (n_left_from > 0 && n_left_to_next_drop > 0)
2349         {
2350           vlib_buffer_t * p0;
2351           ip4_header_t * ip0;
2352           ethernet_header_t * eh0;
2353           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2354           uword bm0;
2355           ip_adjacency_t * adj0;
2356
2357           pi0 = from[0];
2358
2359           p0 = vlib_get_buffer (vm, pi0);
2360
2361           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2362           adj0 = ip_get_adjacency (lm, adj_index0);
2363           ip0 = vlib_buffer_get_current (p0);
2364
2365           /* If packet destination is not local, send ARP to next hop */
2366           if (adj0->arp.next_hop.ip4.as_u32)
2367             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2368
2369           /* 
2370            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2371            * rewrite to this packet, we need to skip it here.
2372            * Note, to distinguish from src IP addr *.8.6.*, we
2373            * check for a bcast eth dest instead of IPv4 version.
2374            */
2375           eh0 = (ethernet_header_t*)ip0;
2376           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2377             {
2378               u32 vlan_num = 0;
2379               u16 * etype = &eh0->type;
2380               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2381                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2382                 {
2383                   vlan_num += 1;
2384                   etype += 2; //vlan tag also 16 bits, same as etype
2385                 }
2386               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2387                 {
2388                   vlib_buffer_advance (
2389                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2390                   ip0 = vlib_buffer_get_current (p0);
2391                 }
2392             }
2393
2394           a0 = hash_seeds[0];
2395           b0 = hash_seeds[1];
2396           c0 = hash_seeds[2];
2397
2398           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2399           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2400
2401           a0 ^= ip0->dst_address.data_u32;
2402           b0 ^= sw_if_index0;
2403
2404           hash_v3_finalize32 (a0, b0, c0);
2405
2406           c0 &= BITS (hash_bitmap) - 1;
2407           c0 = c0 / BITS (uword);
2408           m0 = (uword) 1 << (c0 % BITS (uword));
2409
2410           bm0 = hash_bitmap[c0];
2411           drop0 = (bm0 & m0) != 0;
2412
2413           /* Mark it as seen. */
2414           hash_bitmap[c0] = bm0 | m0;
2415
2416           from += 1;
2417           n_left_from -= 1;
2418           to_next_drop[0] = pi0;
2419           to_next_drop += 1;
2420           n_left_to_next_drop -= 1;
2421
2422           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2423
2424           if (drop0)
2425             continue;
2426
2427           /* 
2428            * Can happen if the control-plane is programming tables
2429            * with traffic flowing; at least that's today's lame excuse.
2430            */
2431           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2432             {
2433               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2434             }
2435           else
2436           /* Send ARP request. */
2437           {
2438             u32 bi0 = 0;
2439             vlib_buffer_t * b0;
2440             ethernet_arp_header_t * h0;
2441             vnet_hw_interface_t * hw_if0;
2442
2443             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2444
2445             /* Add rewrite/encap string for ARP packet. */
2446             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2447
2448             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2449
2450             /* Src ethernet address in ARP header. */
2451             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2452                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2453
2454             if (ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0)) {
2455                 //No source address available
2456                 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2457                 vlib_buffer_free(vm, &bi0, 1);
2458                 continue;
2459             }
2460
2461             /* Copy in destination address we are requesting. */
2462             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2463
2464             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2465             b0 = vlib_get_buffer (vm, bi0);
2466             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2467
2468             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2469
2470             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2471           }
2472         }
2473
2474       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2475     }
2476
2477   return frame->n_vectors;
2478 }
2479
2480 static char * ip4_arp_error_strings[] = {
2481   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2482   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2483   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2484   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2485   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2486   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2487 };
2488
2489 VLIB_REGISTER_NODE (ip4_arp_node) = {
2490   .function = ip4_arp,
2491   .name = "ip4-arp",
2492   .vector_size = sizeof (u32),
2493
2494   .format_trace = format_ip4_forward_next_trace,
2495
2496   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2497   .error_strings = ip4_arp_error_strings,
2498
2499   .n_next_nodes = IP4_ARP_N_NEXT,
2500   .next_nodes = {
2501     [IP4_ARP_NEXT_DROP] = "error-drop",
2502   },
2503 };
2504
/*
 * X-macro listing the ip4-arp error codes (IP4_ARP_ERROR_ prefix omitted)
 * that arp_notrace_init() registers with the pcap drop-trace filter.
 * The user defines _(code) before expanding this list.
 */
#define foreach_notrace_ip4_arp_error   \
  _(DROP)                               \
  _(REQUEST_SENT)                       \
  _(REPLICATE_DROP)                     \
  _(REPLICATE_FAIL)
2510
2511 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2512 {
2513   vlib_node_runtime_t *rt = 
2514     vlib_node_get_runtime (vm, ip4_arp_node.index);
2515
2516   /* don't trace ARP request packets */
2517 #define _(a)                                    \
2518     vnet_pcap_drop_trace_filter_add_del         \
2519         (rt->errors[IP4_ARP_ERROR_##a],         \
2520          1 /* is_add */);
2521     foreach_notrace_ip4_arp_error;
2522 #undef _
2523   return 0;
2524 }
2525
2526 VLIB_INIT_FUNCTION(arp_notrace_init);
2527
2528
2529 /* Send an ARP request to see if given destination is reachable on given interface. */
2530 clib_error_t *
2531 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2532 {
2533   vnet_main_t * vnm = vnet_get_main();
2534   ip4_main_t * im = &ip4_main;
2535   ethernet_arp_header_t * h;
2536   ip4_address_t * src;
2537   ip_interface_address_t * ia;
2538   ip_adjacency_t * adj;
2539   vnet_hw_interface_t * hi;
2540   vnet_sw_interface_t * si;
2541   vlib_buffer_t * b;
2542   u32 bi = 0;
2543
2544   si = vnet_get_sw_interface (vnm, sw_if_index);
2545
2546   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2547     {
2548       return clib_error_return (0, "%U: interface %U down",
2549                                 format_ip4_address, dst, 
2550                                 format_vnet_sw_if_index_name, vnm, 
2551                                 sw_if_index);
2552     }
2553
2554   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2555   if (! src)
2556     {
2557       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2558       return clib_error_return 
2559         (0, "no matching interface address for destination %U (interface %U)",
2560          format_ip4_address, dst,
2561          format_vnet_sw_if_index_name, vnm, sw_if_index);
2562     }
2563
2564   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2565
2566   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2567
2568   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2569
2570   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2571
2572   h->ip4_over_ethernet[0].ip4 = src[0];
2573   h->ip4_over_ethernet[1].ip4 = dst[0];
2574
2575   b = vlib_get_buffer (vm, bi);
2576   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2577
2578   /* Add encapsulation string for software interface (e.g. ethernet header). */
2579   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2580   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2581
2582   {
2583     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2584     u32 * to_next = vlib_frame_vector_args (f);
2585     to_next[0] = bi;
2586     f->n_vectors = 1;
2587     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2588   }
2589
2590   return /* no error */ 0;
2591 }
2592
/*
 * Statically declared next-node arcs of the ip4-rewrite nodes.
 *
 * IP4_REWRITE_NEXT_DROP must remain 0: ip4_rewrite_inline() uses a
 * "next0 && ..." test to tell the drop disposition from any other arc.
 */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ARP = 1,
  IP4_REWRITE_NEXT_ICMP_ERROR = 2,
} ip4_rewrite_next_t;
2598
2599 always_inline uword
2600 ip4_rewrite_inline (vlib_main_t * vm,
2601                     vlib_node_runtime_t * node,
2602                     vlib_frame_t * frame,
2603                     int rewrite_for_locally_received_packets)
2604 {
2605   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2606   u32 * from = vlib_frame_vector_args (frame);
2607   u32 n_left_from, n_left_to_next, * to_next, next_index;
2608   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2609   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2610
2611   n_left_from = frame->n_vectors;
2612   next_index = node->cached_next_index;
2613   u32 cpu_index = os_get_cpu_number();
2614   
2615   while (n_left_from > 0)
2616     {
2617       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2618
2619       while (n_left_from >= 4 && n_left_to_next >= 2)
2620         {
2621           ip_adjacency_t * adj0, * adj1;
2622           vlib_buffer_t * p0, * p1;
2623           ip4_header_t * ip0, * ip1;
2624           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2625           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2626           u32 next0_override, next1_override;
2627       
2628           if (rewrite_for_locally_received_packets)
2629               next0_override = next1_override = 0;
2630
2631           /* Prefetch next iteration. */
2632           {
2633             vlib_buffer_t * p2, * p3;
2634
2635             p2 = vlib_get_buffer (vm, from[2]);
2636             p3 = vlib_get_buffer (vm, from[3]);
2637
2638             vlib_prefetch_buffer_header (p2, STORE);
2639             vlib_prefetch_buffer_header (p3, STORE);
2640
2641             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2642             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2643           }
2644
2645           pi0 = to_next[0] = from[0];
2646           pi1 = to_next[1] = from[1];
2647
2648           from += 2;
2649           n_left_from -= 2;
2650           to_next += 2;
2651           n_left_to_next -= 2;
2652       
2653           p0 = vlib_get_buffer (vm, pi0);
2654           p1 = vlib_get_buffer (vm, pi1);
2655
2656           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2657           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2658
2659           /* We should never rewrite a pkt using the MISS adjacency */
2660           ASSERT(adj_index0 && adj_index1);
2661
2662           ip0 = vlib_buffer_get_current (p0);
2663           ip1 = vlib_buffer_get_current (p1);
2664
2665           error0 = error1 = IP4_ERROR_NONE;
2666           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2667
2668           /* Decrement TTL & update checksum.
2669              Works either endian, so no need for byte swap. */
2670           if (! rewrite_for_locally_received_packets)
2671             {
2672               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2673
2674               /* Input node should have reject packets with ttl 0. */
2675               ASSERT (ip0->ttl > 0);
2676               ASSERT (ip1->ttl > 0);
2677
2678               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2679               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2680
2681               checksum0 += checksum0 >= 0xffff;
2682               checksum1 += checksum1 >= 0xffff;
2683
2684               ip0->checksum = checksum0;
2685               ip1->checksum = checksum1;
2686
2687               ttl0 -= 1;
2688               ttl1 -= 1;
2689
2690               ip0->ttl = ttl0;
2691               ip1->ttl = ttl1;
2692
2693               /*
2694                * If the ttl drops below 1 when forwarding, generate
2695                * an ICMP response.
2696                */
2697               if (PREDICT_FALSE(ttl0 <= 0))
2698                 {
2699                   error0 = IP4_ERROR_TIME_EXPIRED;
2700                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2701                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2702                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2703                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2704                 }
2705               if (PREDICT_FALSE(ttl1 <= 0))
2706                 {
2707                   error1 = IP4_ERROR_TIME_EXPIRED;
2708                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2709                   icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2710                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2711                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2712                 }
2713
2714               /* Verify checksum. */
2715               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2716               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2717             }
2718
2719           /* Rewrite packet header and updates lengths. */
2720           adj0 = ip_get_adjacency (lm, adj_index0);
2721           adj1 = ip_get_adjacency (lm, adj_index1);
2722       
2723           if (rewrite_for_locally_received_packets)
2724             {
2725               /*
2726                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2727                * we end up here with a local adjacency in hand
2728                * The local adj rewrite data is 0xfefe on purpose.
2729                * Bad engineer, no donut for you.
2730                */
2731               if (PREDICT_FALSE(adj0->lookup_next_index 
2732                                 == IP_LOOKUP_NEXT_LOCAL))
2733                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2734               if (PREDICT_FALSE(adj0->lookup_next_index
2735                                 == IP_LOOKUP_NEXT_ARP))
2736                 next0_override = IP4_REWRITE_NEXT_ARP;
2737               if (PREDICT_FALSE(adj1->lookup_next_index 
2738                                 == IP_LOOKUP_NEXT_LOCAL))
2739                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2740               if (PREDICT_FALSE(adj1->lookup_next_index
2741                                 == IP_LOOKUP_NEXT_ARP))
2742                 next1_override = IP4_REWRITE_NEXT_ARP;
2743             }
2744
2745           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2746           rw_len0 = adj0[0].rewrite_header.data_bytes;
2747           rw_len1 = adj1[0].rewrite_header.data_bytes;
2748
2749           /* Check MTU of outgoing interface. */
2750           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2751                     ? IP4_ERROR_MTU_EXCEEDED
2752                     : error0);
2753           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2754                     ? IP4_ERROR_MTU_EXCEEDED
2755                     : error1);
2756
2757           next0 = (error0 == IP4_ERROR_NONE)
2758             ? adj0[0].rewrite_header.next_index : next0;
2759
2760           if (rewrite_for_locally_received_packets)
2761               next0 = next0 && next0_override ? next0_override : next0;
2762
2763           next1 = (error1 == IP4_ERROR_NONE)
2764             ? adj1[0].rewrite_header.next_index : next1;
2765
2766           if (rewrite_for_locally_received_packets)
2767               next1 = next1 && next1_override ? next1_override : next1;
2768
2769           /* 
2770            * We've already accounted for an ethernet_header_t elsewhere
2771            */
2772           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2773               vlib_increment_combined_counter 
2774                   (&lm->adjacency_counters,
2775                    cpu_index, adj_index0, 
2776                    /* packet increment */ 0,
2777                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2778
2779           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2780               vlib_increment_combined_counter 
2781                   (&lm->adjacency_counters,
2782                    cpu_index, adj_index1, 
2783                    /* packet increment */ 0,
2784                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2785
2786           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2787            * to see the IP headerr */
2788           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2789             {
2790               p0->current_data -= rw_len0;
2791               p0->current_length += rw_len0;
2792               p0->error = error_node->errors[error0];
2793               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2794                   adj0[0].rewrite_header.sw_if_index;
2795             }
2796           if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2797             {
2798               p1->current_data -= rw_len1;
2799               p1->current_length += rw_len1;
2800               p1->error = error_node->errors[error1];
2801               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2802                   adj1[0].rewrite_header.sw_if_index;
2803             }
2804
2805           /* Guess we are only writing on simple Ethernet header. */
2806           vnet_rewrite_two_headers (adj0[0], adj1[0],
2807                                     ip0, ip1,
2808                                     sizeof (ethernet_header_t));
2809       
2810           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2811                                            to_next, n_left_to_next,
2812                                            pi0, pi1, next0, next1);
2813         }
2814
2815       while (n_left_from > 0 && n_left_to_next > 0)
2816         {
2817           ip_adjacency_t * adj0;
2818           vlib_buffer_t * p0;
2819           ip4_header_t * ip0;
2820           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2821           u32 next0_override;
2822       
2823           if (rewrite_for_locally_received_packets)
2824               next0_override = 0;
2825
2826           pi0 = to_next[0] = from[0];
2827
2828           p0 = vlib_get_buffer (vm, pi0);
2829
2830           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2831
2832           /* We should never rewrite a pkt using the MISS adjacency */
2833           ASSERT(adj_index0);
2834
2835           adj0 = ip_get_adjacency (lm, adj_index0);
2836       
2837           ip0 = vlib_buffer_get_current (p0);
2838
2839           error0 = IP4_ERROR_NONE;
2840           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
2841
2842           /* Decrement TTL & update checksum. */
2843           if (! rewrite_for_locally_received_packets)
2844             {
2845               i32 ttl0 = ip0->ttl;
2846
2847               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2848
2849               checksum0 += checksum0 >= 0xffff;
2850
2851               ip0->checksum = checksum0;
2852
2853               ASSERT (ip0->ttl > 0);
2854
2855               ttl0 -= 1;
2856
2857               ip0->ttl = ttl0;
2858
2859               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2860
2861               if (PREDICT_FALSE(ttl0 <= 0))
2862                 {
2863                   /*
2864                    * If the ttl drops below 1 when forwarding, generate
2865                    * an ICMP response.
2866                    */
2867                   error0 = IP4_ERROR_TIME_EXPIRED;
2868                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2869                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2870                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2871                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2872                 }
2873             }
2874
2875           if (rewrite_for_locally_received_packets)
2876             {
2877               /*
2878                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2879                * we end up here with a local adjacency in hand
2880                * The local adj rewrite data is 0xfefe on purpose.
2881                * Bad engineer, no donut for you.
2882                */
2883               if (PREDICT_FALSE(adj0->lookup_next_index 
2884                                 == IP_LOOKUP_NEXT_LOCAL))
2885                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2886               /* 
2887                * We have to override the next_index in ARP adjacencies,
2888                * because they're set up for ip4-arp, not this node...
2889                */
2890               if (PREDICT_FALSE(adj0->lookup_next_index
2891                                 == IP_LOOKUP_NEXT_ARP))
2892                 next0_override = IP4_REWRITE_NEXT_ARP;
2893             }
2894
2895           /* Guess we are only writing on simple Ethernet header. */
2896           vnet_rewrite_one_header (adj0[0], ip0, 
2897                                    sizeof (ethernet_header_t));
2898           
2899           /* Update packet buffer attributes/set output interface. */
2900           rw_len0 = adj0[0].rewrite_header.data_bytes;
2901           
2902           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2903               vlib_increment_combined_counter 
2904                   (&lm->adjacency_counters,
2905                    cpu_index, adj_index0, 
2906                    /* packet increment */ 0,
2907                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2908           
2909           /* Check MTU of outgoing interface. */
2910           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2911                     > adj0[0].rewrite_header.max_l3_packet_bytes
2912                     ? IP4_ERROR_MTU_EXCEEDED
2913                     : error0);
2914
2915           p0->error = error_node->errors[error0];
2916
2917           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2918            * to see the IP headerr */
2919           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2920             {
2921               p0->current_data -= rw_len0;
2922               p0->current_length += rw_len0;
2923
2924               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2925                   adj0[0].rewrite_header.sw_if_index;
2926               next0 = adj0[0].rewrite_header.next_index;
2927             }
2928
2929           if (rewrite_for_locally_received_packets)
2930               next0 = next0 && next0_override ? next0_override : next0;
2931
2932           from += 1;
2933           n_left_from -= 1;
2934           to_next += 1;
2935           n_left_to_next -= 1;
2936       
2937           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2938                                            to_next, n_left_to_next,
2939                                            pi0, next0);
2940         }
2941   
2942       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2943     }
2944
2945   /* Need to do trace after rewrites to pick up new packet data. */
2946   if (node->flags & VLIB_NODE_FLAG_TRACE)
2947     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2948
2949   return frame->n_vectors;
2950 }
2951
2952
2953 /** \brief IPv4 transit rewrite node.
2954     @node ip4-rewrite-transit
2955
2956     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2957     header checksum, fetch the ip adjacency, check the outbound mtu,
2958     apply the adjacency rewrite, and send pkts to the adjacency
2959     rewrite header's rewrite_next_index.
2960
2961     @param vm vlib_main_t corresponding to the current thread
2962     @param node vlib_node_runtime_t
2963     @param frame vlib_frame_t whose contents should be dispatched
2964
2965     @par Graph mechanics: buffer metadata, next index usage
2966
2967     @em Uses:
2968     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2969         - the rewrite adjacency index
2970     - <code>adj->lookup_next_index</code>
2971         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2972           the packet will be dropped. 
2973     - <code>adj->rewrite_header</code>
2974         - Rewrite string length, rewrite string, next_index
2975
2976     @em Sets:
2977     - <code>b->current_data, b->current_length</code>
2978         - Updated net of applying the rewrite string
2979
2980     <em>Next Indices:</em>
2981     - <code> adj->rewrite_header.next_index </code>
2982       or @c error-drop 
2983 */
2984 static uword
2985 ip4_rewrite_transit (vlib_main_t * vm,
2986                      vlib_node_runtime_t * node,
2987                      vlib_frame_t * frame)
2988 {
2989   return ip4_rewrite_inline (vm, node, frame,
2990                              /* rewrite_for_locally_received_packets */ 0);
2991 }
2992
2993 /** \brief IPv4 local rewrite node.
2994     @node ip4-rewrite-local
2995
2996     This is the IPv4 local rewrite node. Fetch the ip adjacency, check
2997     the outbound interface mtu, apply the adjacency rewrite, and send
2998     pkts to the adjacency rewrite header's rewrite_next_index. Deal
2999     with hemorrhoids of the form "some clown sends an icmp4 w/ src =
3000     dst = interface addr."
3001
3002     @param vm vlib_main_t corresponding to the current thread
3003     @param node vlib_node_runtime_t
3004     @param frame vlib_frame_t whose contents should be dispatched
3005
3006     @par Graph mechanics: buffer metadata, next index usage
3007
3008     @em Uses:
3009     - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
3010         - the rewrite adjacency index
3011     - <code>adj->lookup_next_index</code>
3012         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
3013           the packet will be dropped. 
3014     - <code>adj->rewrite_header</code>
3015         - Rewrite string length, rewrite string, next_index
3016
3017     @em Sets:
3018     - <code>b->current_data, b->current_length</code>
3019         - Updated net of applying the rewrite string
3020
3021     <em>Next Indices:</em>
3022     - <code> adj->rewrite_header.next_index </code>
3023       or @c error-drop 
3024 */
3025
3026 static uword
3027 ip4_rewrite_local (vlib_main_t * vm,
3028                    vlib_node_runtime_t * node,
3029                    vlib_frame_t * frame)
3030 {
3031   return ip4_rewrite_inline (vm, node, frame,
3032                              /* rewrite_for_locally_received_packets */ 1);
3033 }
3034
3035 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
3036   .function = ip4_rewrite_transit,
3037   .name = "ip4-rewrite-transit",
3038   .vector_size = sizeof (u32),
3039
3040   .format_trace = format_ip4_rewrite_trace,
3041
3042   .n_next_nodes = 3,
3043   .next_nodes = {
3044     [IP4_REWRITE_NEXT_DROP] = "error-drop",
3045     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
3046     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3047   },
3048 };
3049
3050 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
3051
3052 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
3053   .function = ip4_rewrite_local,
3054   .name = "ip4-rewrite-local",
3055   .vector_size = sizeof (u32),
3056
3057   .sibling_of = "ip4-rewrite-transit",
3058
3059   .format_trace = format_ip4_rewrite_trace,
3060
3061   .n_next_nodes = 0,
3062 };
3063
3064 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
3065
3066 static clib_error_t *
3067 add_del_interface_table (vlib_main_t * vm,
3068                          unformat_input_t * input,
3069                          vlib_cli_command_t * cmd)
3070 {
3071   vnet_main_t * vnm = vnet_get_main();
3072   clib_error_t * error = 0;
3073   u32 sw_if_index, table_id;
3074
3075   sw_if_index = ~0;
3076
3077   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
3078     {
3079       error = clib_error_return (0, "unknown interface `%U'",
3080                                  format_unformat_error, input);
3081       goto done;
3082     }
3083
3084   if (unformat (input, "%d", &table_id))
3085     ;
3086   else
3087     {
3088       error = clib_error_return (0, "expected table id `%U'",
3089                                  format_unformat_error, input);
3090       goto done;
3091     }
3092
3093   {
3094     ip4_main_t * im = &ip4_main;
3095     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
3096
3097     if (fib) 
3098       {
3099         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
3100         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
3101     }
3102   }
3103
3104  done:
3105   return error;
3106 }
3107
3108 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
3109   .path = "set interface ip table",
3110   .function = add_del_interface_table,
3111   .short_help = "Add/delete FIB table id for interface",
3112 };
3113
3114
3115 static uword
3116 ip4_lookup_multicast (vlib_main_t * vm,
3117                       vlib_node_runtime_t * node,
3118                       vlib_frame_t * frame)
3119 {
3120   ip4_main_t * im = &ip4_main;
3121   ip_lookup_main_t * lm = &im->lookup_main;
3122   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
3123   u32 n_left_from, n_left_to_next, * from, * to_next;
3124   ip_lookup_next_t next;
3125   u32 cpu_index = os_get_cpu_number();
3126
3127   from = vlib_frame_vector_args (frame);
3128   n_left_from = frame->n_vectors;
3129   next = node->cached_next_index;
3130
3131   while (n_left_from > 0)
3132     {
3133       vlib_get_next_frame (vm, node, next,
3134                            to_next, n_left_to_next);
3135
3136       while (n_left_from >= 4 && n_left_to_next >= 2)
3137         {
3138           vlib_buffer_t * p0, * p1;
3139           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
3140           ip_lookup_next_t next0, next1;
3141           ip4_header_t * ip0, * ip1;
3142           ip_adjacency_t * adj0, * adj1;
3143           u32 fib_index0, fib_index1;
3144           u32 flow_hash_config0, flow_hash_config1;
3145
3146           /* Prefetch next iteration. */
3147           {
3148             vlib_buffer_t * p2, * p3;
3149
3150             p2 = vlib_get_buffer (vm, from[2]);
3151             p3 = vlib_get_buffer (vm, from[3]);
3152
3153             vlib_prefetch_buffer_header (p2, LOAD);
3154             vlib_prefetch_buffer_header (p3, LOAD);
3155
3156             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
3157             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
3158           }
3159
3160           pi0 = to_next[0] = from[0];
3161           pi1 = to_next[1] = from[1];
3162
3163           p0 = vlib_get_buffer (vm, pi0);
3164           p1 = vlib_get_buffer (vm, pi1);
3165
3166           ip0 = vlib_buffer_get_current (p0);
3167           ip1 = vlib_buffer_get_current (p1);
3168
3169           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3170           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
3171           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3172             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3173           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
3174             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
3175
3176           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3177                                               &ip0->dst_address, p0);
3178           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
3179                                               &ip1->dst_address, p1);
3180
3181           adj0 = ip_get_adjacency (lm, adj_index0);
3182           adj1 = ip_get_adjacency (lm, adj_index1);
3183
3184           next0 = adj0->lookup_next_index;
3185           next1 = adj1->lookup_next_index;
3186
3187           flow_hash_config0 = 
3188               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3189
3190           flow_hash_config1 = 
3191               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
3192
3193           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
3194               (ip0, flow_hash_config0);
3195                                                                   
3196           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
3197               (ip1, flow_hash_config1);
3198
3199           ASSERT (adj0->n_adj > 0);
3200           ASSERT (adj1->n_adj > 0);
3201           ASSERT (is_pow2 (adj0->n_adj));
3202           ASSERT (is_pow2 (adj1->n_adj));
3203           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3204           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
3205
3206           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3207           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
3208
3209           if (1) /* $$$$$$ HACK FIXME */
3210           vlib_increment_combined_counter 
3211               (cm, cpu_index, adj_index0, 1,
3212                vlib_buffer_length_in_chain (vm, p0));
3213           if (1) /* $$$$$$ HACK FIXME */
3214           vlib_increment_combined_counter 
3215               (cm, cpu_index, adj_index1, 1,
3216                vlib_buffer_length_in_chain (vm, p1));
3217
3218           from += 2;
3219           to_next += 2;
3220           n_left_to_next -= 2;
3221           n_left_from -= 2;
3222
3223           wrong_next = (next0 != next) + 2*(next1 != next);
3224           if (PREDICT_FALSE (wrong_next != 0))
3225             {
3226               switch (wrong_next)
3227                 {
3228                 case 1:
3229                   /* A B A */
3230                   to_next[-2] = pi1;
3231                   to_next -= 1;
3232                   n_left_to_next += 1;
3233                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3234                   break;
3235
3236                 case 2:
3237                   /* A A B */
3238                   to_next -= 1;
3239                   n_left_to_next += 1;
3240                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3241                   break;
3242
3243                 case 3:
3244                   /* A B C */
3245                   to_next -= 2;
3246                   n_left_to_next += 2;
3247                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3248                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3249                   if (next0 == next1)
3250                     {
3251                       /* A B B */
3252                       vlib_put_next_frame (vm, node, next, n_left_to_next);
3253                       next = next1;
3254                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
3255                     }
3256                 }
3257             }
3258         }
3259     
3260       while (n_left_from > 0 && n_left_to_next > 0)
3261         {
3262           vlib_buffer_t * p0;
3263           ip4_header_t * ip0;
3264           u32 pi0, adj_index0;
3265           ip_lookup_next_t next0;
3266           ip_adjacency_t * adj0;
3267           u32 fib_index0;
3268           u32 flow_hash_config0;
3269
3270           pi0 = from[0];
3271           to_next[0] = pi0;
3272
3273           p0 = vlib_get_buffer (vm, pi0);
3274
3275           ip0 = vlib_buffer_get_current (p0);
3276
3277           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
3278                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3279           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3280               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3281           
3282           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3283                                               &ip0->dst_address, p0);
3284
3285           adj0 = ip_get_adjacency (lm, adj_index0);
3286
3287           next0 = adj0->lookup_next_index;
3288
3289           flow_hash_config0 = 
3290               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3291
3292           vnet_buffer (p0)->ip.flow_hash = 
3293             ip4_compute_flow_hash (ip0, flow_hash_config0);
3294
3295           ASSERT (adj0->n_adj > 0);
3296           ASSERT (is_pow2 (adj0->n_adj));
3297           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3298
3299           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3300
3301           if (1) /* $$$$$$ HACK FIXME */
3302               vlib_increment_combined_counter 
3303                   (cm, cpu_index, adj_index0, 1,
3304                    vlib_buffer_length_in_chain (vm, p0));
3305
3306           from += 1;
3307           to_next += 1;
3308           n_left_to_next -= 1;
3309           n_left_from -= 1;
3310
3311           if (PREDICT_FALSE (next0 != next))
3312             {
3313               n_left_to_next += 1;
3314               vlib_put_next_frame (vm, node, next, n_left_to_next);
3315               next = next0;
3316               vlib_get_next_frame (vm, node, next,
3317                                    to_next, n_left_to_next);
3318               to_next[0] = pi0;
3319               to_next += 1;
3320               n_left_to_next -= 1;
3321             }
3322         }
3323
3324       vlib_put_next_frame (vm, node, next, n_left_to_next);
3325     }
3326
3327   if (node->flags & VLIB_NODE_FLAG_TRACE)
3328       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
3329
3330   return frame->n_vectors;
3331 }
3332
3333 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3334   .function = ip4_lookup_multicast,
3335   .name = "ip4-lookup-multicast",
3336   .vector_size = sizeof (u32),
3337   .sibling_of = "ip4-lookup",
3338   .format_trace = format_ip4_lookup_trace,
3339
3340   .n_next_nodes = 0,
3341 };
3342
3343 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
3344
3345 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3346   .function = ip4_drop,
3347   .name = "ip4-multicast",
3348   .vector_size = sizeof (u32),
3349
3350   .format_trace = format_ip4_forward_next_trace,
3351
3352   .n_next_nodes = 1,
3353   .next_nodes = {
3354     [0] = "error-drop",
3355   },
3356 };
3357
3358 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3359 {
3360   ip4_main_t * im = &ip4_main;
3361   ip4_fib_mtrie_t * mtrie0;
3362   ip4_fib_mtrie_leaf_t leaf0;
3363   u32 adj_index0;
3364     
3365   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3366
3367   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3368   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3369   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3370   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3371   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3372   
3373   /* Handle default route. */
3374   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3375   
3376   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3377   
3378   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3379                                                   a, 
3380                                                   /* no_default_route */ 0);
3381 }
3382  
3383 static clib_error_t *
3384 test_lookup_command_fn (vlib_main_t * vm,
3385                         unformat_input_t * input,
3386                         vlib_cli_command_t * cmd)
3387 {
3388   u32 table_id = 0;
3389   f64 count = 1;
3390   u32 n;
3391   int i;
3392   ip4_address_t ip4_base_address;
3393   u64 errors = 0;
3394
3395   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3396       if (unformat (input, "table %d", &table_id))
3397         ;
3398       else if (unformat (input, "count %f", &count))
3399         ;
3400
3401       else if (unformat (input, "%U",
3402                          unformat_ip4_address, &ip4_base_address))
3403         ;
3404       else
3405         return clib_error_return (0, "unknown input `%U'",
3406                                   format_unformat_error, input);
3407   }
3408
3409   n = count;
3410
3411   for (i = 0; i < n; i++)
3412     {
3413       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3414         errors++;
3415
3416       ip4_base_address.as_u32 = 
3417         clib_host_to_net_u32 (1 + 
3418                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3419     }
3420
3421   if (errors) 
3422     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3423   else
3424     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3425
3426   return 0;
3427 }
3428
3429 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3430     .path = "test lookup",
3431     .short_help = "test lookup",
3432     .function = test_lookup_command_fn,
3433 };
3434
3435 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3436 {
3437   ip4_main_t * im4 = &ip4_main;
3438   ip4_fib_t * fib;
3439   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3440
3441   if (p == 0)
3442     return VNET_API_ERROR_NO_SUCH_FIB;
3443
3444   fib = vec_elt_at_index (im4->fibs, p[0]);
3445
3446   fib->flow_hash_config = flow_hash_config;
3447   return 0;
3448 }
3449  
3450 static clib_error_t *
3451 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3452                              unformat_input_t * input,
3453                              vlib_cli_command_t * cmd)
3454 {
3455   int matched = 0;
3456   u32 table_id = 0;
3457   u32 flow_hash_config = 0;
3458   int rv;
3459
3460   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3461     if (unformat (input, "table %d", &table_id))
3462       matched = 1;
3463 #define _(a,v) \
3464     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3465     foreach_flow_hash_bit
3466 #undef _
3467     else break;
3468   }
3469   
3470   if (matched == 0)
3471     return clib_error_return (0, "unknown input `%U'",
3472                               format_unformat_error, input);
3473   
3474   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3475   switch (rv)
3476     {
3477     case 0:
3478       break;
3479       
3480     case VNET_API_ERROR_NO_SUCH_FIB:
3481       return clib_error_return (0, "no such FIB table %d", table_id);
3482       
3483     default:
3484       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3485       break;
3486     }
3487   
3488   return 0;
3489 }
3490  
3491 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3492   .path = "set ip flow-hash",
3493   .short_help = 
3494   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3495   .function = set_ip_flow_hash_command_fn,
3496 };
3497  
3498 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3499                                  u32 table_index)
3500 {
3501   vnet_main_t * vnm = vnet_get_main();
3502   vnet_interface_main_t * im = &vnm->interface_main;
3503   ip4_main_t * ipm = &ip4_main;
3504   ip_lookup_main_t * lm = &ipm->lookup_main;
3505   vnet_classify_main_t * cm = &vnet_classify_main;
3506
3507   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3508     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3509
3510   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3511     return VNET_API_ERROR_NO_SUCH_ENTRY;
3512
3513   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3514   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3515
3516   return 0;
3517 }
3518
3519 static clib_error_t *
3520 set_ip_classify_command_fn (vlib_main_t * vm,
3521                             unformat_input_t * input,
3522                             vlib_cli_command_t * cmd)
3523 {
3524   u32 table_index = ~0;
3525   int table_index_set = 0;
3526   u32 sw_if_index = ~0;
3527   int rv;
3528   
3529   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3530     if (unformat (input, "table-index %d", &table_index))
3531       table_index_set = 1;
3532     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3533                        vnet_get_main(), &sw_if_index))
3534       ;
3535     else
3536       break;
3537   }
3538       
3539   if (table_index_set == 0)
3540     return clib_error_return (0, "classify table-index must be specified");
3541
3542   if (sw_if_index == ~0)
3543     return clib_error_return (0, "interface / subif must be specified");
3544
3545   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3546
3547   switch (rv)
3548     {
3549     case 0:
3550       break;
3551
3552     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3553       return clib_error_return (0, "No such interface");
3554
3555     case VNET_API_ERROR_NO_SUCH_ENTRY:
3556       return clib_error_return (0, "No such classifier table");
3557     }
3558   return 0;
3559 }
3560
3561 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3562     .path = "set ip classify",
3563     .short_help = 
3564     "set ip classify intfc <int> table-index <index>",
3565     .function = set_ip_classify_command_fn,
3566 };
3567