policer classify
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47
48 /** \file
49     vnet ip4 forwarding 
50 */
51
52 /* This is really, really simple but stupid fib. */
53 u32
54 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
55                            ip4_address_t * dst,
56                            u32 disable_default_route)
57 {
58   ip_lookup_main_t * lm = &im->lookup_main;
59   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
60   uword * p, * hash, key;
61   i32 i, i_min, dst_address, ai;
62
63   i_min = disable_default_route ? 1 : 0;
64   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
65   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
66     {
67       hash = fib->adj_index_by_dst_address[i];
68       if (! hash)
69         continue;
70
71       key = dst_address & im->fib_masks[i];
72       if ((p = hash_get (hash, key)) != 0)
73         {
74           ai = p[0];
75           goto done;
76         }
77     }
78     
79   /* Nothing matches in table. */
80   ai = lm->miss_adj_index;
81
82  done:
83   return ai;
84 }
85
86 static ip4_fib_t *
87 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
88 {
89   ip4_fib_t * fib;
90   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
91   vec_add2 (im->fibs, fib, 1);
92   fib->table_id = table_id;
93   fib->index = fib - im->fibs;
94   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
95   fib->fwd_classify_table_index = ~0;
96   fib->rev_classify_table_index = ~0;
97   ip4_mtrie_init (&fib->mtrie);
98   return fib;
99 }
100
101 ip4_fib_t *
102 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
103                                    u32 table_index_or_id, u32 flags)
104 {
105   uword * p, fib_index;
106
107   fib_index = table_index_or_id;
108   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
109     {
110       if (table_index_or_id == ~0) {
111         table_index_or_id = 0;
112         while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
113           table_index_or_id++;
114         }
115         return create_fib_with_table_id (im, table_index_or_id);
116       }
117
118       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
119       if (! p)
120         return create_fib_with_table_id (im, table_index_or_id);
121       fib_index = p[0];
122     }
123   return vec_elt_at_index (im->fibs, fib_index);
124 }
125
126 static void
127 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
128                                        ip4_fib_t * fib,
129                                        u32 address_length)
130 {
131   hash_t * h;
132   uword max_index;
133
134   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
135   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
136
137   fib->adj_index_by_dst_address[address_length] =
138     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
139
140   hash_set_flags (fib->adj_index_by_dst_address[address_length],
141                   HASH_FLAG_NO_AUTO_SHRINK);
142
143   h = hash_header (fib->adj_index_by_dst_address[address_length]);
144   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
145
146   /* Initialize new/old hash value vectors. */
147   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
148   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
149 }
150
151 static void
152 ip4_fib_set_adj_index (ip4_main_t * im,
153                        ip4_fib_t * fib,
154                        u32 flags,
155                        u32 dst_address_u32,
156                        u32 dst_address_length,
157                        u32 adj_index)
158 {
159   ip_lookup_main_t * lm = &im->lookup_main;
160   uword * hash;
161
162   if (vec_bytes(fib->old_hash_values))
163     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
164   if (vec_bytes(fib->new_hash_values))
165     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
166   fib->new_hash_values[0] = adj_index;
167
168   /* Make sure adj index is valid. */
169   if (CLIB_DEBUG > 0)
170     (void) ip_get_adjacency (lm, adj_index);
171
172   hash = fib->adj_index_by_dst_address[dst_address_length];
173
174   hash = _hash_set3 (hash, dst_address_u32,
175                      fib->new_hash_values,
176                      fib->old_hash_values);
177
178   fib->adj_index_by_dst_address[dst_address_length] = hash;
179
180   if (vec_len (im->add_del_route_callbacks) > 0)
181     {
182       ip4_add_del_route_callback_t * cb;
183       ip4_address_t d;
184       uword * p;
185
186       d.data_u32 = dst_address_u32;
187       vec_foreach (cb, im->add_del_route_callbacks)
188         if ((flags & cb->required_flags) == cb->required_flags)
189           cb->function (im, cb->function_opaque,
190                         fib, flags,
191                         &d, dst_address_length,
192                         fib->old_hash_values,
193                         fib->new_hash_values);
194
195       p = hash_get (hash, dst_address_u32);
196       clib_memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
197     }
198 }
199
200 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
201 {
202   ip_lookup_main_t * lm = &im->lookup_main;
203   ip4_fib_t * fib;
204   u32 dst_address, dst_address_length, adj_index, old_adj_index;
205   uword * hash, is_del;
206   ip4_add_del_route_callback_t * cb;
207
208   /* Either create new adjacency or use given one depending on arguments. */
209   if (a->n_add_adj > 0)
210     {
211       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
212       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
213     }
214   else
215     adj_index = a->adj_index;
216
217   dst_address = a->dst_address.data_u32;
218   dst_address_length = a->dst_address_length;
219   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
220
221   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
222   dst_address &= im->fib_masks[dst_address_length];
223
224   if (! fib->adj_index_by_dst_address[dst_address_length])
225     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
226
227   hash = fib->adj_index_by_dst_address[dst_address_length];
228
229   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
230
231   if (is_del)
232     {
233       fib->old_hash_values[0] = ~0;
234       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
235       fib->adj_index_by_dst_address[dst_address_length] = hash;
236
237       if (vec_len (im->add_del_route_callbacks) > 0
238           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
239         {
240           fib->new_hash_values[0] = ~0;
241           vec_foreach (cb, im->add_del_route_callbacks)
242             if ((a->flags & cb->required_flags) == cb->required_flags)
243               cb->function (im, cb->function_opaque,
244                             fib, a->flags,
245                             &a->dst_address, dst_address_length,
246                             fib->old_hash_values,
247                             fib->new_hash_values);
248         }
249     }
250   else
251     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
252                            adj_index);
253
254   old_adj_index = fib->old_hash_values[0];
255
256   /* Avoid spurious reference count increments */
257   if (old_adj_index == adj_index
258       && adj_index != ~0
259       && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
260     {
261       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
262       if (adj->share_count > 0)
263         adj->share_count --;
264     }
265
266   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
267                                is_del ? old_adj_index : adj_index,
268                                is_del);
269
270   /* Delete old adjacency index if present and changed. */
271   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
272       && old_adj_index != ~0
273       && old_adj_index != adj_index)
274     ip_del_adjacency (lm, old_adj_index);
275 }
276
277
278 u32
279 ip4_route_get_next_hop_adj (ip4_main_t * im,
280                             u32 fib_index,
281                             ip4_address_t *next_hop,
282                             u32 next_hop_sw_if_index,
283                             u32 explicit_fib_index)
284 {
285   ip_lookup_main_t * lm = &im->lookup_main;
286   vnet_main_t * vnm = vnet_get_main();
287   uword * nh_hash, * nh_result;
288   int is_interface_next_hop;
289   u32 nh_adj_index;
290   ip4_fib_t * fib;
291
292   fib = vec_elt_at_index (im->fibs, fib_index);
293
294   is_interface_next_hop = next_hop->data_u32 == 0;
295   if (is_interface_next_hop)
296     {
297       nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
298       if (nh_result)
299           nh_adj_index = *nh_result;
300       else
301         {
302            ip_adjacency_t * adj;
303            adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
304                                    &nh_adj_index);
305            ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
306            ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
307            hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
308         }
309     }
310   else if (next_hop_sw_if_index == ~0)
311     {
312       /* next-hop is recursive. we always need a indirect adj
313        * for recursive paths. Any LPM we perform now will give
314        * us a valid adj, but without tracking the next-hop we
315        * have no way to keep it valid.
316        */
317       ip_adjacency_t add_adj;
318       memset (&add_adj, 0, sizeof(add_adj));
319       add_adj.n_adj = 1;
320       add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
321       add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
322       add_adj.explicit_fib_index = explicit_fib_index;
323       ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
324     }
325   else
326     {
327       nh_hash = fib->adj_index_by_dst_address[32];
328       nh_result = hash_get (nh_hash, next_hop->data_u32);
329
330       /* Next hop must be known. */
331       if (! nh_result)
332         {
333           ip_adjacency_t * adj;
334
335           /* no /32 exists, get the longest prefix match */
336           nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
337                                                     next_hop, 0);
338           adj = ip_get_adjacency (lm, nh_adj_index);
339           /* if ARP interface adjacency is present, we need to
340              install ARP adjaceny for specific next hop */
341           if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
342               adj->arp.next_hop.ip4.as_u32 == 0)
343             {
344               nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
345             }
346         }
347       else
348         {
349           nh_adj_index = *nh_result;
350         }
351     }
352
353   return (nh_adj_index);
354 }
355
/* Add or remove (IP4_ROUTE_FLAG_DEL in flags) one next hop of the route
   dst_address/dst_address_length.

   im                    ip4 main structure.
   flags                 IP4_ROUTE_FLAG_* bits; NO_REDISTRIBUTE and
                         NOT_LAST_IN_GROUP are forwarded to
                         ip4_add_del_route.
   dst_address, dst_address_length
                         destination prefix.
   next_hop, next_hop_sw_if_index
                         next hop to add or delete; only consulted for
                         adjacency resolution when adj_index is ~0.
   next_hop_weight       weight handed to the multipath code.
   adj_index             adjacency to use for the next hop, or ~0 to
                         have it resolved by ip4_route_get_next_hop_adj.
   explicit_fib_index    FIB index to operate on, or ~0 to take the FIB
                         bound to next_hop_sw_if_index.

   Failures set vnm->api_errno and are reported through
   clib_error_report; nothing is returned to the caller. */
void
ip4_add_del_route_next_hop (ip4_main_t * im,
                            u32 flags,
                            ip4_address_t * dst_address,
                            u32 dst_address_length,
                            ip4_address_t * next_hop,
                            u32 next_hop_sw_if_index,
                            u32 next_hop_weight, u32 adj_index, 
                            u32 explicit_fib_index)
{
  vnet_main_t * vnm = vnet_get_main();
  ip_lookup_main_t * lm = &im->lookup_main;
  u32 fib_index;
  ip4_fib_t * fib;
  u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
  u32 dst_adj_index, nh_adj_index;
  uword * dst_hash, * dst_result;
  ip_adjacency_t * dst_adj;
  ip_multipath_adjacency_t * old_mp, * new_mp;
  int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
  clib_error_t * error = 0;

  /* Select the FIB: explicit index if given, else the one bound to the
     next-hop interface.
     NOTE(review): with explicit_fib_index == ~0 and
     next_hop_sw_if_index == ~0 this indexes fib_index_by_sw_if_index
     with ~0 -- confirm callers never pass that combination. */
  if (explicit_fib_index == (u32)~0)
      fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
  else
      fib_index = explicit_fib_index;

  fib = vec_elt_at_index (im->fibs, fib_index);

  /* Lookup next hop to be added or deleted. */
  if (adj_index == (u32)~0)
    {
        nh_adj_index = ip4_route_get_next_hop_adj(im, fib_index,
                                                  next_hop,
                                                  next_hop_sw_if_index,
                                                  explicit_fib_index);
    }
  else
    {
      nh_adj_index = adj_index;
    }
  ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
  dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];

  /* Find the adjacency currently installed for the destination, if any. */
  dst_hash = fib->adj_index_by_dst_address[dst_address_length];
  dst_result = hash_get (dst_hash, dst_address_u32);
  if (dst_result)
    {
      dst_adj_index = dst_result[0];
      dst_adj = ip_get_adjacency (lm, dst_adj_index);
    }
  else
    {
      /* For deletes destination must be known. */
      if (is_del)
        {
          vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
          error = clib_error_return (0, "unknown destination %U/%d",
                                     format_ip4_address, dst_address,
                                     dst_address_length);
          goto done;
        }

      dst_adj_index = ~0;
      dst_adj = 0;
    }

  /* Reject adds of X/32 with next hop of X.  The check only applies
     when the caller supplied an explicit adj_index. */
  if (! is_del
      && dst_address_length == 32
      && dst_address->data_u32 == next_hop->data_u32 
      && adj_index != (u32)~0)
    {
      vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
      error = clib_error_return (0, "prefix matches next hop %U/%d",
                                 format_ip4_address, dst_address,
                                 dst_address_length);
      goto done;
    }

  /* Destination is not known and default weight is set so add route
     to existing non-multipath adjacency.  is_del is always 0 here:
     deletes of an unknown destination already jumped to done above. */
  if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
    {
      /* create / delete additional mapping of existing adjacency */
      ip4_add_del_route_args_t a;
      ip_adjacency_t * nh_adj = ip_get_adjacency (lm, nh_adj_index);

      a.table_index_or_table_id = fib_index;
      a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
                 | IP4_ROUTE_FLAG_FIB_INDEX
                 | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
                 | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
                             | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
      a.dst_address = dst_address[0];
      a.dst_address_length = dst_address_length;
      a.adj_index = nh_adj_index;
      a.add_adj = 0;
      a.n_add_adj = 0;

      ip4_add_del_route (im, &a);

      /* adjust share count. This cannot be the only use of the adjacency 
         unless next hop is an indirect adj where share count is already
         incremented.
         NOTE(review): this branch is only entered when
         next_hop_sw_if_index == ~0, so the test below is always false
         and the share count is never adjusted here -- confirm that is
         intended. */
      if (next_hop_sw_if_index != ~0) 
        nh_adj->share_count += is_del ? -1 : 1;
        
      goto done;
    }

  /* Multipath handling: derive the new multipath adjacency from the old
     one by adding or removing this next hop. */
  old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;

  if (! ip_multipath_adjacency_add_del_next_hop
      (lm, is_del,
       old_mp_adj_index,
       nh_adj_index,
       next_hop_weight,
       &new_mp_adj_index))
    {
      vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
      error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
                                 format_ip4_address, next_hop);
      goto done;
    }
  
  old_mp = new_mp = 0;
  if (old_mp_adj_index != ~0)
    old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
  if (new_mp_adj_index != ~0)
    new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);

  /* Only touch the FIB when the multipath adjacency actually changed. */
  if (old_mp != new_mp)
    {
      ip4_add_del_route_args_t a;
      ip_adjacency_t * adj;

      a.table_index_or_table_id = fib_index;
      /* The route itself is deleted only when a delete left no
         multipath adjacency behind; otherwise it is (re)added with the
         new adjacency. */
      a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
                 | IP4_ROUTE_FLAG_FIB_INDEX
                 | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
                 | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
      a.dst_address = dst_address[0];
      a.dst_address_length = dst_address_length;
      a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
      a.add_adj = 0;
      a.n_add_adj = 0;

      ip4_add_del_route (im, &a);

      /* Single (non-multipath) adjacencies have their share count
         adjusted by one per add or delete. */
      adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index);
      if (adj->n_adj == 1)
        adj->share_count += is_del ? -1 : 1;
    }

 done:
  if (error)
    clib_error_report (error);
}
515
516 void *
517 ip4_get_route (ip4_main_t * im,
518                u32 table_index_or_table_id,
519                u32 flags,
520                u8 * address,
521                u32 address_length)
522 {
523   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
524   u32 dst_address = * (u32 *) address;
525   uword * hash, * p;
526
527   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
528   dst_address &= im->fib_masks[address_length];
529
530   hash = fib->adj_index_by_dst_address[address_length];
531   p = hash_get (hash, dst_address);
532   return (void *) p;
533 }
534
535 void
536 ip4_foreach_matching_route (ip4_main_t * im,
537                             u32 table_index_or_table_id,
538                             u32 flags,
539                             ip4_address_t * address,
540                             u32 address_length,
541                             ip4_address_t ** results,
542                             u8 ** result_lengths)
543 {
544   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
545   u32 dst_address = address->data_u32;
546   u32 this_length = address_length;
547   
548   if (*results)
549     _vec_len (*results) = 0;
550   if (*result_lengths)
551     _vec_len (*result_lengths) = 0;
552
553   while (this_length <= 32 && vec_len (results) == 0)
554     {
555       uword k, v;
556       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
557         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
558           {
559             ip4_address_t a;
560             a.data_u32 = k;
561             vec_add1 (*results, a);
562             vec_add1 (*result_lengths, this_length);
563           }
564       }));
565
566       this_length++;
567     }
568 }
569
570 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
571                                   u32 table_index_or_table_id,
572                                   u32 flags)
573 {
574   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
575   ip_lookup_main_t * lm = &im->lookup_main;
576   u32 i, l;
577   ip4_address_t a;
578   ip4_add_del_route_callback_t * cb;
579   static ip4_address_t * to_delete;
580
581   if (lm->n_adjacency_remaps == 0)
582     return;
583
584   for (l = 0; l <= 32; l++)
585     {
586       hash_pair_t * p;
587       uword * hash = fib->adj_index_by_dst_address[l];
588
589       if (hash_elts (hash) == 0)
590         continue;
591
592       if (to_delete)
593         _vec_len (to_delete) = 0;
594
595       hash_foreach_pair (p, hash, ({
596         u32 adj_index = p->value[0];
597         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
598
599         if (m)
600           {
601             /* Record destination address from hash key. */
602             a.data_u32 = p->key;
603
604             /* New adjacency points to nothing: so delete prefix. */
605             if (m == ~0)
606               vec_add1 (to_delete, a);
607             else
608               {
609                 /* Remap to new adjacency. */
610                 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
611
612                 /* Set new adjacency value. */
613                 fib->new_hash_values[0] = p->value[0] = m - 1;
614
615                 vec_foreach (cb, im->add_del_route_callbacks)
616                   if ((flags & cb->required_flags) == cb->required_flags)
617                     cb->function (im, cb->function_opaque,
618                                   fib, flags | IP4_ROUTE_FLAG_ADD,
619                                   &a, l,
620                                   fib->old_hash_values,
621                                   fib->new_hash_values);
622               }
623           }
624       }));
625
626       fib->new_hash_values[0] = ~0;
627       for (i = 0; i < vec_len (to_delete); i++)
628         {
629           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
630           vec_foreach (cb, im->add_del_route_callbacks)
631             if ((flags & cb->required_flags) == cb->required_flags)
632               cb->function (im, cb->function_opaque,
633                             fib, flags | IP4_ROUTE_FLAG_DEL,
634                             &a, l,
635                             fib->old_hash_values,
636                             fib->new_hash_values);
637         }
638     }
639
640   /* Also remap adjacencies in mtrie. */
641   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
642
643   /* Reset mapping table. */
644   vec_zero (lm->adjacency_remap_table);
645
646   /* All remaps have been performed. */
647   lm->n_adjacency_remaps = 0;
648 }
649
650 void ip4_delete_matching_routes (ip4_main_t * im,
651                                  u32 table_index_or_table_id,
652                                  u32 flags,
653                                  ip4_address_t * address,
654                                  u32 address_length)
655 {
656   static ip4_address_t * matching_addresses;
657   static u8 * matching_address_lengths;
658   u32 l, i;
659   ip4_add_del_route_args_t a;
660
661   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
662   a.table_index_or_table_id = table_index_or_table_id;
663   a.adj_index = ~0;
664   a.add_adj = 0;
665   a.n_add_adj = 0;
666
667   for (l = address_length + 1; l <= 32; l++)
668     {
669       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
670                                   address,
671                                   l,
672                                   &matching_addresses,
673                                   &matching_address_lengths);
674       for (i = 0; i < vec_len (matching_addresses); i++)
675         {
676           a.dst_address = matching_addresses[i];
677           a.dst_address_length = matching_address_lengths[i];
678           ip4_add_del_route (im, &a);
679         }
680     }
681
682   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
683 }
684
/* Forward declaration only; the definition is not in this part of the
   file.  Takes the usual node-function arguments (vm, node, frame) plus
   a selector saying which adjacency index (RX or TX) is meant. */
void
ip4_forward_next_trace (vlib_main_t * vm,
                        vlib_node_runtime_t * node,
                        vlib_frame_t * frame,
                        vlib_rx_or_tx_t which_adj_index);
690
691 always_inline uword
692 ip4_lookup_inline (vlib_main_t * vm,
693                    vlib_node_runtime_t * node,
694                    vlib_frame_t * frame,
695                    int lookup_for_responses_to_locally_received_packets,
696                    int is_indirect)
697 {
698   ip4_main_t * im = &ip4_main;
699   ip_lookup_main_t * lm = &im->lookup_main;
700   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
701   u32 n_left_from, n_left_to_next, * from, * to_next;
702   ip_lookup_next_t next;
703   u32 cpu_index = os_get_cpu_number();
704
705   from = vlib_frame_vector_args (frame);
706   n_left_from = frame->n_vectors;
707   next = node->cached_next_index;
708
709   while (n_left_from > 0)
710     {
711       vlib_get_next_frame (vm, node, next,
712                            to_next, n_left_to_next);
713
714       while (n_left_from >= 4 && n_left_to_next >= 2)
715         {
716           vlib_buffer_t * p0, * p1;
717           ip4_header_t * ip0, * ip1;
718           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
719           ip_lookup_next_t next0, next1;
720           ip_adjacency_t * adj0, * adj1;
721           ip4_fib_mtrie_t * mtrie0, * mtrie1;
722           ip4_fib_mtrie_leaf_t leaf0, leaf1;
723           ip4_address_t * dst_addr0, *dst_addr1;
724           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
725           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
726           u32 flow_hash_config0, flow_hash_config1;
727           u32 hash_c0, hash_c1;
728           u32 wrong_next;
729
730           /* Prefetch next iteration. */
731           {
732             vlib_buffer_t * p2, * p3;
733
734             p2 = vlib_get_buffer (vm, from[2]);
735             p3 = vlib_get_buffer (vm, from[3]);
736
737             vlib_prefetch_buffer_header (p2, LOAD);
738             vlib_prefetch_buffer_header (p3, LOAD);
739
740             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
741             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
742           }
743
744           pi0 = to_next[0] = from[0];
745           pi1 = to_next[1] = from[1];
746
747           p0 = vlib_get_buffer (vm, pi0);
748           p1 = vlib_get_buffer (vm, pi1);
749
750           ip0 = vlib_buffer_get_current (p0);
751           ip1 = vlib_buffer_get_current (p1);
752
753           if (is_indirect)
754             {
755               ip_adjacency_t * iadj0, * iadj1;
756               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
757               iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
758               dst_addr0 = &iadj0->indirect.next_hop.ip4;
759               dst_addr1 = &iadj1->indirect.next_hop.ip4;
760             }
761           else
762             {
763               dst_addr0 = &ip0->dst_address;
764               dst_addr1 = &ip1->dst_address;
765             }
766
767           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
768           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
769           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
770             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
771           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
772             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
773
774
775           if (! lookup_for_responses_to_locally_received_packets)
776             {
777               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
778               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
779
780               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
781
782               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
783               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
784             }
785
786           tcp0 = (void *) (ip0 + 1);
787           tcp1 = (void *) (ip1 + 1);
788
789           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
790                          || ip0->protocol == IP_PROTOCOL_UDP);
791           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
792                          || ip1->protocol == IP_PROTOCOL_UDP);
793
794           if (! lookup_for_responses_to_locally_received_packets)
795             {
796               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
797               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
798             }
799
800           if (! lookup_for_responses_to_locally_received_packets)
801             {
802               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
803               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
804             }
805
806           if (! lookup_for_responses_to_locally_received_packets)
807             {
808               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
809               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
810             }
811
812           if (lookup_for_responses_to_locally_received_packets)
813             {
814               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
815               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
816             }
817           else
818             {
819               /* Handle default route. */
820               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
821               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
822
823               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
824               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
825             }
826
827           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
828                                                            dst_addr0,
829                                                            /* no_default_route */ 0));
830           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
831                                                            dst_addr1,
832                                                            /* no_default_route */ 0));
833           adj0 = ip_get_adjacency (lm, adj_index0);
834           adj1 = ip_get_adjacency (lm, adj_index1);
835
836           next0 = adj0->lookup_next_index;
837           next1 = adj1->lookup_next_index;
838
839           /* Use flow hash to compute multipath adjacency. */
840           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
841           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
842           if (PREDICT_FALSE (adj0->n_adj > 1))
843             {
844               flow_hash_config0 = 
845                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
846               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
847                 ip4_compute_flow_hash (ip0, flow_hash_config0);
848             }
849           if (PREDICT_FALSE(adj1->n_adj > 1))
850             {
851               flow_hash_config1 = 
852                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
853               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
854                 ip4_compute_flow_hash (ip1, flow_hash_config1);
855             }
856
857           ASSERT (adj0->n_adj > 0);
858           ASSERT (adj1->n_adj > 0);
859           ASSERT (is_pow2 (adj0->n_adj));
860           ASSERT (is_pow2 (adj1->n_adj));
861           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
862           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
863
864           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
865           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
866
867           vlib_increment_combined_counter 
868               (cm, cpu_index, adj_index0, 1,
869                vlib_buffer_length_in_chain (vm, p0) 
870                + sizeof(ethernet_header_t));
871           vlib_increment_combined_counter 
872               (cm, cpu_index, adj_index1, 1,
873                vlib_buffer_length_in_chain (vm, p1)
874                + sizeof(ethernet_header_t));
875
876           from += 2;
877           to_next += 2;
878           n_left_to_next -= 2;
879           n_left_from -= 2;
880
881           wrong_next = (next0 != next) + 2*(next1 != next);
882           if (PREDICT_FALSE (wrong_next != 0))
883             {
884               switch (wrong_next)
885                 {
886                 case 1:
887                   /* A B A */
888                   to_next[-2] = pi1;
889                   to_next -= 1;
890                   n_left_to_next += 1;
891                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
892                   break;
893
894                 case 2:
895                   /* A A B */
896                   to_next -= 1;
897                   n_left_to_next += 1;
898                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
899                   break;
900
901                 case 3:
902                   /* A B C */
903                   to_next -= 2;
904                   n_left_to_next += 2;
905                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
906                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
907                   if (next0 == next1)
908                     {
909                       /* A B B */
910                       vlib_put_next_frame (vm, node, next, n_left_to_next);
911                       next = next1;
912                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
913                     }
914                 }
915             }
916         }
917     
918       while (n_left_from > 0 && n_left_to_next > 0)
919         {
920           vlib_buffer_t * p0;
921           ip4_header_t * ip0;
922           __attribute__((unused)) tcp_header_t * tcp0;
923           ip_lookup_next_t next0;
924           ip_adjacency_t * adj0;
925           ip4_fib_mtrie_t * mtrie0;
926           ip4_fib_mtrie_leaf_t leaf0;
927           ip4_address_t * dst_addr0;
928           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
929           u32 flow_hash_config0, hash_c0;
930
931           pi0 = from[0];
932           to_next[0] = pi0;
933
934           p0 = vlib_get_buffer (vm, pi0);
935
936           ip0 = vlib_buffer_get_current (p0);
937
938           if (is_indirect)
939             {
940               ip_adjacency_t * iadj0;
941               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
942               dst_addr0 = &iadj0->indirect.next_hop.ip4;
943             }
944           else
945             {
946               dst_addr0 = &ip0->dst_address;
947             }
948
949           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
950           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
951             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
952
953           if (! lookup_for_responses_to_locally_received_packets)
954             {
955               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
956
957               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
958
959               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
960             }
961
962           tcp0 = (void *) (ip0 + 1);
963
964           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
965                          || ip0->protocol == IP_PROTOCOL_UDP);
966
967           if (! lookup_for_responses_to_locally_received_packets)
968             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
969
970           if (! lookup_for_responses_to_locally_received_packets)
971             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
972
973           if (! lookup_for_responses_to_locally_received_packets)
974             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
975
976           if (lookup_for_responses_to_locally_received_packets)
977             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
978           else
979             {
980               /* Handle default route. */
981               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
982               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
983             }
984
985           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
986                                                            dst_addr0,
987                                                            /* no_default_route */ 0));
988
989           adj0 = ip_get_adjacency (lm, adj_index0);
990
991           next0 = adj0->lookup_next_index;
992
993           /* Use flow hash to compute multipath adjacency. */
994           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
995           if (PREDICT_FALSE(adj0->n_adj > 1))
996             {
997               flow_hash_config0 = 
998                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
999
1000               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
1001                 ip4_compute_flow_hash (ip0, flow_hash_config0);
1002             }
1003
1004           ASSERT (adj0->n_adj > 0);
1005           ASSERT (is_pow2 (adj0->n_adj));
1006           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
1007
1008           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1009
1010           vlib_increment_combined_counter 
1011               (cm, cpu_index, adj_index0, 1,
1012                vlib_buffer_length_in_chain (vm, p0)
1013                + sizeof(ethernet_header_t));
1014
1015           from += 1;
1016           to_next += 1;
1017           n_left_to_next -= 1;
1018           n_left_from -= 1;
1019
1020           if (PREDICT_FALSE (next0 != next))
1021             {
1022               n_left_to_next += 1;
1023               vlib_put_next_frame (vm, node, next, n_left_to_next);
1024               next = next0;
1025               vlib_get_next_frame (vm, node, next,
1026                                    to_next, n_left_to_next);
1027               to_next[0] = pi0;
1028               to_next += 1;
1029               n_left_to_next -= 1;
1030             }
1031         }
1032
1033       vlib_put_next_frame (vm, node, next, n_left_to_next);
1034     }
1035
1036   if (node->flags & VLIB_NODE_FLAG_TRACE)
1037     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
1038
1039   return frame->n_vectors;
1040 }
1041
1042 /** \brief IPv4 lookup node.
1043     @node ip4-lookup
1044
1045     This is the main IPv4 lookup dispatch node.
1046
1047     @param vm vlib_main_t corresponding to the current thread
1048     @param node vlib_node_runtime_t
1049     @param frame vlib_frame_t whose contents should be dispatched
1050
1051     @par Graph mechanics: buffer metadata, next index usage
1052
1053     @em Uses:
1054     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
1055         - Indicates the @c sw_if_index value of the interface that the
1056           packet was received on.
1057     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
1058         - When the value is @c ~0 then the node performs a longest prefix
1059           match (LPM) for the packet destination address in the FIB attached
1060           to the receive interface.
1061         - Otherwise perform LPM for the packet destination address in the
1062           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
1063           value (0, 1, ...) and not a VRF id.
1064
1065     @em Sets:
1066     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
1067         - The lookup result adjacency index.
1068
1069     <em>Next Index:</em>
1070     - Dispatches the packet to the node index found in
1071       ip_adjacency_t @c adj->lookup_next_index
1072       (where @c adj is the lookup result adjacency).
1073 */
1074 static uword
1075 ip4_lookup (vlib_main_t * vm,
1076             vlib_node_runtime_t * node,
1077             vlib_frame_t * frame)
1078 {
1079   return ip4_lookup_inline (vm, node, frame,
1080                             /* lookup_for_responses_to_locally_received_packets */ 0,
1081                             /* is_indirect */ 0);
1082
1083 }
1084
1085 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
1086                                         ip_adjacency_t * adj,
1087                                         u32 sw_if_index,
1088                                         u32 if_address_index)
1089 {
1090   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1091   ip_lookup_next_t n;
1092   vnet_l3_packet_type_t packet_type;
1093   u32 node_index;
1094
1095   if (hw->hw_class_index == ethernet_hw_interface_class.index
1096       || hw->hw_class_index == srp_hw_interface_class.index)
1097     {
1098       /* 
1099        * We have a bit of a problem in this case. ip4-arp uses
1100        * the rewrite_header.next_index to hand pkts to the
1101        * indicated inteface output node. We can end up in
1102        * ip4_rewrite_local, too, which also pays attention to 
1103        * rewrite_header.next index. Net result: a hack in
1104        * ip4_rewrite_local...
1105        */
1106       n = IP_LOOKUP_NEXT_ARP;
1107       node_index = ip4_arp_node.index;
1108       adj->if_address_index = if_address_index;
1109       adj->arp.next_hop.ip4.as_u32 = 0;
1110       ip46_address_reset(&adj->arp.next_hop);
1111       packet_type = VNET_L3_PACKET_TYPE_ARP;
1112     }
1113   else
1114     {
1115       n = IP_LOOKUP_NEXT_REWRITE;
1116       node_index = ip4_rewrite_node.index;
1117       packet_type = VNET_L3_PACKET_TYPE_IP4;
1118     }
1119
1120   adj->lookup_next_index = n;
1121   vnet_rewrite_for_sw_interface
1122     (vnm,
1123      packet_type,
1124      sw_if_index,
1125      node_index,
1126      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1127      &adj->rewrite_header,
1128      sizeof (adj->rewrite_data));
1129 }
1130
1131 static void
1132 ip4_add_interface_routes (u32 sw_if_index,
1133                           ip4_main_t * im, u32 fib_index,
1134                           ip_interface_address_t * a)
1135 {
1136   vnet_main_t * vnm = vnet_get_main();
1137   ip_lookup_main_t * lm = &im->lookup_main;
1138   ip_adjacency_t * adj;
1139   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1140   ip4_add_del_route_args_t x;
1141   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1142   u32 classify_table_index;
1143
1144   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1145   x.table_index_or_table_id = fib_index;
1146   x.flags = (IP4_ROUTE_FLAG_ADD
1147              | IP4_ROUTE_FLAG_FIB_INDEX
1148              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1149   x.dst_address = address[0];
1150   x.dst_address_length = a->address_length;
1151   x.n_add_adj = 0;
1152   x.add_adj = 0;
1153
1154   a->neighbor_probe_adj_index = ~0;
1155   if (a->address_length < 32)
1156     {
1157       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1158                               &x.adj_index);
1159       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1160       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1161       ip4_add_del_route (im, &x);
1162       a->neighbor_probe_adj_index = x.adj_index;
1163     }
1164   
1165   /* Add e.g. 1.1.1.1/32 as local to this host. */
1166   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1167                           &x.adj_index);
1168   
1169   classify_table_index = ~0;
1170   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1171     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1172   if (classify_table_index != (u32) ~0)
1173     {
1174       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1175       adj->classify.table_index = classify_table_index;
1176     }
1177   else
1178     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1179   
1180   adj->if_address_index = a - lm->if_address_pool;
1181   adj->rewrite_header.sw_if_index = sw_if_index;
1182   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1183   /* 
1184    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1185    * fail an RPF-ish check, but still go thru the rewrite code...
1186    */
1187   adj->rewrite_header.data_bytes = 0;
1188
1189   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1190   x.dst_address_length = 32;
1191   ip4_add_del_route (im, &x);
1192 }
1193
1194 static void
1195 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1196 {
1197   ip4_add_del_route_args_t x;
1198
1199   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1200   x.table_index_or_table_id = fib_index;
1201   x.flags = (IP4_ROUTE_FLAG_DEL
1202              | IP4_ROUTE_FLAG_FIB_INDEX
1203              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1204   x.dst_address = address[0];
1205   x.dst_address_length = address_length;
1206   x.adj_index = ~0;
1207   x.n_add_adj = 0;
1208   x.add_adj = 0;
1209
1210   if (address_length < 32)
1211     ip4_add_del_route (im, &x);
1212
1213   x.dst_address_length = 32;
1214   ip4_add_del_route (im, &x);
1215
1216   ip4_delete_matching_routes (im,
1217                               fib_index,
1218                               IP4_ROUTE_FLAG_FIB_INDEX,
1219                               address,
1220                               address_length);
1221 }
1222
1223 typedef struct {
1224     u32 sw_if_index;
1225     ip4_address_t address;
1226     u32 length;
1227 } ip4_interface_address_t;
1228
1229 static clib_error_t *
1230 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1231                                         u32 sw_if_index,
1232                                         ip4_address_t * new_address,
1233                                         u32 new_length,
1234                                         u32 redistribute,
1235                                         u32 insert_routes,
1236                                         u32 is_del);
1237
1238 static clib_error_t *
1239 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1240                                         u32 sw_if_index,
1241                                         ip4_address_t * address,
1242                                         u32 address_length,
1243                                         u32 redistribute,
1244                                         u32 insert_routes,
1245                                         u32 is_del)
1246 {
1247   vnet_main_t * vnm = vnet_get_main();
1248   ip4_main_t * im = &ip4_main;
1249   ip_lookup_main_t * lm = &im->lookup_main;
1250   clib_error_t * error = 0;
1251   u32 if_address_index, elts_before;
1252   ip4_address_fib_t ip4_af, * addr_fib = 0;
1253
1254   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1255   ip4_addr_fib_init (&ip4_af, address,
1256                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1257   vec_add1 (addr_fib, ip4_af);
1258
1259   /* When adding an address check that it does not conflict with an existing address. */
1260   if (! is_del)
1261     {
1262       ip_interface_address_t * ia;
1263       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1264                                     0 /* honor unnumbered */,
1265       ({
1266         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1267
1268         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1269             || ip4_destination_matches_route (im, x, address, address_length))
1270           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1271                                     format_ip4_address_and_length, address, address_length,
1272                                     format_ip4_address_and_length, x, ia->address_length,
1273                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1274       }));
1275     }
1276
1277   elts_before = pool_elts (lm->if_address_pool);
1278
1279   error = ip_interface_address_add_del
1280     (lm,
1281      sw_if_index,
1282      addr_fib,
1283      address_length,
1284      is_del,
1285      &if_address_index);
1286   if (error)
1287     goto done;
1288   
1289   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1290     {
1291       if (is_del)
1292         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1293                                   address_length);
1294       
1295       else
1296           ip4_add_interface_routes (sw_if_index,
1297                                     im, ip4_af.fib_index,
1298                                     pool_elt_at_index 
1299                                     (lm->if_address_pool, if_address_index));
1300     }
1301
1302   /* If pool did not grow/shrink: add duplicate address. */
1303   if (elts_before != pool_elts (lm->if_address_pool))
1304     {
1305       ip4_add_del_interface_address_callback_t * cb;
1306       vec_foreach (cb, im->add_del_interface_address_callbacks)
1307         cb->function (im, cb->function_opaque, sw_if_index,
1308                       address, address_length,
1309                       if_address_index,
1310                       is_del);
1311     }
1312
1313  done:
1314   vec_free (addr_fib);
1315   return error;
1316 }
1317
1318 clib_error_t *
1319 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1320                                ip4_address_t * address, u32 address_length,
1321                                u32 is_del)
1322 {
1323   return ip4_add_del_interface_address_internal
1324     (vm, sw_if_index, address, address_length,
1325      /* redistribute */ 1,
1326      /* insert_routes */ 1,
1327      is_del);
1328 }
1329
1330 static clib_error_t *
1331 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1332                                 u32 sw_if_index,
1333                                 u32 flags)
1334 {
1335   ip4_main_t * im = &ip4_main;
1336   ip_interface_address_t * ia;
1337   ip4_address_t * a;
1338   u32 is_admin_up, fib_index;
1339   
1340   /* Fill in lookup tables with default table (0). */
1341   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1342   
1343   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1344   
1345   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1346   
1347   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1348
1349   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1350                                 0 /* honor unnumbered */,
1351   ({
1352     a = ip_interface_address_get_address (&im->lookup_main, ia);
1353     if (is_admin_up)
1354       ip4_add_interface_routes (sw_if_index,
1355                                 im, fib_index,
1356                                 ia);
1357     else
1358       ip4_del_interface_routes (im, fib_index,
1359                                 a, ia->address_length);
1360   }));
1361
1362   return 0;
1363 }
1364  
1365 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1366
1367 /* Built-in ip4 unicast rx feature path definition.
 *
 * Each entry names a graph node (.node_name), lists the feature node(s) it
 * has to run before (.runs_before, 0-terminated) and points at the ip4_main
 * field holding the feature's index (.feature_index); that field is what
 * ip4_sw_interface_add_del passes to vnet_config_add_feature for the
 * lookup feature.
 *
 * Following the runs_before entries, the unicast order is:
 *   ip4-inacl -> ip4-source-check-via-rx -> ip4-source-check-via-any ->
 *   ip4-policer-classify -> ipsec-input-ip4 -> vpath-input-ip4 -> ip4-lookup
 */
1368 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
1369   .node_name = "ip4-inacl", 
1370   .runs_before = {"ip4-source-check-via-rx", 0}, 
1371   .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
1372 };
1373
1374 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
1375   .node_name = "ip4-source-check-via-rx",
1376   .runs_before = {"ip4-source-check-via-any", 0},
1377   .feature_index = 
1378   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
1379 };
1380
1381 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
1382   .node_name = "ip4-source-check-via-any",
1383   .runs_before = {"ip4-policer-classify", 0},
1384   .feature_index = 
1385   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
1386 };
1387
/* Policer classification sits between the source checks and ipsec input. */
1388 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
1389   .node_name = "ip4-policer-classify",
1390   .runs_before = {"ipsec-input-ip4", 0},
1391   .feature_index =
1392   &ip4_main.ip4_unicast_rx_feature_policer_classify,
1393 };
1394
1395 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
1396   .node_name = "ipsec-input-ip4",
1397   .runs_before = {"vpath-input-ip4", 0},
1398   .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
1399 };
1400
1401 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
1402   .node_name = "vpath-input-ip4",
1403   .runs_before = {"ip4-lookup", 0},
1404   .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
1405 };
1406
/* Last unicast feature: the lookup itself. */
1407 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
1408   .node_name = "ip4-lookup",
1409   .runs_before = {0}, /* not before any other features */
1410   .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
1411 };
1412
1413 /* Built-in ip4 multicast rx feature path definition:
 *   vpath-input-ip4 -> ip4-lookup-multicast
 */
1414 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
1415   .node_name = "vpath-input-ip4",
1416   .runs_before = {"ip4-lookup-multicast", 0},
1417   .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
1418 };
1419
1420 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
1421   .node_name = "ip4-lookup-multicast",
1422   .runs_before = {0}, /* not before any other features */
1423   .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
1424 };
1425
1426 static char * feature_start_nodes[] = 
1427   { "ip4-input", "ip4-input-no-checksum"};
1428
1429 static clib_error_t *
1430 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
1431 {
1432   ip_lookup_main_t * lm = &im->lookup_main;
1433   clib_error_t * error;
1434   vnet_cast_t cast;
1435
1436   for (cast = 0; cast < VNET_N_CAST; cast++)
1437     {
1438       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1439       vnet_config_main_t * vcm = &cm->config_main;
1440
1441       if ((error = ip_feature_init_cast (vm, cm, vcm, 
1442                                          feature_start_nodes,
1443                                          ARRAY_LEN(feature_start_nodes),
1444                                          cast,
1445                                          1 /* is_ip4 */)))
1446         return error;
1447     }
1448   return 0;
1449 }
1450
1451 static clib_error_t *
1452 ip4_sw_interface_add_del (vnet_main_t * vnm,
1453                           u32 sw_if_index,
1454                           u32 is_add)
1455 {
1456   vlib_main_t * vm = vnm->vlib_main;
1457   ip4_main_t * im = &ip4_main;
1458   ip_lookup_main_t * lm = &im->lookup_main;
1459   u32 ci, cast;
1460   u32 feature_index;
1461
1462   for (cast = 0; cast < VNET_N_CAST; cast++)
1463     {
1464       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1465       vnet_config_main_t * vcm = &cm->config_main;
1466
1467       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1468       ci = cm->config_index_by_sw_if_index[sw_if_index];
1469
1470       if (cast == VNET_UNICAST)
1471         feature_index = im->ip4_unicast_rx_feature_lookup;
1472       else
1473         feature_index = im->ip4_multicast_rx_feature_lookup;
1474
1475       if (is_add)
1476         ci = vnet_config_add_feature (vm, vcm,
1477                                       ci,
1478                                       feature_index,
1479                                       /* config data */ 0,
1480                                       /* # bytes of config data */ 0);
1481       else
1482         ci = vnet_config_del_feature (vm, vcm,
1483                                       ci,
1484                                       feature_index,
1485                                       /* config data */ 0,
1486                                       /* # bytes of config data */ 0);
1487
1488       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1489     }
1490
1491   return /* no error */ 0;
1492 }
1493
1494 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1495
/* The trace formatter is defined further down; it is declared here so the
   node registrations below can reference it. */
1496 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
1497
/* Registration of the "ip4-lookup" graph node: ip4_lookup is its dispatch
   function, each vector element is a u32, and the next nodes come from
   IP4_LOOKUP_NEXT_NODES (IP4_LOOKUP_N_NEXT of them). */
1498 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1499   .function = ip4_lookup,
1500   .name = "ip4-lookup",
1501   .vector_size = sizeof (u32),
1502
1503   .format_trace = format_ip4_lookup_trace,
1504
1505   .n_next_nodes = IP4_LOOKUP_N_NEXT,
1506   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1507 };
1508
1509 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
1510
1511 static uword
1512 ip4_indirect (vlib_main_t * vm,
1513                vlib_node_runtime_t * node,
1514                vlib_frame_t * frame)
1515 {
1516   return ip4_lookup_inline (vm, node, frame,
1517                             /* lookup_for_responses_to_locally_received_packets */ 0,
1518                             /* is_indirect */ 1);
1519 }
1520
/* Registration of the "ip4-indirect" graph node. It is declared a sibling
   of "ip4-lookup" and lists no next nodes of its own (n_next_nodes = 0);
   it shares the ip4-lookup trace formatter. */
1521 VLIB_REGISTER_NODE (ip4_indirect_node) = {
1522   .function = ip4_indirect,
1523   .name = "ip4-indirect",
1524   .vector_size = sizeof (u32),
1525   .sibling_of = "ip4-lookup",
1526   .format_trace = format_ip4_lookup_trace,
1527
1528   .n_next_nodes = 0,
1529 };
1530
1531 VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect)
1532
1533
1534 /* Global IP4 main: the single ip4_main_t instance, which the functions in
 * this file reach through &ip4_main. */
1535 ip4_main_t ip4_main;
1536
1537 clib_error_t *
1538 ip4_lookup_init (vlib_main_t * vm)
1539 {
1540   ip4_main_t * im = &ip4_main;
1541   clib_error_t * error;
1542   uword i;
1543
1544   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1545     {
1546       u32 m;
1547
1548       if (i < 32)
1549         m = pow2_mask (i) << (32 - i);
1550       else 
1551         m = ~0;
1552       im->fib_masks[i] = clib_host_to_net_u32 (m);
1553     }
1554
1555   /* Create FIB with index 0 and table id of 0. */
1556   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1557
1558   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1559
1560   {
1561     pg_node_t * pn;
1562     pn = pg_get_node (ip4_lookup_node.index);
1563     pn->unformat_edit = unformat_pg_ip4_header;
1564   }
1565
1566   {
1567     ethernet_arp_header_t h;
1568
1569     memset (&h, 0, sizeof (h));
1570
1571     /* Set target ethernet address to all zeros. */
1572     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1573
1574 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1575 #define _8(f,v) h.f = v;
1576     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1577     _16 (l3_type, ETHERNET_TYPE_IP4);
1578     _8 (n_l2_address_bytes, 6);
1579     _8 (n_l3_address_bytes, 4);
1580     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1581 #undef _16
1582 #undef _8
1583
1584     vlib_packet_template_init (vm,
1585                                &im->ip4_arp_request_packet_template,
1586                                /* data */ &h,
1587                                sizeof (h),
1588                                /* alloc chunk size */ 8,
1589                                "ip4 arp");
1590   }
1591
1592   error = ip4_feature_init (vm, im);
1593
1594   return error;
1595 }
1596
1597 VLIB_INIT_FUNCTION (ip4_lookup_init);
1598
1599 typedef struct {
1600   /* Adjacency taken. */
1601   u32 adj_index;
1602   u32 flow_hash;
1603   u32 fib_index;
1604
1605   /* Packet data, possibly *after* rewrite. */
1606   u8 packet_data[64 - 1*sizeof(u32)];
1607 } ip4_forward_next_trace_t;
1608
1609 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1610 {
1611   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1612   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1613   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1614   uword indent = format_get_indent (s);
1615   s = format (s, "%U%U",
1616                 format_white_space, indent,
1617                 format_ip4_header, t->packet_data);
1618   return s;
1619 }
1620
1621 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1622 {
1623   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1624   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1625   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1626   vnet_main_t * vnm = vnet_get_main();
1627   ip4_main_t * im = &ip4_main;
1628   uword indent = format_get_indent (s);
1629
1630   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1631               t->fib_index, t->adj_index, format_ip_adjacency,
1632               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1633   s = format (s, "\n%U%U",
1634               format_white_space, indent,
1635               format_ip4_header, t->packet_data);
1636   return s;
1637 }
1638
1639 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1640 {
1641   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1642   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1643   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1644   vnet_main_t * vnm = vnet_get_main();
1645   ip4_main_t * im = &ip4_main;
1646   uword indent = format_get_indent (s);
1647
1648   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
1649               t->fib_index, t->adj_index, format_ip_adjacency,
1650               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1651   s = format (s, "\n%U%U",
1652               format_white_space, indent,
1653               format_ip_adjacency_packet_data,
1654               vnm, &im->lookup_main, t->adj_index,
1655               t->packet_data, sizeof (t->packet_data));
1656   return s;
1657 }
1658
1659 /* Common trace function for all ip4-forward next nodes. */
1660 void
1661 ip4_forward_next_trace (vlib_main_t * vm,
1662                         vlib_node_runtime_t * node,
1663                         vlib_frame_t * frame,
1664                         vlib_rx_or_tx_t which_adj_index)
1665 {
1666   u32 * from, n_left;
1667   ip4_main_t * im = &ip4_main;
1668
1669   n_left = frame->n_vectors;
1670   from = vlib_frame_vector_args (frame);
1671   
1672   while (n_left >= 4)
1673     {
1674       u32 bi0, bi1;
1675       vlib_buffer_t * b0, * b1;
1676       ip4_forward_next_trace_t * t0, * t1;
1677
1678       /* Prefetch next iteration. */
1679       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1680       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1681
1682       bi0 = from[0];
1683       bi1 = from[1];
1684
1685       b0 = vlib_get_buffer (vm, bi0);
1686       b1 = vlib_get_buffer (vm, bi1);
1687
1688       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1689         {
1690           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1691           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1692           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1693           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1694               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1695               vec_elt (im->fib_index_by_sw_if_index,
1696                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1697
1698           clib_memcpy (t0->packet_data,
1699                   vlib_buffer_get_current (b0),
1700                   sizeof (t0->packet_data));
1701         }
1702       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1703         {
1704           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1705           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1706           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1707           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1708               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1709               vec_elt (im->fib_index_by_sw_if_index,
1710                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1711           clib_memcpy (t1->packet_data,
1712                   vlib_buffer_get_current (b1),
1713                   sizeof (t1->packet_data));
1714         }
1715       from += 2;
1716       n_left -= 2;
1717     }
1718
1719   while (n_left >= 1)
1720     {
1721       u32 bi0;
1722       vlib_buffer_t * b0;
1723       ip4_forward_next_trace_t * t0;
1724
1725       bi0 = from[0];
1726
1727       b0 = vlib_get_buffer (vm, bi0);
1728
1729       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1730         {
1731           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1732           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1733           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1734           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1735               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1736               vec_elt (im->fib_index_by_sw_if_index,
1737                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1738           clib_memcpy (t0->packet_data,
1739                   vlib_buffer_get_current (b0),
1740                   sizeof (t0->packet_data));
1741         }
1742       from += 1;
1743       n_left -= 1;
1744     }
1745 }
1746
1747 static uword
1748 ip4_drop_or_punt (vlib_main_t * vm,
1749                   vlib_node_runtime_t * node,
1750                   vlib_frame_t * frame,
1751                   ip4_error_t error_code)
1752 {
1753   u32 * buffers = vlib_frame_vector_args (frame);
1754   uword n_packets = frame->n_vectors;
1755
1756   vlib_error_drop_buffers (vm, node,
1757                            buffers,
1758                            /* stride */ 1,
1759                            n_packets,
1760                            /* next */ 0,
1761                            ip4_input_node.index,
1762                            error_code);
1763
1764   if (node->flags & VLIB_NODE_FLAG_TRACE)
1765     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1766
1767   return n_packets;
1768 }
1769
1770 static uword
1771 ip4_drop (vlib_main_t * vm,
1772           vlib_node_runtime_t * node,
1773           vlib_frame_t * frame)
1774 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1775
1776 static uword
1777 ip4_punt (vlib_main_t * vm,
1778           vlib_node_runtime_t * node,
1779           vlib_frame_t * frame)
1780 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1781
1782 static uword
1783 ip4_miss (vlib_main_t * vm,
1784           vlib_node_runtime_t * node,
1785           vlib_frame_t * frame)
1786 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1787
1788 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1789   .function = ip4_drop,
1790   .name = "ip4-drop",
1791   .vector_size = sizeof (u32),
1792
1793   .format_trace = format_ip4_forward_next_trace,
1794
1795   .n_next_nodes = 1,
1796   .next_nodes = {
1797     [0] = "error-drop",
1798   },
1799 };
1800
1801 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1802
1803 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1804   .function = ip4_punt,
1805   .name = "ip4-punt",
1806   .vector_size = sizeof (u32),
1807
1808   .format_trace = format_ip4_forward_next_trace,
1809
1810   .n_next_nodes = 1,
1811   .next_nodes = {
1812     [0] = "error-punt",
1813   },
1814 };
1815
1816 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1817
1818 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1819   .function = ip4_miss,
1820   .name = "ip4-miss",
1821   .vector_size = sizeof (u32),
1822
1823   .format_trace = format_ip4_forward_next_trace,
1824
1825   .n_next_nodes = 1,
1826   .next_nodes = {
1827     [0] = "error-drop",
1828   },
1829 };
1830
1831 VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss)
1832
1833 /* Compute TCP/UDP/ICMP4 checksum in software. */
1834 u16
1835 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1836                               ip4_header_t * ip0)
1837 {
1838   ip_csum_t sum0;
1839   u32 ip_header_length, payload_length_host_byte_order;
1840   u32 n_this_buffer, n_bytes_left;
1841   u16 sum16;
1842   void * data_this_buffer;
1843   
1844   /* Initialize checksum with ip header. */
1845   ip_header_length = ip4_header_bytes (ip0);
1846   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1847   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1848
1849   if (BITS (uword) == 32)
1850     {
1851       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1852       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1853     }
1854   else
1855     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1856
1857   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1858   data_this_buffer = (void *) ip0 + ip_header_length;
1859   if (n_this_buffer + ip_header_length > p0->current_length)
1860     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1861   while (1)
1862     {
1863       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1864       n_bytes_left -= n_this_buffer;
1865       if (n_bytes_left == 0)
1866         break;
1867
1868       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1869       p0 = vlib_get_buffer (vm, p0->next_buffer);
1870       data_this_buffer = vlib_buffer_get_current (p0);
1871       n_this_buffer = p0->current_length;
1872     }
1873
1874   sum16 = ~ ip_csum_fold (sum0);
1875
1876   return sum16;
1877 }
1878
1879 static u32
1880 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1881 {
1882   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1883   udp_header_t * udp0;
1884   u16 sum16;
1885
1886   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1887           || ip0->protocol == IP_PROTOCOL_UDP);
1888
1889   udp0 = (void *) (ip0 + 1);
1890   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1891     {
1892       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1893                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1894       return p0->flags;
1895     }
1896
1897   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1898
1899   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1900                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1901
1902   return p0->flags;
1903 }
1904
1905 static uword
1906 ip4_local (vlib_main_t * vm,
1907            vlib_node_runtime_t * node,
1908            vlib_frame_t * frame)
1909 {
1910   ip4_main_t * im = &ip4_main;
1911   ip_lookup_main_t * lm = &im->lookup_main;
1912   ip_local_next_t next_index;
1913   u32 * from, * to_next, n_left_from, n_left_to_next;
1914   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1915
1916   from = vlib_frame_vector_args (frame);
1917   n_left_from = frame->n_vectors;
1918   next_index = node->cached_next_index;
1919   
1920   if (node->flags & VLIB_NODE_FLAG_TRACE)
1921     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1922
1923   while (n_left_from > 0)
1924     {
1925       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1926
1927       while (n_left_from >= 4 && n_left_to_next >= 2)
1928         {
1929           vlib_buffer_t * p0, * p1;
1930           ip4_header_t * ip0, * ip1;
1931           udp_header_t * udp0, * udp1;
1932           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1933           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1934           ip_adjacency_t * adj0, * adj1;
1935           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
1936           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
1937           i32 len_diff0, len_diff1;
1938           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1939           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1940           u8 enqueue_code;
1941       
1942           pi0 = to_next[0] = from[0];
1943           pi1 = to_next[1] = from[1];
1944           from += 2;
1945           n_left_from -= 2;
1946           to_next += 2;
1947           n_left_to_next -= 2;
1948       
1949           p0 = vlib_get_buffer (vm, pi0);
1950           p1 = vlib_get_buffer (vm, pi1);
1951
1952           ip0 = vlib_buffer_get_current (p0);
1953           ip1 = vlib_buffer_get_current (p1);
1954
1955           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1956                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1957           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1958                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1959
1960           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1961           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
1962
1963           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1964
1965           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1966           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1967
1968           /* Treat IP frag packets as "experimental" protocol for now
1969              until support of IP frag reassembly is implemented */
1970           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1971           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1972           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1973           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1974           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1975           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1976
1977           flags0 = p0->flags;
1978           flags1 = p1->flags;
1979
1980           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1981           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1982
1983           udp0 = ip4_next_header (ip0);
1984           udp1 = ip4_next_header (ip1);
1985
1986           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1987           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1988           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1989
1990           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1991           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1992
1993           /* Verify UDP length. */
1994           ip_len0 = clib_net_to_host_u16 (ip0->length);
1995           ip_len1 = clib_net_to_host_u16 (ip1->length);
1996           udp_len0 = clib_net_to_host_u16 (udp0->length);
1997           udp_len1 = clib_net_to_host_u16 (udp1->length);
1998
1999           len_diff0 = ip_len0 - udp_len0;
2000           len_diff1 = ip_len1 - udp_len1;
2001
2002           len_diff0 = is_udp0 ? len_diff0 : 0;
2003           len_diff1 = is_udp1 ? len_diff1 : 0;
2004
2005           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
2006                                 & good_tcp_udp0 & good_tcp_udp1)))
2007             {
2008               if (is_tcp_udp0)
2009                 {
2010                   if (is_tcp_udp0
2011                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2012                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2013                   good_tcp_udp0 =
2014                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2015                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2016                 }
2017               if (is_tcp_udp1)
2018                 {
2019                   if (is_tcp_udp1
2020                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2021                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
2022                   good_tcp_udp1 =
2023                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2024                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2025                 }
2026             }
2027
2028           good_tcp_udp0 &= len_diff0 >= 0;
2029           good_tcp_udp1 &= len_diff1 >= 0;
2030
2031           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2032           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
2033
2034           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
2035
2036           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2037           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
2038
2039           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2040           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2041                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2042                     : error0);
2043           error1 = (is_tcp_udp1 && ! good_tcp_udp1
2044                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
2045                     : error1);
2046
2047           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2048           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
2049
2050           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2051           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2052
2053           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
2054           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2055
2056           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2057                                                            &ip0->src_address,
2058                                                            /* no_default_route */ 1));
2059           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
2060                                                            &ip1->src_address,
2061                                                            /* no_default_route */ 1));
2062
2063           adj0 = ip_get_adjacency (lm, adj_index0);
2064           adj1 = ip_get_adjacency (lm, adj_index1);
2065
2066           /* 
2067            * Must have a route to source otherwise we drop the packet.
2068            * ip4 broadcasts are accepted, e.g. to make dhcp client work
2069            */
2070           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2071                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2072                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2073                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2074                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2075                     ? IP4_ERROR_SRC_LOOKUP_MISS
2076                     : error0);
2077           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
2078                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2079                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
2080                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2081                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2082                     ? IP4_ERROR_SRC_LOOKUP_MISS
2083                     : error1);
2084
2085           next0 = lm->local_next_by_ip_protocol[proto0];
2086           next1 = lm->local_next_by_ip_protocol[proto1];
2087
2088           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2089           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
2090
2091           p0->error = error0 ? error_node->errors[error0] : 0;
2092           p1->error = error1 ? error_node->errors[error1] : 0;
2093
2094           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
2095
2096           if (PREDICT_FALSE (enqueue_code != 0))
2097             {
2098               switch (enqueue_code)
2099                 {
2100                 case 1:
2101                   /* A B A */
2102                   to_next[-2] = pi1;
2103                   to_next -= 1;
2104                   n_left_to_next += 1;
2105                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2106                   break;
2107
2108                 case 2:
2109                   /* A A B */
2110                   to_next -= 1;
2111                   n_left_to_next += 1;
2112                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2113                   break;
2114
2115                 case 3:
2116                   /* A B B or A B C */
2117                   to_next -= 2;
2118                   n_left_to_next += 2;
2119                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2120                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2121                   if (next0 == next1)
2122                     {
2123                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2124                       next_index = next1;
2125                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2126                     }
2127                   break;
2128                 }
2129             }
2130         }
2131
2132       while (n_left_from > 0 && n_left_to_next > 0)
2133         {
2134           vlib_buffer_t * p0;
2135           ip4_header_t * ip0;
2136           udp_header_t * udp0;
2137           ip4_fib_mtrie_t * mtrie0;
2138           ip4_fib_mtrie_leaf_t leaf0;
2139           ip_adjacency_t * adj0;
2140           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
2141           i32 len_diff0;
2142           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2143       
2144           pi0 = to_next[0] = from[0];
2145           from += 1;
2146           n_left_from -= 1;
2147           to_next += 1;
2148           n_left_to_next -= 1;
2149       
2150           p0 = vlib_get_buffer (vm, pi0);
2151
2152           ip0 = vlib_buffer_get_current (p0);
2153
2154           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2155                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2156
2157           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2158
2159           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2160
2161           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2162
2163           /* Treat IP frag packets as "experimental" protocol for now
2164              until support of IP frag reassembly is implemented */
2165           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
2166           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2167           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2168
2169           flags0 = p0->flags;
2170
2171           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2172
2173           udp0 = ip4_next_header (ip0);
2174
2175           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2176           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2177
2178           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2179
2180           /* Verify UDP length. */
2181           ip_len0 = clib_net_to_host_u16 (ip0->length);
2182           udp_len0 = clib_net_to_host_u16 (udp0->length);
2183
2184           len_diff0 = ip_len0 - udp_len0;
2185
2186           len_diff0 = is_udp0 ? len_diff0 : 0;
2187
2188           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
2189             {
2190               if (is_tcp_udp0)
2191                 {
2192                   if (is_tcp_udp0
2193                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2194                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2195                   good_tcp_udp0 =
2196                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2197                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2198                 }
2199             }
2200
2201           good_tcp_udp0 &= len_diff0 >= 0;
2202
2203           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2204
2205           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2206
2207           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2208
2209           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2210           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2211                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2212                     : error0);
2213
2214           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2215
2216           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2217           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2218
2219           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2220                                                            &ip0->src_address,
2221                                                            /* no_default_route */ 1));
2222
2223           adj0 = ip_get_adjacency (lm, adj_index0);
2224
2225           /* Must have a route to source otherwise we drop the packet. */
2226           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2227                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2228                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2229                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2230                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2231                     ? IP4_ERROR_SRC_LOOKUP_MISS
2232                     : error0);
2233
2234           next0 = lm->local_next_by_ip_protocol[proto0];
2235
2236           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2237
2238           p0->error = error0? error_node->errors[error0] : 0;
2239
2240           if (PREDICT_FALSE (next0 != next_index))
2241             {
2242               n_left_to_next += 1;
2243               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2244
2245               next_index = next0;
2246               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2247               to_next[0] = pi0;
2248               to_next += 1;
2249               n_left_to_next -= 1;
2250             }
2251         }
2252   
2253       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2254     }
2255
2256   return frame->n_vectors;
2257 }
2258
2259 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2260   .function = ip4_local,
2261   .name = "ip4-local",
2262   .vector_size = sizeof (u32),
2263
2264   .format_trace = format_ip4_forward_next_trace,
2265
2266   .n_next_nodes = IP_LOCAL_N_NEXT,
2267   .next_nodes = {
2268     [IP_LOCAL_NEXT_DROP] = "error-drop",
2269     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2270     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2271     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2272   },
2273 };
2274
2275 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
2276
2277 void ip4_register_protocol (u32 protocol, u32 node_index)
2278 {
2279   vlib_main_t * vm = vlib_get_main();
2280   ip4_main_t * im = &ip4_main;
2281   ip_lookup_main_t * lm = &im->lookup_main;
2282
2283   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2284   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2285 }
2286
2287 static clib_error_t *
2288 show_ip_local_command_fn (vlib_main_t * vm,
2289                           unformat_input_t * input,
2290                          vlib_cli_command_t * cmd)
2291 {
2292   ip4_main_t * im = &ip4_main;
2293   ip_lookup_main_t * lm = &im->lookup_main;
2294   int i;
2295
2296   vlib_cli_output (vm, "Protocols handled by ip4_local");
2297   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2298     {
2299       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2300         vlib_cli_output (vm, "%d", i);
2301     }
2302   return 0;
2303 }
2304
2305
2306
2307 VLIB_CLI_COMMAND (show_ip_local, static) = {
2308   .path = "show ip local",
2309   .function = show_ip_local_command_fn,
2310   .short_help = "Show ip local protocol table",
2311 };
2312
2313 static uword
2314 ip4_arp (vlib_main_t * vm,
2315          vlib_node_runtime_t * node,
2316          vlib_frame_t * frame)
2317 {
2318   vnet_main_t * vnm = vnet_get_main();
2319   ip4_main_t * im = &ip4_main;
2320   ip_lookup_main_t * lm = &im->lookup_main;
2321   u32 * from, * to_next_drop;
2322   uword n_left_from, n_left_to_next_drop, next_index;
2323   static f64 time_last_seed_change = -1e100;
2324   static u32 hash_seeds[3];
2325   static uword hash_bitmap[256 / BITS (uword)]; 
2326   f64 time_now;
2327
2328   if (node->flags & VLIB_NODE_FLAG_TRACE)
2329     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2330
2331   time_now = vlib_time_now (vm);
2332   if (time_now - time_last_seed_change > 1e-3)
2333     {
2334       uword i;
2335       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2336                                              sizeof (hash_seeds));
2337       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2338         hash_seeds[i] = r[i];
2339
2340       /* Mark all hash keys as been no-seen before. */
2341       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2342         hash_bitmap[i] = 0;
2343
2344       time_last_seed_change = time_now;
2345     }
2346
2347   from = vlib_frame_vector_args (frame);
2348   n_left_from = frame->n_vectors;
2349   next_index = node->cached_next_index;
2350   if (next_index == IP4_ARP_NEXT_DROP)
2351     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2352
2353   while (n_left_from > 0)
2354     {
2355       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2356                            to_next_drop, n_left_to_next_drop);
2357
2358       while (n_left_from > 0 && n_left_to_next_drop > 0)
2359         {
2360           vlib_buffer_t * p0;
2361           ip4_header_t * ip0;
2362           ethernet_header_t * eh0;
2363           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2364           uword bm0;
2365           ip_adjacency_t * adj0;
2366
2367           pi0 = from[0];
2368
2369           p0 = vlib_get_buffer (vm, pi0);
2370
2371           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2372           adj0 = ip_get_adjacency (lm, adj_index0);
2373           ip0 = vlib_buffer_get_current (p0);
2374
2375           /* If packet destination is not local, send ARP to next hop */
2376           if (adj0->arp.next_hop.ip4.as_u32)
2377             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2378
2379           /* 
2380            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2381            * rewrite to this packet, we need to skip it here.
2382            * Note, to distinguish from src IP addr *.8.6.*, we
2383            * check for a bcast eth dest instead of IPv4 version.
2384            */
2385           eh0 = (ethernet_header_t*)ip0;
2386           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2387             {
2388               u32 vlan_num = 0;
2389               u16 * etype = &eh0->type;
2390               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2391                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2392                 {
2393                   vlan_num += 1;
2394                   etype += 2; //vlan tag also 16 bits, same as etype
2395                 }
2396               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2397                 {
2398                   vlib_buffer_advance (
2399                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2400                   ip0 = vlib_buffer_get_current (p0);
2401                 }
2402             }
2403
2404           a0 = hash_seeds[0];
2405           b0 = hash_seeds[1];
2406           c0 = hash_seeds[2];
2407
2408           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2409           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2410
2411           a0 ^= ip0->dst_address.data_u32;
2412           b0 ^= sw_if_index0;
2413
2414           hash_v3_finalize32 (a0, b0, c0);
2415
2416           c0 &= BITS (hash_bitmap) - 1;
2417           c0 = c0 / BITS (uword);
2418           m0 = (uword) 1 << (c0 % BITS (uword));
2419
2420           bm0 = hash_bitmap[c0];
2421           drop0 = (bm0 & m0) != 0;
2422
2423           /* Mark it as seen. */
2424           hash_bitmap[c0] = bm0 | m0;
2425
2426           from += 1;
2427           n_left_from -= 1;
2428           to_next_drop[0] = pi0;
2429           to_next_drop += 1;
2430           n_left_to_next_drop -= 1;
2431
2432           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2433
2434           if (drop0)
2435             continue;
2436
2437           /* 
2438            * Can happen if the control-plane is programming tables
2439            * with traffic flowing; at least that's today's lame excuse.
2440            */
2441           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2442             {
2443               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2444             }
2445           else
2446           /* Send ARP request. */
2447           {
2448             u32 bi0 = 0;
2449             vlib_buffer_t * b0;
2450             ethernet_arp_header_t * h0;
2451             vnet_hw_interface_t * hw_if0;
2452
2453             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2454
2455             /* Add rewrite/encap string for ARP packet. */
2456             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2457
2458             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2459
2460             /* Src ethernet address in ARP header. */
2461             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2462                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2463
2464             if (ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0)) {
2465                 //No source address available
2466                 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2467                 vlib_buffer_free(vm, &bi0, 1);
2468                 continue;
2469             }
2470
2471             /* Copy in destination address we are requesting. */
2472             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2473
2474             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2475             b0 = vlib_get_buffer (vm, bi0);
2476             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2477
2478             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2479
2480             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2481           }
2482         }
2483
2484       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2485     }
2486
2487   return frame->n_vectors;
2488 }
2489
2490 static char * ip4_arp_error_strings[] = {
2491   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2492   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2493   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2494   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2495   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2496   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2497 };
2498
2499 VLIB_REGISTER_NODE (ip4_arp_node) = {
2500   .function = ip4_arp,
2501   .name = "ip4-arp",
2502   .vector_size = sizeof (u32),
2503
2504   .format_trace = format_ip4_forward_next_trace,
2505
2506   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2507   .error_strings = ip4_arp_error_strings,
2508
2509   .n_next_nodes = IP4_ARP_N_NEXT,
2510   .next_nodes = {
2511     [IP4_ARP_NEXT_DROP] = "error-drop",
2512   },
2513 };
2514
2515 #define foreach_notrace_ip4_arp_error           \
2516 _(DROP)                                         \
2517 _(REQUEST_SENT)                                 \
2518 _(REPLICATE_DROP)                               \
2519 _(REPLICATE_FAIL)
2520
2521 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2522 {
2523   vlib_node_runtime_t *rt = 
2524     vlib_node_get_runtime (vm, ip4_arp_node.index);
2525
2526   /* don't trace ARP request packets */
2527 #define _(a)                                    \
2528     vnet_pcap_drop_trace_filter_add_del         \
2529         (rt->errors[IP4_ARP_ERROR_##a],         \
2530          1 /* is_add */);
2531     foreach_notrace_ip4_arp_error;
2532 #undef _
2533   return 0;
2534 }
2535
2536 VLIB_INIT_FUNCTION(arp_notrace_init);
2537
2538
2539 /* Send an ARP request to see if given destination is reachable on given interface. */
2540 clib_error_t *
2541 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2542 {
2543   vnet_main_t * vnm = vnet_get_main();
2544   ip4_main_t * im = &ip4_main;
2545   ethernet_arp_header_t * h;
2546   ip4_address_t * src;
2547   ip_interface_address_t * ia;
2548   ip_adjacency_t * adj;
2549   vnet_hw_interface_t * hi;
2550   vnet_sw_interface_t * si;
2551   vlib_buffer_t * b;
2552   u32 bi = 0;
2553
2554   si = vnet_get_sw_interface (vnm, sw_if_index);
2555
2556   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2557     {
2558       return clib_error_return (0, "%U: interface %U down",
2559                                 format_ip4_address, dst, 
2560                                 format_vnet_sw_if_index_name, vnm, 
2561                                 sw_if_index);
2562     }
2563
2564   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2565   if (! src)
2566     {
2567       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2568       return clib_error_return 
2569         (0, "no matching interface address for destination %U (interface %U)",
2570          format_ip4_address, dst,
2571          format_vnet_sw_if_index_name, vnm, sw_if_index);
2572     }
2573
2574   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2575
2576   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2577
2578   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2579
2580   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2581
2582   h->ip4_over_ethernet[0].ip4 = src[0];
2583   h->ip4_over_ethernet[1].ip4 = dst[0];
2584
2585   b = vlib_get_buffer (vm, bi);
2586   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2587
2588   /* Add encapsulation string for software interface (e.g. ethernet header). */
2589   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2590   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2591
2592   {
2593     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2594     u32 * to_next = vlib_frame_vector_args (f);
2595     to_next[0] = bi;
2596     f->n_vectors = 1;
2597     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2598   }
2599
2600   return /* no error */ 0;
2601 }
2602
/*
 * Next-node slots of the ip4-rewrite nodes.  The values index the
 * next_nodes table in the ip4-rewrite-transit registration.
 */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,        /* "error-drop" */
  IP4_REWRITE_NEXT_ARP = 1,         /* "ip4-arp" */
  IP4_REWRITE_NEXT_ICMP_ERROR = 2,  /* "ip4-icmp-error" */
} ip4_rewrite_next_t;
2608
2609 always_inline uword
2610 ip4_rewrite_inline (vlib_main_t * vm,
2611                     vlib_node_runtime_t * node,
2612                     vlib_frame_t * frame,
2613                     int rewrite_for_locally_received_packets)
2614 {
2615   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2616   u32 * from = vlib_frame_vector_args (frame);
2617   u32 n_left_from, n_left_to_next, * to_next, next_index;
2618   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2619   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2620
2621   n_left_from = frame->n_vectors;
2622   next_index = node->cached_next_index;
2623   u32 cpu_index = os_get_cpu_number();
2624   
2625   while (n_left_from > 0)
2626     {
2627       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2628
2629       while (n_left_from >= 4 && n_left_to_next >= 2)
2630         {
2631           ip_adjacency_t * adj0, * adj1;
2632           vlib_buffer_t * p0, * p1;
2633           ip4_header_t * ip0, * ip1;
2634           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2635           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2636           u32 next0_override, next1_override;
2637       
2638           if (rewrite_for_locally_received_packets)
2639               next0_override = next1_override = 0;
2640
2641           /* Prefetch next iteration. */
2642           {
2643             vlib_buffer_t * p2, * p3;
2644
2645             p2 = vlib_get_buffer (vm, from[2]);
2646             p3 = vlib_get_buffer (vm, from[3]);
2647
2648             vlib_prefetch_buffer_header (p2, STORE);
2649             vlib_prefetch_buffer_header (p3, STORE);
2650
2651             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2652             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2653           }
2654
2655           pi0 = to_next[0] = from[0];
2656           pi1 = to_next[1] = from[1];
2657
2658           from += 2;
2659           n_left_from -= 2;
2660           to_next += 2;
2661           n_left_to_next -= 2;
2662       
2663           p0 = vlib_get_buffer (vm, pi0);
2664           p1 = vlib_get_buffer (vm, pi1);
2665
2666           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2667           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2668
2669           /* We should never rewrite a pkt using the MISS adjacency */
2670           ASSERT(adj_index0 && adj_index1);
2671
2672           ip0 = vlib_buffer_get_current (p0);
2673           ip1 = vlib_buffer_get_current (p1);
2674
2675           error0 = error1 = IP4_ERROR_NONE;
2676           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2677
2678           /* Decrement TTL & update checksum.
2679              Works either endian, so no need for byte swap. */
2680           if (! rewrite_for_locally_received_packets)
2681             {
2682               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2683
2684               /* Input node should have reject packets with ttl 0. */
2685               ASSERT (ip0->ttl > 0);
2686               ASSERT (ip1->ttl > 0);
2687
2688               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2689               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2690
2691               checksum0 += checksum0 >= 0xffff;
2692               checksum1 += checksum1 >= 0xffff;
2693
2694               ip0->checksum = checksum0;
2695               ip1->checksum = checksum1;
2696
2697               ttl0 -= 1;
2698               ttl1 -= 1;
2699
2700               ip0->ttl = ttl0;
2701               ip1->ttl = ttl1;
2702
2703               /*
2704                * If the ttl drops below 1 when forwarding, generate
2705                * an ICMP response.
2706                */
2707               if (PREDICT_FALSE(ttl0 <= 0))
2708                 {
2709                   error0 = IP4_ERROR_TIME_EXPIRED;
2710                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2711                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2712                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2713                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2714                 }
2715               if (PREDICT_FALSE(ttl1 <= 0))
2716                 {
2717                   error1 = IP4_ERROR_TIME_EXPIRED;
2718                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2719                   icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2720                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2721                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2722                 }
2723
2724               /* Verify checksum. */
2725               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2726               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2727             }
2728
2729           /* Rewrite packet header and updates lengths. */
2730           adj0 = ip_get_adjacency (lm, adj_index0);
2731           adj1 = ip_get_adjacency (lm, adj_index1);
2732       
2733           if (rewrite_for_locally_received_packets)
2734             {
2735               /*
2736                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2737                * we end up here with a local adjacency in hand
2738                * The local adj rewrite data is 0xfefe on purpose.
2739                * Bad engineer, no donut for you.
2740                */
2741               if (PREDICT_FALSE(adj0->lookup_next_index 
2742                                 == IP_LOOKUP_NEXT_LOCAL))
2743                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2744               if (PREDICT_FALSE(adj0->lookup_next_index
2745                                 == IP_LOOKUP_NEXT_ARP))
2746                 next0_override = IP4_REWRITE_NEXT_ARP;
2747               if (PREDICT_FALSE(adj1->lookup_next_index 
2748                                 == IP_LOOKUP_NEXT_LOCAL))
2749                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2750               if (PREDICT_FALSE(adj1->lookup_next_index
2751                                 == IP_LOOKUP_NEXT_ARP))
2752                 next1_override = IP4_REWRITE_NEXT_ARP;
2753             }
2754
2755           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2756           rw_len0 = adj0[0].rewrite_header.data_bytes;
2757           rw_len1 = adj1[0].rewrite_header.data_bytes;
2758
2759           /* Check MTU of outgoing interface. */
2760           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2761                     ? IP4_ERROR_MTU_EXCEEDED
2762                     : error0);
2763           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2764                     ? IP4_ERROR_MTU_EXCEEDED
2765                     : error1);
2766
2767           next0 = (error0 == IP4_ERROR_NONE)
2768             ? adj0[0].rewrite_header.next_index : next0;
2769
2770           if (rewrite_for_locally_received_packets)
2771               next0 = next0 && next0_override ? next0_override : next0;
2772
2773           next1 = (error1 == IP4_ERROR_NONE)
2774             ? adj1[0].rewrite_header.next_index : next1;
2775
2776           if (rewrite_for_locally_received_packets)
2777               next1 = next1 && next1_override ? next1_override : next1;
2778
2779           /* 
2780            * We've already accounted for an ethernet_header_t elsewhere
2781            */
2782           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2783               vlib_increment_combined_counter 
2784                   (&lm->adjacency_counters,
2785                    cpu_index, adj_index0, 
2786                    /* packet increment */ 0,
2787                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2788
2789           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2790               vlib_increment_combined_counter 
2791                   (&lm->adjacency_counters,
2792                    cpu_index, adj_index1, 
2793                    /* packet increment */ 0,
2794                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2795
2796           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2797            * to see the IP headerr */
2798           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2799             {
2800               p0->current_data -= rw_len0;
2801               p0->current_length += rw_len0;
2802               p0->error = error_node->errors[error0];
2803               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2804                   adj0[0].rewrite_header.sw_if_index;
2805             }
2806           if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2807             {
2808               p1->current_data -= rw_len1;
2809               p1->current_length += rw_len1;
2810               p1->error = error_node->errors[error1];
2811               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2812                   adj1[0].rewrite_header.sw_if_index;
2813             }
2814
2815           /* Guess we are only writing on simple Ethernet header. */
2816           vnet_rewrite_two_headers (adj0[0], adj1[0],
2817                                     ip0, ip1,
2818                                     sizeof (ethernet_header_t));
2819       
2820           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2821                                            to_next, n_left_to_next,
2822                                            pi0, pi1, next0, next1);
2823         }
2824
2825       while (n_left_from > 0 && n_left_to_next > 0)
2826         {
2827           ip_adjacency_t * adj0;
2828           vlib_buffer_t * p0;
2829           ip4_header_t * ip0;
2830           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2831           u32 next0_override;
2832       
2833           if (rewrite_for_locally_received_packets)
2834               next0_override = 0;
2835
2836           pi0 = to_next[0] = from[0];
2837
2838           p0 = vlib_get_buffer (vm, pi0);
2839
2840           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2841
2842           /* We should never rewrite a pkt using the MISS adjacency */
2843           ASSERT(adj_index0);
2844
2845           adj0 = ip_get_adjacency (lm, adj_index0);
2846       
2847           ip0 = vlib_buffer_get_current (p0);
2848
2849           error0 = IP4_ERROR_NONE;
2850           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
2851
2852           /* Decrement TTL & update checksum. */
2853           if (! rewrite_for_locally_received_packets)
2854             {
2855               i32 ttl0 = ip0->ttl;
2856
2857               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2858
2859               checksum0 += checksum0 >= 0xffff;
2860
2861               ip0->checksum = checksum0;
2862
2863               ASSERT (ip0->ttl > 0);
2864
2865               ttl0 -= 1;
2866
2867               ip0->ttl = ttl0;
2868
2869               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2870
2871               if (PREDICT_FALSE(ttl0 <= 0))
2872                 {
2873                   /*
2874                    * If the ttl drops below 1 when forwarding, generate
2875                    * an ICMP response.
2876                    */
2877                   error0 = IP4_ERROR_TIME_EXPIRED;
2878                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2879                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2880                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2881                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2882                 }
2883             }
2884
2885           if (rewrite_for_locally_received_packets)
2886             {
2887               /*
2888                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2889                * we end up here with a local adjacency in hand
2890                * The local adj rewrite data is 0xfefe on purpose.
2891                * Bad engineer, no donut for you.
2892                */
2893               if (PREDICT_FALSE(adj0->lookup_next_index 
2894                                 == IP_LOOKUP_NEXT_LOCAL))
2895                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2896               /* 
2897                * We have to override the next_index in ARP adjacencies,
2898                * because they're set up for ip4-arp, not this node...
2899                */
2900               if (PREDICT_FALSE(adj0->lookup_next_index
2901                                 == IP_LOOKUP_NEXT_ARP))
2902                 next0_override = IP4_REWRITE_NEXT_ARP;
2903             }
2904
2905           /* Guess we are only writing on simple Ethernet header. */
2906           vnet_rewrite_one_header (adj0[0], ip0, 
2907                                    sizeof (ethernet_header_t));
2908           
2909           /* Update packet buffer attributes/set output interface. */
2910           rw_len0 = adj0[0].rewrite_header.data_bytes;
2911           
2912           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2913               vlib_increment_combined_counter 
2914                   (&lm->adjacency_counters,
2915                    cpu_index, adj_index0, 
2916                    /* packet increment */ 0,
2917                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2918           
2919           /* Check MTU of outgoing interface. */
2920           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2921                     > adj0[0].rewrite_header.max_l3_packet_bytes
2922                     ? IP4_ERROR_MTU_EXCEEDED
2923                     : error0);
2924
2925           p0->error = error_node->errors[error0];
2926
2927           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2928            * to see the IP headerr */
2929           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2930             {
2931               p0->current_data -= rw_len0;
2932               p0->current_length += rw_len0;
2933
2934               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2935                   adj0[0].rewrite_header.sw_if_index;
2936               next0 = adj0[0].rewrite_header.next_index;
2937             }
2938
2939           if (rewrite_for_locally_received_packets)
2940               next0 = next0 && next0_override ? next0_override : next0;
2941
2942           from += 1;
2943           n_left_from -= 1;
2944           to_next += 1;
2945           n_left_to_next -= 1;
2946       
2947           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2948                                            to_next, n_left_to_next,
2949                                            pi0, next0);
2950         }
2951   
2952       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2953     }
2954
2955   /* Need to do trace after rewrites to pick up new packet data. */
2956   if (node->flags & VLIB_NODE_FLAG_TRACE)
2957     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2958
2959   return frame->n_vectors;
2960 }
2961
2962
2963 /** \brief IPv4 transit rewrite node.
2964     @node ip4-rewrite-transit
2965
2966     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2967     header checksum, fetch the ip adjacency, check the outbound mtu,
2968     apply the adjacency rewrite, and send pkts to the adjacency
2969     rewrite header's rewrite_next_index.
2970
2971     @param vm vlib_main_t corresponding to the current thread
2972     @param node vlib_node_runtime_t
2973     @param frame vlib_frame_t whose contents should be dispatched
2974
2975     @par Graph mechanics: buffer metadata, next index usage
2976
2977     @em Uses:
2978     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2979         - the rewrite adjacency index
2980     - <code>adj->lookup_next_index</code>
2981         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2982           the packet will be dropped. 
2983     - <code>adj->rewrite_header</code>
2984         - Rewrite string length, rewrite string, next_index
2985
2986     @em Sets:
2987     - <code>b->current_data, b->current_length</code>
2988         - Updated net of applying the rewrite string
2989
2990     <em>Next Indices:</em>
2991     - <code> adj->rewrite_header.next_index </code>
2992       or @c error-drop 
2993 */
2994 static uword
2995 ip4_rewrite_transit (vlib_main_t * vm,
2996                      vlib_node_runtime_t * node,
2997                      vlib_frame_t * frame)
2998 {
2999   return ip4_rewrite_inline (vm, node, frame,
3000                              /* rewrite_for_locally_received_packets */ 0);
3001 }
3002
3003 /** \brief IPv4 local rewrite node.
3004     @node ip4-rewrite-local
3005
3006     This is the IPv4 local rewrite node. Fetch the ip adjacency, check
3007     the outbound interface mtu, apply the adjacency rewrite, and send
3008     pkts to the adjacency rewrite header's rewrite_next_index. Deal
3009     with hemorrhoids of the form "some clown sends an icmp4 w/ src =
3010     dst = interface addr."
3011
3012     @param vm vlib_main_t corresponding to the current thread
3013     @param node vlib_node_runtime_t
3014     @param frame vlib_frame_t whose contents should be dispatched
3015
3016     @par Graph mechanics: buffer metadata, next index usage
3017
3018     @em Uses:
3019     - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
3020         - the rewrite adjacency index
3021     - <code>adj->lookup_next_index</code>
3022         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
3023           the packet will be dropped. 
3024     - <code>adj->rewrite_header</code>
3025         - Rewrite string length, rewrite string, next_index
3026
3027     @em Sets:
3028     - <code>b->current_data, b->current_length</code>
3029         - Updated net of applying the rewrite string
3030
3031     <em>Next Indices:</em>
3032     - <code> adj->rewrite_header.next_index </code>
3033       or @c error-drop 
3034 */
3035
3036 static uword
3037 ip4_rewrite_local (vlib_main_t * vm,
3038                    vlib_node_runtime_t * node,
3039                    vlib_frame_t * frame)
3040 {
3041   return ip4_rewrite_inline (vm, node, frame,
3042                              /* rewrite_for_locally_received_packets */ 1);
3043 }
3044
3045 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
3046   .function = ip4_rewrite_transit,
3047   .name = "ip4-rewrite-transit",
3048   .vector_size = sizeof (u32),
3049
3050   .format_trace = format_ip4_rewrite_trace,
3051
3052   .n_next_nodes = 3,
3053   .next_nodes = {
3054     [IP4_REWRITE_NEXT_DROP] = "error-drop",
3055     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
3056     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3057   },
3058 };
3059
3060 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
3061
3062 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
3063   .function = ip4_rewrite_local,
3064   .name = "ip4-rewrite-local",
3065   .vector_size = sizeof (u32),
3066
3067   .sibling_of = "ip4-rewrite-transit",
3068
3069   .format_trace = format_ip4_rewrite_trace,
3070
3071   .n_next_nodes = 0,
3072 };
3073
3074 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
3075
3076 static clib_error_t *
3077 add_del_interface_table (vlib_main_t * vm,
3078                          unformat_input_t * input,
3079                          vlib_cli_command_t * cmd)
3080 {
3081   vnet_main_t * vnm = vnet_get_main();
3082   clib_error_t * error = 0;
3083   u32 sw_if_index, table_id;
3084
3085   sw_if_index = ~0;
3086
3087   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
3088     {
3089       error = clib_error_return (0, "unknown interface `%U'",
3090                                  format_unformat_error, input);
3091       goto done;
3092     }
3093
3094   if (unformat (input, "%d", &table_id))
3095     ;
3096   else
3097     {
3098       error = clib_error_return (0, "expected table id `%U'",
3099                                  format_unformat_error, input);
3100       goto done;
3101     }
3102
3103   {
3104     ip4_main_t * im = &ip4_main;
3105     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
3106
3107     if (fib) 
3108       {
3109         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
3110         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
3111     }
3112   }
3113
3114  done:
3115   return error;
3116 }
3117
3118 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
3119   .path = "set interface ip table",
3120   .function = add_del_interface_table,
3121   .short_help = "Add/delete FIB table id for interface",
3122 };
3123
3124
3125 static uword
3126 ip4_lookup_multicast (vlib_main_t * vm,
3127                       vlib_node_runtime_t * node,
3128                       vlib_frame_t * frame)
3129 {
3130   ip4_main_t * im = &ip4_main;
3131   ip_lookup_main_t * lm = &im->lookup_main;
3132   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
3133   u32 n_left_from, n_left_to_next, * from, * to_next;
3134   ip_lookup_next_t next;
3135   u32 cpu_index = os_get_cpu_number();
3136
3137   from = vlib_frame_vector_args (frame);
3138   n_left_from = frame->n_vectors;
3139   next = node->cached_next_index;
3140
3141   while (n_left_from > 0)
3142     {
3143       vlib_get_next_frame (vm, node, next,
3144                            to_next, n_left_to_next);
3145
3146       while (n_left_from >= 4 && n_left_to_next >= 2)
3147         {
3148           vlib_buffer_t * p0, * p1;
3149           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
3150           ip_lookup_next_t next0, next1;
3151           ip4_header_t * ip0, * ip1;
3152           ip_adjacency_t * adj0, * adj1;
3153           u32 fib_index0, fib_index1;
3154           u32 flow_hash_config0, flow_hash_config1;
3155
3156           /* Prefetch next iteration. */
3157           {
3158             vlib_buffer_t * p2, * p3;
3159
3160             p2 = vlib_get_buffer (vm, from[2]);
3161             p3 = vlib_get_buffer (vm, from[3]);
3162
3163             vlib_prefetch_buffer_header (p2, LOAD);
3164             vlib_prefetch_buffer_header (p3, LOAD);
3165
3166             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
3167             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
3168           }
3169
3170           pi0 = to_next[0] = from[0];
3171           pi1 = to_next[1] = from[1];
3172
3173           p0 = vlib_get_buffer (vm, pi0);
3174           p1 = vlib_get_buffer (vm, pi1);
3175
3176           ip0 = vlib_buffer_get_current (p0);
3177           ip1 = vlib_buffer_get_current (p1);
3178
3179           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3180           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
3181           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3182             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3183           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
3184             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
3185
3186           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3187                                               &ip0->dst_address, p0);
3188           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
3189                                               &ip1->dst_address, p1);
3190
3191           adj0 = ip_get_adjacency (lm, adj_index0);
3192           adj1 = ip_get_adjacency (lm, adj_index1);
3193
3194           next0 = adj0->lookup_next_index;
3195           next1 = adj1->lookup_next_index;
3196
3197           flow_hash_config0 = 
3198               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3199
3200           flow_hash_config1 = 
3201               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
3202
3203           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
3204               (ip0, flow_hash_config0);
3205                                                                   
3206           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
3207               (ip1, flow_hash_config1);
3208
3209           ASSERT (adj0->n_adj > 0);
3210           ASSERT (adj1->n_adj > 0);
3211           ASSERT (is_pow2 (adj0->n_adj));
3212           ASSERT (is_pow2 (adj1->n_adj));
3213           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3214           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
3215
3216           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3217           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
3218
3219           if (1) /* $$$$$$ HACK FIXME */
3220           vlib_increment_combined_counter 
3221               (cm, cpu_index, adj_index0, 1,
3222                vlib_buffer_length_in_chain (vm, p0));
3223           if (1) /* $$$$$$ HACK FIXME */
3224           vlib_increment_combined_counter 
3225               (cm, cpu_index, adj_index1, 1,
3226                vlib_buffer_length_in_chain (vm, p1));
3227
3228           from += 2;
3229           to_next += 2;
3230           n_left_to_next -= 2;
3231           n_left_from -= 2;
3232
3233           wrong_next = (next0 != next) + 2*(next1 != next);
3234           if (PREDICT_FALSE (wrong_next != 0))
3235             {
3236               switch (wrong_next)
3237                 {
3238                 case 1:
3239                   /* A B A */
3240                   to_next[-2] = pi1;
3241                   to_next -= 1;
3242                   n_left_to_next += 1;
3243                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3244                   break;
3245
3246                 case 2:
3247                   /* A A B */
3248                   to_next -= 1;
3249                   n_left_to_next += 1;
3250                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3251                   break;
3252
3253                 case 3:
3254                   /* A B C */
3255                   to_next -= 2;
3256                   n_left_to_next += 2;
3257                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3258                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3259                   if (next0 == next1)
3260                     {
3261                       /* A B B */
3262                       vlib_put_next_frame (vm, node, next, n_left_to_next);
3263                       next = next1;
3264                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
3265                     }
3266                 }
3267             }
3268         }
3269     
3270       while (n_left_from > 0 && n_left_to_next > 0)
3271         {
3272           vlib_buffer_t * p0;
3273           ip4_header_t * ip0;
3274           u32 pi0, adj_index0;
3275           ip_lookup_next_t next0;
3276           ip_adjacency_t * adj0;
3277           u32 fib_index0;
3278           u32 flow_hash_config0;
3279
3280           pi0 = from[0];
3281           to_next[0] = pi0;
3282
3283           p0 = vlib_get_buffer (vm, pi0);
3284
3285           ip0 = vlib_buffer_get_current (p0);
3286
3287           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
3288                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3289           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3290               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3291           
3292           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3293                                               &ip0->dst_address, p0);
3294
3295           adj0 = ip_get_adjacency (lm, adj_index0);
3296
3297           next0 = adj0->lookup_next_index;
3298
3299           flow_hash_config0 = 
3300               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3301
3302           vnet_buffer (p0)->ip.flow_hash = 
3303             ip4_compute_flow_hash (ip0, flow_hash_config0);
3304
3305           ASSERT (adj0->n_adj > 0);
3306           ASSERT (is_pow2 (adj0->n_adj));
3307           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3308
3309           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3310
3311           if (1) /* $$$$$$ HACK FIXME */
3312               vlib_increment_combined_counter 
3313                   (cm, cpu_index, adj_index0, 1,
3314                    vlib_buffer_length_in_chain (vm, p0));
3315
3316           from += 1;
3317           to_next += 1;
3318           n_left_to_next -= 1;
3319           n_left_from -= 1;
3320
3321           if (PREDICT_FALSE (next0 != next))
3322             {
3323               n_left_to_next += 1;
3324               vlib_put_next_frame (vm, node, next, n_left_to_next);
3325               next = next0;
3326               vlib_get_next_frame (vm, node, next,
3327                                    to_next, n_left_to_next);
3328               to_next[0] = pi0;
3329               to_next += 1;
3330               n_left_to_next -= 1;
3331             }
3332         }
3333
3334       vlib_put_next_frame (vm, node, next, n_left_to_next);
3335     }
3336
3337   if (node->flags & VLIB_NODE_FLAG_TRACE)
3338       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
3339
3340   return frame->n_vectors;
3341 }
3342
3343 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3344   .function = ip4_lookup_multicast,
3345   .name = "ip4-lookup-multicast",
3346   .vector_size = sizeof (u32),
3347   .sibling_of = "ip4-lookup",
3348   .format_trace = format_ip4_lookup_trace,
3349
3350   .n_next_nodes = 0,
3351 };
3352
3353 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
3354
3355 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3356   .function = ip4_drop,
3357   .name = "ip4-multicast",
3358   .vector_size = sizeof (u32),
3359
3360   .format_trace = format_ip4_forward_next_trace,
3361
3362   .n_next_nodes = 1,
3363   .next_nodes = {
3364     [0] = "error-drop",
3365   },
3366 };
3367
3368 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3369 {
3370   ip4_main_t * im = &ip4_main;
3371   ip4_fib_mtrie_t * mtrie0;
3372   ip4_fib_mtrie_leaf_t leaf0;
3373   u32 adj_index0;
3374     
3375   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3376
3377   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3378   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3379   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3380   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3381   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3382   
3383   /* Handle default route. */
3384   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3385   
3386   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3387   
3388   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3389                                                   a, 
3390                                                   /* no_default_route */ 0);
3391 }
3392  
3393 static clib_error_t *
3394 test_lookup_command_fn (vlib_main_t * vm,
3395                         unformat_input_t * input,
3396                         vlib_cli_command_t * cmd)
3397 {
3398   u32 table_id = 0;
3399   f64 count = 1;
3400   u32 n;
3401   int i;
3402   ip4_address_t ip4_base_address;
3403   u64 errors = 0;
3404
3405   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3406       if (unformat (input, "table %d", &table_id))
3407         ;
3408       else if (unformat (input, "count %f", &count))
3409         ;
3410
3411       else if (unformat (input, "%U",
3412                          unformat_ip4_address, &ip4_base_address))
3413         ;
3414       else
3415         return clib_error_return (0, "unknown input `%U'",
3416                                   format_unformat_error, input);
3417   }
3418
3419   n = count;
3420
3421   for (i = 0; i < n; i++)
3422     {
3423       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3424         errors++;
3425
3426       ip4_base_address.as_u32 = 
3427         clib_host_to_net_u32 (1 + 
3428                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3429     }
3430
3431   if (errors) 
3432     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3433   else
3434     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3435
3436   return 0;
3437 }
3438
3439 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3440     .path = "test lookup",
3441     .short_help = "test lookup",
3442     .function = test_lookup_command_fn,
3443 };
3444
3445 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3446 {
3447   ip4_main_t * im4 = &ip4_main;
3448   ip4_fib_t * fib;
3449   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3450
3451   if (p == 0)
3452     return VNET_API_ERROR_NO_SUCH_FIB;
3453
3454   fib = vec_elt_at_index (im4->fibs, p[0]);
3455
3456   fib->flow_hash_config = flow_hash_config;
3457   return 0;
3458 }
3459  
3460 static clib_error_t *
3461 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3462                              unformat_input_t * input,
3463                              vlib_cli_command_t * cmd)
3464 {
3465   int matched = 0;
3466   u32 table_id = 0;
3467   u32 flow_hash_config = 0;
3468   int rv;
3469
3470   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3471     if (unformat (input, "table %d", &table_id))
3472       matched = 1;
3473 #define _(a,v) \
3474     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3475     foreach_flow_hash_bit
3476 #undef _
3477     else break;
3478   }
3479   
3480   if (matched == 0)
3481     return clib_error_return (0, "unknown input `%U'",
3482                               format_unformat_error, input);
3483   
3484   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3485   switch (rv)
3486     {
3487     case 0:
3488       break;
3489       
3490     case VNET_API_ERROR_NO_SUCH_FIB:
3491       return clib_error_return (0, "no such FIB table %d", table_id);
3492       
3493     default:
3494       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3495       break;
3496     }
3497   
3498   return 0;
3499 }
3500  
3501 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3502   .path = "set ip flow-hash",
3503   .short_help = 
3504   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3505   .function = set_ip_flow_hash_command_fn,
3506 };
3507  
3508 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3509                                  u32 table_index)
3510 {
3511   vnet_main_t * vnm = vnet_get_main();
3512   vnet_interface_main_t * im = &vnm->interface_main;
3513   ip4_main_t * ipm = &ip4_main;
3514   ip_lookup_main_t * lm = &ipm->lookup_main;
3515   vnet_classify_main_t * cm = &vnet_classify_main;
3516
3517   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3518     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3519
3520   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3521     return VNET_API_ERROR_NO_SUCH_ENTRY;
3522
3523   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3524   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3525
3526   return 0;
3527 }
3528
3529 static clib_error_t *
3530 set_ip_classify_command_fn (vlib_main_t * vm,
3531                             unformat_input_t * input,
3532                             vlib_cli_command_t * cmd)
3533 {
3534   u32 table_index = ~0;
3535   int table_index_set = 0;
3536   u32 sw_if_index = ~0;
3537   int rv;
3538   
3539   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3540     if (unformat (input, "table-index %d", &table_index))
3541       table_index_set = 1;
3542     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3543                        vnet_get_main(), &sw_if_index))
3544       ;
3545     else
3546       break;
3547   }
3548       
3549   if (table_index_set == 0)
3550     return clib_error_return (0, "classify table-index must be specified");
3551
3552   if (sw_if_index == ~0)
3553     return clib_error_return (0, "interface / subif must be specified");
3554
3555   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3556
3557   switch (rv)
3558     {
3559     case 0:
3560       break;
3561
3562     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3563       return clib_error_return (0, "No such interface");
3564
3565     case VNET_API_ERROR_NO_SUCH_ENTRY:
3566       return clib_error_return (0, "No such classifier table");
3567     }
3568   return 0;
3569 }
3570
3571 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3572     .path = "set ip classify",
3573     .short_help = 
3574     "set ip classify intfc <int> table-index <index>",
3575     .function = set_ip_classify_command_fn,
3576 };
3577