VPP-142 - Follow up fix for shared_count in indirect adjacencies
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47
48 /** \file
49     vnet ip4 forwarding 
50 */
51
52 /* This is really, really simple but stupid fib. */
53 u32
54 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
55                            ip4_address_t * dst,
56                            u32 disable_default_route)
57 {
58   ip_lookup_main_t * lm = &im->lookup_main;
59   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
60   uword * p, * hash, key;
61   i32 i, i_min, dst_address, ai;
62
63   i_min = disable_default_route ? 1 : 0;
64   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
65   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
66     {
67       hash = fib->adj_index_by_dst_address[i];
68       if (! hash)
69         continue;
70
71       key = dst_address & im->fib_masks[i];
72       if ((p = hash_get (hash, key)) != 0)
73         {
74           ai = p[0];
75           goto done;
76         }
77     }
78     
79   /* Nothing matches in table. */
80   ai = lm->miss_adj_index;
81
82  done:
83   return ai;
84 }
85
86 static ip4_fib_t *
87 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
88 {
89   ip4_fib_t * fib;
90   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
91   vec_add2 (im->fibs, fib, 1);
92   fib->table_id = table_id;
93   fib->index = fib - im->fibs;
94   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
95   fib->fwd_classify_table_index = ~0;
96   fib->rev_classify_table_index = ~0;
97   ip4_mtrie_init (&fib->mtrie);
98   return fib;
99 }
100
101 ip4_fib_t *
102 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
103                                    u32 table_index_or_id, u32 flags)
104 {
105   uword * p, fib_index;
106
107   fib_index = table_index_or_id;
108   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
109     {
110       if (table_index_or_id == ~0) {
111         table_index_or_id = 0;
112         while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
113           table_index_or_id++;
114         }
115         return create_fib_with_table_id (im, table_index_or_id);
116       }
117
118       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
119       if (! p)
120         return create_fib_with_table_id (im, table_index_or_id);
121       fib_index = p[0];
122     }
123   return vec_elt_at_index (im->fibs, fib_index);
124 }
125
126 static void
127 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
128                                        ip4_fib_t * fib,
129                                        u32 address_length)
130 {
131   hash_t * h;
132   uword max_index;
133
134   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
135   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
136
137   fib->adj_index_by_dst_address[address_length] =
138     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
139
140   hash_set_flags (fib->adj_index_by_dst_address[address_length],
141                   HASH_FLAG_NO_AUTO_SHRINK);
142
143   h = hash_header (fib->adj_index_by_dst_address[address_length]);
144   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
145
146   /* Initialize new/old hash value vectors. */
147   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
148   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
149 }
150
151 static void
152 ip4_fib_set_adj_index (ip4_main_t * im,
153                        ip4_fib_t * fib,
154                        u32 flags,
155                        u32 dst_address_u32,
156                        u32 dst_address_length,
157                        u32 adj_index)
158 {
159   ip_lookup_main_t * lm = &im->lookup_main;
160   uword * hash;
161
162   if (vec_bytes(fib->old_hash_values))
163     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
164   if (vec_bytes(fib->new_hash_values))
165     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
166   fib->new_hash_values[0] = adj_index;
167
168   /* Make sure adj index is valid. */
169   if (CLIB_DEBUG > 0)
170     (void) ip_get_adjacency (lm, adj_index);
171
172   hash = fib->adj_index_by_dst_address[dst_address_length];
173
174   hash = _hash_set3 (hash, dst_address_u32,
175                      fib->new_hash_values,
176                      fib->old_hash_values);
177
178   fib->adj_index_by_dst_address[dst_address_length] = hash;
179
180   if (vec_len (im->add_del_route_callbacks) > 0)
181     {
182       ip4_add_del_route_callback_t * cb;
183       ip4_address_t d;
184       uword * p;
185
186       d.data_u32 = dst_address_u32;
187       vec_foreach (cb, im->add_del_route_callbacks)
188         if ((flags & cb->required_flags) == cb->required_flags)
189           cb->function (im, cb->function_opaque,
190                         fib, flags,
191                         &d, dst_address_length,
192                         fib->old_hash_values,
193                         fib->new_hash_values);
194
195       p = hash_get (hash, dst_address_u32);
196       clib_memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
197     }
198 }
199
200 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
201 {
202   ip_lookup_main_t * lm = &im->lookup_main;
203   ip4_fib_t * fib;
204   u32 dst_address, dst_address_length, adj_index, old_adj_index;
205   uword * hash, is_del;
206   ip4_add_del_route_callback_t * cb;
207
208   /* Either create new adjacency or use given one depending on arguments. */
209   if (a->n_add_adj > 0)
210     {
211       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
212       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
213     }
214   else
215     adj_index = a->adj_index;
216
217   dst_address = a->dst_address.data_u32;
218   dst_address_length = a->dst_address_length;
219   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
220
221   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
222   dst_address &= im->fib_masks[dst_address_length];
223
224   if (! fib->adj_index_by_dst_address[dst_address_length])
225     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
226
227   hash = fib->adj_index_by_dst_address[dst_address_length];
228
229   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
230
231   if (is_del)
232     {
233       fib->old_hash_values[0] = ~0;
234       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
235       fib->adj_index_by_dst_address[dst_address_length] = hash;
236
237       if (vec_len (im->add_del_route_callbacks) > 0
238           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
239         {
240           fib->new_hash_values[0] = ~0;
241           vec_foreach (cb, im->add_del_route_callbacks)
242             if ((a->flags & cb->required_flags) == cb->required_flags)
243               cb->function (im, cb->function_opaque,
244                             fib, a->flags,
245                             &a->dst_address, dst_address_length,
246                             fib->old_hash_values,
247                             fib->new_hash_values);
248         }
249     }
250   else
251     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
252                            adj_index);
253
254   old_adj_index = fib->old_hash_values[0];
255
256   /* Avoid spurious reference count increments */
257   if (old_adj_index == adj_index
258       && adj_index != ~0
259       && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
260     {
261       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
262       if (adj->share_count > 0)
263         adj->share_count --;
264     }
265
266   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
267                                is_del ? old_adj_index : adj_index,
268                                is_del);
269
270   /* Delete old adjacency index if present and changed. */
271   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
272       && old_adj_index != ~0
273       && old_adj_index != adj_index)
274     ip_del_adjacency (lm, old_adj_index);
275 }
276
277
278 u32
279 ip4_route_get_next_hop_adj (ip4_main_t * im,
280                             u32 fib_index,
281                             ip4_address_t *next_hop,
282                             u32 next_hop_sw_if_index,
283                             u32 explicit_fib_index)
284 {
285   ip_lookup_main_t * lm = &im->lookup_main;
286   vnet_main_t * vnm = vnet_get_main();
287   uword * nh_hash, * nh_result;
288   int is_interface_next_hop;
289   u32 nh_adj_index;
290   ip4_fib_t * fib;
291
292   fib = vec_elt_at_index (im->fibs, fib_index);
293
294   is_interface_next_hop = next_hop->data_u32 == 0;
295   if (is_interface_next_hop)
296     {
297       nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
298       if (nh_result)
299           nh_adj_index = *nh_result;
300       else
301         {
302            ip_adjacency_t * adj;
303            adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
304                                    &nh_adj_index);
305            ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
306            ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
307            hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
308         }
309     }
310   else if (next_hop_sw_if_index == ~0)
311     {
312       /* next-hop is recursive. we always need a indirect adj
313        * for recursive paths. Any LPM we perform now will give
314        * us a valid adj, but without tracking the next-hop we
315        * have no way to keep it valid.
316        */
317       ip_adjacency_t add_adj;
318       memset (&add_adj, 0, sizeof(add_adj));
319       add_adj.n_adj = 1;
320       add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
321       add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
322       add_adj.explicit_fib_index = explicit_fib_index;
323       ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
324     }
325   else
326     {
327       nh_hash = fib->adj_index_by_dst_address[32];
328       nh_result = hash_get (nh_hash, next_hop->data_u32);
329
330       /* Next hop must be known. */
331       if (! nh_result)
332         {
333           ip_adjacency_t * adj;
334
335           /* no /32 exists, get the longest prefix match */
336           nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
337                                                     next_hop, 0);
338           adj = ip_get_adjacency (lm, nh_adj_index);
339           /* if ARP interface adjacency is present, we need to
340              install ARP adjaceny for specific next hop */
341           if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
342               adj->arp.next_hop.ip4.as_u32 == 0)
343             {
344               nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
345             }
346         }
347       else
348         {
349           nh_adj_index = *nh_result;
350         }
351     }
352
353   return (nh_adj_index);
354 }
355
356 void
357 ip4_add_del_route_next_hop (ip4_main_t * im,
358                             u32 flags,
359                             ip4_address_t * dst_address,
360                             u32 dst_address_length,
361                             ip4_address_t * next_hop,
362                             u32 next_hop_sw_if_index,
363                             u32 next_hop_weight, u32 adj_index, 
364                             u32 explicit_fib_index)
365 {
366   vnet_main_t * vnm = vnet_get_main();
367   ip_lookup_main_t * lm = &im->lookup_main;
368   u32 fib_index;
369   ip4_fib_t * fib;
370   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
371   u32 dst_adj_index, nh_adj_index;
372   uword * dst_hash, * dst_result;
373   ip_adjacency_t * dst_adj;
374   ip_multipath_adjacency_t * old_mp, * new_mp;
375   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
376   clib_error_t * error = 0;
377
378   if (explicit_fib_index == (u32)~0)
379       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
380   else
381       fib_index = explicit_fib_index;
382
383   fib = vec_elt_at_index (im->fibs, fib_index);
384
385   /* Lookup next hop to be added or deleted. */
386   if (adj_index == (u32)~0)
387     {
388         nh_adj_index = ip4_route_get_next_hop_adj(im, fib_index,
389                                                   next_hop,
390                                                   next_hop_sw_if_index,
391                                                   explicit_fib_index);
392     }
393   else
394     {
395       nh_adj_index = adj_index;
396     }
397   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
398   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
399
400   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
401   dst_result = hash_get (dst_hash, dst_address_u32);
402   if (dst_result)
403     {
404       dst_adj_index = dst_result[0];
405       dst_adj = ip_get_adjacency (lm, dst_adj_index);
406     }
407   else
408     {
409       /* For deletes destination must be known. */
410       if (is_del)
411         {
412           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
413           error = clib_error_return (0, "unknown destination %U/%d",
414                                      format_ip4_address, dst_address,
415                                      dst_address_length);
416           goto done;
417         }
418
419       dst_adj_index = ~0;
420       dst_adj = 0;
421     }
422
423   /* Ignore adds of X/32 with next hop of X. */
424   if (! is_del
425       && dst_address_length == 32
426       && dst_address->data_u32 == next_hop->data_u32 
427       && adj_index != (u32)~0)
428     {
429       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
430       error = clib_error_return (0, "prefix matches next hop %U/%d",
431                                  format_ip4_address, dst_address,
432                                  dst_address_length);
433       goto done;
434     }
435
436   /* Destination is not known and default weight is set so add route
437      to existing non-multipath adjacency */
438   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
439     {
440       /* create / delete additional mapping of existing adjacency */
441       ip4_add_del_route_args_t a;
442       ip_adjacency_t * nh_adj = ip_get_adjacency (lm, nh_adj_index);
443
444       a.table_index_or_table_id = fib_index;
445       a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
446                  | IP4_ROUTE_FLAG_FIB_INDEX
447                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
448                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
449                              | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
450       a.dst_address = dst_address[0];
451       a.dst_address_length = dst_address_length;
452       a.adj_index = nh_adj_index;
453       a.add_adj = 0;
454       a.n_add_adj = 0;
455
456       ip4_add_del_route (im, &a);
457
458       /* adjust share count. This cannot be the only use of the adjacency 
459          unless next hop is an indiect adj where share count is already
460          incremented */
461       if (next_hop_sw_if_index != ~0) 
462         nh_adj->share_count += is_del ? -1 : 1;
463         
464       goto done;
465     }
466
467   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
468
469   if (! ip_multipath_adjacency_add_del_next_hop
470       (lm, is_del,
471        old_mp_adj_index,
472        nh_adj_index,
473        next_hop_weight,
474        &new_mp_adj_index))
475     {
476       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
477       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
478                                  format_ip4_address, next_hop);
479       goto done;
480     }
481   
482   old_mp = new_mp = 0;
483   if (old_mp_adj_index != ~0)
484     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
485   if (new_mp_adj_index != ~0)
486     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
487
488   if (old_mp != new_mp)
489     {
490       ip4_add_del_route_args_t a;
491       ip_adjacency_t * adj;
492
493       a.table_index_or_table_id = fib_index;
494       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
495                  | IP4_ROUTE_FLAG_FIB_INDEX
496                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
497                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
498       a.dst_address = dst_address[0];
499       a.dst_address_length = dst_address_length;
500       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
501       a.add_adj = 0;
502       a.n_add_adj = 0;
503
504       ip4_add_del_route (im, &a);
505
506       adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index);
507       if (adj->n_adj == 1)
508         adj->share_count += is_del ? -1 : 1;
509     }
510
511  done:
512   if (error)
513     clib_error_report (error);
514 }
515
516 void *
517 ip4_get_route (ip4_main_t * im,
518                u32 table_index_or_table_id,
519                u32 flags,
520                u8 * address,
521                u32 address_length)
522 {
523   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
524   u32 dst_address = * (u32 *) address;
525   uword * hash, * p;
526
527   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
528   dst_address &= im->fib_masks[address_length];
529
530   hash = fib->adj_index_by_dst_address[address_length];
531   p = hash_get (hash, dst_address);
532   return (void *) p;
533 }
534
535 void
536 ip4_foreach_matching_route (ip4_main_t * im,
537                             u32 table_index_or_table_id,
538                             u32 flags,
539                             ip4_address_t * address,
540                             u32 address_length,
541                             ip4_address_t ** results,
542                             u8 ** result_lengths)
543 {
544   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
545   u32 dst_address = address->data_u32;
546   u32 this_length = address_length;
547   
548   if (*results)
549     _vec_len (*results) = 0;
550   if (*result_lengths)
551     _vec_len (*result_lengths) = 0;
552
553   while (this_length <= 32 && vec_len (results) == 0)
554     {
555       uword k, v;
556       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
557         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
558           {
559             ip4_address_t a;
560             a.data_u32 = k;
561             vec_add1 (*results, a);
562             vec_add1 (*result_lengths, this_length);
563           }
564       }));
565
566       this_length++;
567     }
568 }
569
570 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
571                                   u32 table_index_or_table_id,
572                                   u32 flags)
573 {
574   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
575   ip_lookup_main_t * lm = &im->lookup_main;
576   u32 i, l;
577   ip4_address_t a;
578   ip4_add_del_route_callback_t * cb;
579   static ip4_address_t * to_delete;
580
581   if (lm->n_adjacency_remaps == 0)
582     return;
583
584   for (l = 0; l <= 32; l++)
585     {
586       hash_pair_t * p;
587       uword * hash = fib->adj_index_by_dst_address[l];
588
589       if (hash_elts (hash) == 0)
590         continue;
591
592       if (to_delete)
593         _vec_len (to_delete) = 0;
594
595       hash_foreach_pair (p, hash, ({
596         u32 adj_index = p->value[0];
597         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
598
599         if (m)
600           {
601             /* Record destination address from hash key. */
602             a.data_u32 = p->key;
603
604             /* New adjacency points to nothing: so delete prefix. */
605             if (m == ~0)
606               vec_add1 (to_delete, a);
607             else
608               {
609                 /* Remap to new adjacency. */
610                 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
611
612                 /* Set new adjacency value. */
613                 fib->new_hash_values[0] = p->value[0] = m - 1;
614
615                 vec_foreach (cb, im->add_del_route_callbacks)
616                   if ((flags & cb->required_flags) == cb->required_flags)
617                     cb->function (im, cb->function_opaque,
618                                   fib, flags | IP4_ROUTE_FLAG_ADD,
619                                   &a, l,
620                                   fib->old_hash_values,
621                                   fib->new_hash_values);
622               }
623           }
624       }));
625
626       fib->new_hash_values[0] = ~0;
627       for (i = 0; i < vec_len (to_delete); i++)
628         {
629           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
630           vec_foreach (cb, im->add_del_route_callbacks)
631             if ((flags & cb->required_flags) == cb->required_flags)
632               cb->function (im, cb->function_opaque,
633                             fib, flags | IP4_ROUTE_FLAG_DEL,
634                             &a, l,
635                             fib->old_hash_values,
636                             fib->new_hash_values);
637         }
638     }
639
640   /* Also remap adjacencies in mtrie. */
641   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
642
643   /* Reset mapping table. */
644   vec_zero (lm->adjacency_remap_table);
645
646   /* All remaps have been performed. */
647   lm->n_adjacency_remaps = 0;
648 }
649
650 void ip4_delete_matching_routes (ip4_main_t * im,
651                                  u32 table_index_or_table_id,
652                                  u32 flags,
653                                  ip4_address_t * address,
654                                  u32 address_length)
655 {
656   static ip4_address_t * matching_addresses;
657   static u8 * matching_address_lengths;
658   u32 l, i;
659   ip4_add_del_route_args_t a;
660
661   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
662   a.table_index_or_table_id = table_index_or_table_id;
663   a.adj_index = ~0;
664   a.add_adj = 0;
665   a.n_add_adj = 0;
666
667   for (l = address_length + 1; l <= 32; l++)
668     {
669       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
670                                   address,
671                                   l,
672                                   &matching_addresses,
673                                   &matching_address_lengths);
674       for (i = 0; i < vec_len (matching_addresses); i++)
675         {
676           a.dst_address = matching_addresses[i];
677           a.dst_address_length = matching_address_lengths[i];
678           ip4_add_del_route (im, &a);
679         }
680     }
681
682   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
683 }
684
685 void
686 ip4_forward_next_trace (vlib_main_t * vm,
687                         vlib_node_runtime_t * node,
688                         vlib_frame_t * frame,
689                         vlib_rx_or_tx_t which_adj_index);
690
691 always_inline uword
692 ip4_lookup_inline (vlib_main_t * vm,
693                    vlib_node_runtime_t * node,
694                    vlib_frame_t * frame,
695                    int lookup_for_responses_to_locally_received_packets,
696                    int is_indirect)
697 {
698   ip4_main_t * im = &ip4_main;
699   ip_lookup_main_t * lm = &im->lookup_main;
700   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
701   u32 n_left_from, n_left_to_next, * from, * to_next;
702   ip_lookup_next_t next;
703   u32 cpu_index = os_get_cpu_number();
704
705   from = vlib_frame_vector_args (frame);
706   n_left_from = frame->n_vectors;
707   next = node->cached_next_index;
708
709   while (n_left_from > 0)
710     {
711       vlib_get_next_frame (vm, node, next,
712                            to_next, n_left_to_next);
713
714       while (n_left_from >= 4 && n_left_to_next >= 2)
715         {
716           vlib_buffer_t * p0, * p1;
717           ip4_header_t * ip0, * ip1;
718           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
719           ip_lookup_next_t next0, next1;
720           ip_adjacency_t * adj0, * adj1;
721           ip4_fib_mtrie_t * mtrie0, * mtrie1;
722           ip4_fib_mtrie_leaf_t leaf0, leaf1;
723           ip4_address_t * dst_addr0, *dst_addr1;
724           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
725           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
726           u32 flow_hash_config0, flow_hash_config1;
727           u32 hash_c0, hash_c1;
728           u32 wrong_next;
729
730           /* Prefetch next iteration. */
731           {
732             vlib_buffer_t * p2, * p3;
733
734             p2 = vlib_get_buffer (vm, from[2]);
735             p3 = vlib_get_buffer (vm, from[3]);
736
737             vlib_prefetch_buffer_header (p2, LOAD);
738             vlib_prefetch_buffer_header (p3, LOAD);
739
740             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
741             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
742           }
743
744           pi0 = to_next[0] = from[0];
745           pi1 = to_next[1] = from[1];
746
747           p0 = vlib_get_buffer (vm, pi0);
748           p1 = vlib_get_buffer (vm, pi1);
749
750           ip0 = vlib_buffer_get_current (p0);
751           ip1 = vlib_buffer_get_current (p1);
752
753           if (is_indirect)
754             {
755               ip_adjacency_t * iadj0, * iadj1;
756               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
757               iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
758               dst_addr0 = &iadj0->indirect.next_hop.ip4;
759               dst_addr1 = &iadj1->indirect.next_hop.ip4;
760             }
761           else
762             {
763               dst_addr0 = &ip0->dst_address;
764               dst_addr1 = &ip1->dst_address;
765             }
766
767           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
768           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
769           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
770             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
771           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
772             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
773
774
775           if (! lookup_for_responses_to_locally_received_packets)
776             {
777               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
778               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
779
780               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
781
782               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
783               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
784             }
785
786           tcp0 = (void *) (ip0 + 1);
787           tcp1 = (void *) (ip1 + 1);
788
789           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
790                          || ip0->protocol == IP_PROTOCOL_UDP);
791           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
792                          || ip1->protocol == IP_PROTOCOL_UDP);
793
794           if (! lookup_for_responses_to_locally_received_packets)
795             {
796               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
797               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
798             }
799
800           if (! lookup_for_responses_to_locally_received_packets)
801             {
802               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
803               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
804             }
805
806           if (! lookup_for_responses_to_locally_received_packets)
807             {
808               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
809               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
810             }
811
812           if (lookup_for_responses_to_locally_received_packets)
813             {
814               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
815               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
816             }
817           else
818             {
819               /* Handle default route. */
820               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
821               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
822
823               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
824               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
825             }
826
827           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
828                                                            dst_addr0,
829                                                            /* no_default_route */ 0));
830           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
831                                                            dst_addr1,
832                                                            /* no_default_route */ 0));
833           adj0 = ip_get_adjacency (lm, adj_index0);
834           adj1 = ip_get_adjacency (lm, adj_index1);
835
836           next0 = adj0->lookup_next_index;
837           next1 = adj1->lookup_next_index;
838
839           /* Use flow hash to compute multipath adjacency. */
840           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
841           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
842           if (PREDICT_FALSE (adj0->n_adj > 1))
843             {
844               flow_hash_config0 = 
845                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
846               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
847                 ip4_compute_flow_hash (ip0, flow_hash_config0);
848             }
849           if (PREDICT_FALSE(adj1->n_adj > 1))
850             {
851               flow_hash_config1 = 
852                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
853               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
854                 ip4_compute_flow_hash (ip1, flow_hash_config1);
855             }
856
857           ASSERT (adj0->n_adj > 0);
858           ASSERT (adj1->n_adj > 0);
859           ASSERT (is_pow2 (adj0->n_adj));
860           ASSERT (is_pow2 (adj1->n_adj));
861           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
862           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
863
864           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
865           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
866
867           vlib_increment_combined_counter 
868               (cm, cpu_index, adj_index0, 1,
869                vlib_buffer_length_in_chain (vm, p0) 
870                + sizeof(ethernet_header_t));
871           vlib_increment_combined_counter 
872               (cm, cpu_index, adj_index1, 1,
873                vlib_buffer_length_in_chain (vm, p1)
874                + sizeof(ethernet_header_t));
875
876           from += 2;
877           to_next += 2;
878           n_left_to_next -= 2;
879           n_left_from -= 2;
880
881           wrong_next = (next0 != next) + 2*(next1 != next);
882           if (PREDICT_FALSE (wrong_next != 0))
883             {
884               switch (wrong_next)
885                 {
886                 case 1:
887                   /* A B A */
888                   to_next[-2] = pi1;
889                   to_next -= 1;
890                   n_left_to_next += 1;
891                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
892                   break;
893
894                 case 2:
895                   /* A A B */
896                   to_next -= 1;
897                   n_left_to_next += 1;
898                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
899                   break;
900
901                 case 3:
902                   /* A B C */
903                   to_next -= 2;
904                   n_left_to_next += 2;
905                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
906                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
907                   if (next0 == next1)
908                     {
909                       /* A B B */
910                       vlib_put_next_frame (vm, node, next, n_left_to_next);
911                       next = next1;
912                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
913                     }
914                 }
915             }
916         }
917     
918       while (n_left_from > 0 && n_left_to_next > 0)
919         {
920           vlib_buffer_t * p0;
921           ip4_header_t * ip0;
922           __attribute__((unused)) tcp_header_t * tcp0;
923           ip_lookup_next_t next0;
924           ip_adjacency_t * adj0;
925           ip4_fib_mtrie_t * mtrie0;
926           ip4_fib_mtrie_leaf_t leaf0;
927           ip4_address_t * dst_addr0;
928           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
929           u32 flow_hash_config0, hash_c0;
930
931           pi0 = from[0];
932           to_next[0] = pi0;
933
934           p0 = vlib_get_buffer (vm, pi0);
935
936           ip0 = vlib_buffer_get_current (p0);
937
938           if (is_indirect)
939             {
940               ip_adjacency_t * iadj0;
941               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
942               dst_addr0 = &iadj0->indirect.next_hop.ip4;
943             }
944           else
945             {
946               dst_addr0 = &ip0->dst_address;
947             }
948
949           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
950           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
951             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
952
953           if (! lookup_for_responses_to_locally_received_packets)
954             {
955               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
956
957               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
958
959               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
960             }
961
962           tcp0 = (void *) (ip0 + 1);
963
964           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
965                          || ip0->protocol == IP_PROTOCOL_UDP);
966
967           if (! lookup_for_responses_to_locally_received_packets)
968             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
969
970           if (! lookup_for_responses_to_locally_received_packets)
971             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
972
973           if (! lookup_for_responses_to_locally_received_packets)
974             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
975
976           if (lookup_for_responses_to_locally_received_packets)
977             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
978           else
979             {
980               /* Handle default route. */
981               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
982               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
983             }
984
985           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
986                                                            dst_addr0,
987                                                            /* no_default_route */ 0));
988
989           adj0 = ip_get_adjacency (lm, adj_index0);
990
991           next0 = adj0->lookup_next_index;
992
993           /* Use flow hash to compute multipath adjacency. */
994           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
995           if (PREDICT_FALSE(adj0->n_adj > 1))
996             {
997               flow_hash_config0 = 
998                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
999
1000               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
1001                 ip4_compute_flow_hash (ip0, flow_hash_config0);
1002             }
1003
1004           ASSERT (adj0->n_adj > 0);
1005           ASSERT (is_pow2 (adj0->n_adj));
1006           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
1007
1008           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1009
1010           vlib_increment_combined_counter 
1011               (cm, cpu_index, adj_index0, 1,
1012                vlib_buffer_length_in_chain (vm, p0)
1013                + sizeof(ethernet_header_t));
1014
1015           from += 1;
1016           to_next += 1;
1017           n_left_to_next -= 1;
1018           n_left_from -= 1;
1019
1020           if (PREDICT_FALSE (next0 != next))
1021             {
1022               n_left_to_next += 1;
1023               vlib_put_next_frame (vm, node, next, n_left_to_next);
1024               next = next0;
1025               vlib_get_next_frame (vm, node, next,
1026                                    to_next, n_left_to_next);
1027               to_next[0] = pi0;
1028               to_next += 1;
1029               n_left_to_next -= 1;
1030             }
1031         }
1032
1033       vlib_put_next_frame (vm, node, next, n_left_to_next);
1034     }
1035
1036   if (node->flags & VLIB_NODE_FLAG_TRACE)
1037     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
1038
1039   return frame->n_vectors;
1040 }
1041
1042 /** \brief IPv4 lookup node.
1043     @node ip4-lookup
1044
1045     This is the main IPv4 lookup dispatch node.
1046
1047     @param vm vlib_main_t corresponding to the current thread
1048     @param node vlib_node_runtime_t
1049     @param frame vlib_frame_t whose contents should be dispatched
1050
1051     @par Graph mechanics: buffer metadata, next index usage
1052
1053     @em Uses:
1054     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
1055         - Indicates the @c sw_if_index value of the interface that the
1056           packet was received on.
1057     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
1058         - When the value is @c ~0 then the node performs a longest prefix
1059           match (LPM) for the packet destination address in the FIB attached
1060           to the receive interface.
1061         - Otherwise perform LPM for the packet destination address in the
1062           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
1063           value (0, 1, ...) and not a VRF id.
1064
1065     @em Sets:
1066     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
1067         - The lookup result adjacency index.
1068
1069     <em>Next Index:</em>
1070     - Dispatches the packet to the node index found in
1071       ip_adjacency_t @c adj->lookup_next_index
1072       (where @c adj is the lookup result adjacency).
1073 */
1074 static uword
1075 ip4_lookup (vlib_main_t * vm,
1076             vlib_node_runtime_t * node,
1077             vlib_frame_t * frame)
1078 {
1079   return ip4_lookup_inline (vm, node, frame,
1080                             /* lookup_for_responses_to_locally_received_packets */ 0,
1081                             /* is_indirect */ 0);
1082
1083 }
1084
1085 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
1086                                         ip_adjacency_t * adj,
1087                                         u32 sw_if_index,
1088                                         u32 if_address_index)
1089 {
1090   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1091   ip_lookup_next_t n;
1092   vnet_l3_packet_type_t packet_type;
1093   u32 node_index;
1094
1095   if (hw->hw_class_index == ethernet_hw_interface_class.index
1096       || hw->hw_class_index == srp_hw_interface_class.index)
1097     {
1098       /* 
1099        * We have a bit of a problem in this case. ip4-arp uses
1100        * the rewrite_header.next_index to hand pkts to the
1101        * indicated inteface output node. We can end up in
1102        * ip4_rewrite_local, too, which also pays attention to 
1103        * rewrite_header.next index. Net result: a hack in
1104        * ip4_rewrite_local...
1105        */
1106       n = IP_LOOKUP_NEXT_ARP;
1107       node_index = ip4_arp_node.index;
1108       adj->if_address_index = if_address_index;
1109       adj->arp.next_hop.ip4.as_u32 = 0;
1110       ip46_address_reset(&adj->arp.next_hop);
1111       packet_type = VNET_L3_PACKET_TYPE_ARP;
1112     }
1113   else
1114     {
1115       n = IP_LOOKUP_NEXT_REWRITE;
1116       node_index = ip4_rewrite_node.index;
1117       packet_type = VNET_L3_PACKET_TYPE_IP4;
1118     }
1119
1120   adj->lookup_next_index = n;
1121   vnet_rewrite_for_sw_interface
1122     (vnm,
1123      packet_type,
1124      sw_if_index,
1125      node_index,
1126      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1127      &adj->rewrite_header,
1128      sizeof (adj->rewrite_data));
1129 }
1130
1131 static void
1132 ip4_add_interface_routes (u32 sw_if_index,
1133                           ip4_main_t * im, u32 fib_index,
1134                           ip_interface_address_t * a)
1135 {
1136   vnet_main_t * vnm = vnet_get_main();
1137   ip_lookup_main_t * lm = &im->lookup_main;
1138   ip_adjacency_t * adj;
1139   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1140   ip4_add_del_route_args_t x;
1141   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1142   u32 classify_table_index;
1143
1144   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1145   x.table_index_or_table_id = fib_index;
1146   x.flags = (IP4_ROUTE_FLAG_ADD
1147              | IP4_ROUTE_FLAG_FIB_INDEX
1148              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1149   x.dst_address = address[0];
1150   x.dst_address_length = a->address_length;
1151   x.n_add_adj = 0;
1152   x.add_adj = 0;
1153
1154   a->neighbor_probe_adj_index = ~0;
1155   if (a->address_length < 32)
1156     {
1157       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1158                               &x.adj_index);
1159       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1160       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1161       ip4_add_del_route (im, &x);
1162       a->neighbor_probe_adj_index = x.adj_index;
1163     }
1164   
1165   /* Add e.g. 1.1.1.1/32 as local to this host. */
1166   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1167                           &x.adj_index);
1168   
1169   classify_table_index = ~0;
1170   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1171     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1172   if (classify_table_index != (u32) ~0)
1173     {
1174       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1175       adj->classify.table_index = classify_table_index;
1176     }
1177   else
1178     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1179   
1180   adj->if_address_index = a - lm->if_address_pool;
1181   adj->rewrite_header.sw_if_index = sw_if_index;
1182   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1183   /* 
1184    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1185    * fail an RPF-ish check, but still go thru the rewrite code...
1186    */
1187   adj->rewrite_header.data_bytes = 0;
1188
1189   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1190   x.dst_address_length = 32;
1191   ip4_add_del_route (im, &x);
1192 }
1193
1194 static void
1195 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1196 {
1197   ip4_add_del_route_args_t x;
1198
1199   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1200   x.table_index_or_table_id = fib_index;
1201   x.flags = (IP4_ROUTE_FLAG_DEL
1202              | IP4_ROUTE_FLAG_FIB_INDEX
1203              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1204   x.dst_address = address[0];
1205   x.dst_address_length = address_length;
1206   x.adj_index = ~0;
1207   x.n_add_adj = 0;
1208   x.add_adj = 0;
1209
1210   if (address_length < 32)
1211     ip4_add_del_route (im, &x);
1212
1213   x.dst_address_length = 32;
1214   ip4_add_del_route (im, &x);
1215
1216   ip4_delete_matching_routes (im,
1217                               fib_index,
1218                               IP4_ROUTE_FLAG_FIB_INDEX,
1219                               address,
1220                               address_length);
1221 }
1222
1223 typedef struct {
1224     u32 sw_if_index;
1225     ip4_address_t address;
1226     u32 length;
1227 } ip4_interface_address_t;
1228
1229 static clib_error_t *
1230 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1231                                         u32 sw_if_index,
1232                                         ip4_address_t * new_address,
1233                                         u32 new_length,
1234                                         u32 redistribute,
1235                                         u32 insert_routes,
1236                                         u32 is_del);
1237
1238 static clib_error_t *
1239 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1240                                         u32 sw_if_index,
1241                                         ip4_address_t * address,
1242                                         u32 address_length,
1243                                         u32 redistribute,
1244                                         u32 insert_routes,
1245                                         u32 is_del)
1246 {
1247   vnet_main_t * vnm = vnet_get_main();
1248   ip4_main_t * im = &ip4_main;
1249   ip_lookup_main_t * lm = &im->lookup_main;
1250   clib_error_t * error = 0;
1251   u32 if_address_index, elts_before;
1252   ip4_address_fib_t ip4_af, * addr_fib = 0;
1253
1254   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1255   ip4_addr_fib_init (&ip4_af, address,
1256                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1257   vec_add1 (addr_fib, ip4_af);
1258
1259   /* When adding an address check that it does not conflict with an existing address. */
1260   if (! is_del)
1261     {
1262       ip_interface_address_t * ia;
1263       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1264                                     0 /* honor unnumbered */,
1265       ({
1266         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1267
1268         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1269             || ip4_destination_matches_route (im, x, address, address_length))
1270           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1271                                     format_ip4_address_and_length, address, address_length,
1272                                     format_ip4_address_and_length, x, ia->address_length,
1273                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1274       }));
1275     }
1276
1277   elts_before = pool_elts (lm->if_address_pool);
1278
1279   error = ip_interface_address_add_del
1280     (lm,
1281      sw_if_index,
1282      addr_fib,
1283      address_length,
1284      is_del,
1285      &if_address_index);
1286   if (error)
1287     goto done;
1288   
1289   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1290     {
1291       if (is_del)
1292         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1293                                   address_length);
1294       
1295       else
1296           ip4_add_interface_routes (sw_if_index,
1297                                     im, ip4_af.fib_index,
1298                                     pool_elt_at_index 
1299                                     (lm->if_address_pool, if_address_index));
1300     }
1301
1302   /* If pool did not grow/shrink: add duplicate address. */
1303   if (elts_before != pool_elts (lm->if_address_pool))
1304     {
1305       ip4_add_del_interface_address_callback_t * cb;
1306       vec_foreach (cb, im->add_del_interface_address_callbacks)
1307         cb->function (im, cb->function_opaque, sw_if_index,
1308                       address, address_length,
1309                       if_address_index,
1310                       is_del);
1311     }
1312
1313  done:
1314   vec_free (addr_fib);
1315   return error;
1316 }
1317
1318 clib_error_t *
1319 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1320                                ip4_address_t * address, u32 address_length,
1321                                u32 is_del)
1322 {
1323   return ip4_add_del_interface_address_internal
1324     (vm, sw_if_index, address, address_length,
1325      /* redistribute */ 1,
1326      /* insert_routes */ 1,
1327      is_del);
1328 }
1329
1330 static clib_error_t *
1331 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1332                                 u32 sw_if_index,
1333                                 u32 flags)
1334 {
1335   ip4_main_t * im = &ip4_main;
1336   ip_interface_address_t * ia;
1337   ip4_address_t * a;
1338   u32 is_admin_up, fib_index;
1339   
1340   /* Fill in lookup tables with default table (0). */
1341   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1342   
1343   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1344   
1345   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1346   
1347   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1348
1349   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1350                                 0 /* honor unnumbered */,
1351   ({
1352     a = ip_interface_address_get_address (&im->lookup_main, ia);
1353     if (is_admin_up)
1354       ip4_add_interface_routes (sw_if_index,
1355                                 im, fib_index,
1356                                 ia);
1357     else
1358       ip4_del_interface_routes (im, fib_index,
1359                                 a, ia->address_length);
1360   }));
1361
1362   return 0;
1363 }
1364  
1365 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1366
1367 /* Built-in ip4 unicast rx feature path definition */
1368 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
1369   .node_name = "ip4-inacl", 
1370   .runs_before = {"ip4-source-check-via-rx", 0}, 
1371   .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
1372 };
1373
1374 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
1375   .node_name = "ip4-source-check-via-rx",
1376   .runs_before = {"ip4-source-check-via-any", 0},
1377   .feature_index = 
1378   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
1379 };
1380
1381 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
1382   .node_name = "ip4-source-check-via-any",
1383   .runs_before = {"ipsec-input-ip4", 0},
1384   .feature_index = 
1385   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
1386 };
1387
1388 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
1389   .node_name = "ipsec-input-ip4",
1390   .runs_before = {"vpath-input-ip4", 0},
1391   .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
1392 };
1393
1394 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
1395   .node_name = "vpath-input-ip4",
1396   .runs_before = {"ip4-lookup", 0},
1397   .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
1398 };
1399
1400 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
1401   .node_name = "ip4-lookup",
1402   .runs_before = {0}, /* not before any other features */
1403   .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
1404 };
1405
1406 /* Built-in ip4 multicast rx feature path definition */
1407 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
1408   .node_name = "vpath-input-ip4",
1409   .runs_before = {"ip4-lookup-multicast", 0},
1410   .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
1411 };
1412
1413 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
1414   .node_name = "ip4-lookup-multicast",
1415   .runs_before = {0}, /* not before any other features */
1416   .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
1417 };
1418
1419 static char * feature_start_nodes[] = 
1420   { "ip4-input", "ip4-input-no-checksum"};
1421
1422 static clib_error_t *
1423 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
1424 {
1425   ip_lookup_main_t * lm = &im->lookup_main;
1426   clib_error_t * error;
1427   vnet_cast_t cast;
1428
1429   for (cast = 0; cast < VNET_N_CAST; cast++)
1430     {
1431       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1432       vnet_config_main_t * vcm = &cm->config_main;
1433
1434       if ((error = ip_feature_init_cast (vm, cm, vcm, 
1435                                          feature_start_nodes,
1436                                          ARRAY_LEN(feature_start_nodes),
1437                                          cast,
1438                                          1 /* is_ip4 */)))
1439         return error;
1440     }
1441   return 0;
1442 }
1443
1444 static clib_error_t *
1445 ip4_sw_interface_add_del (vnet_main_t * vnm,
1446                           u32 sw_if_index,
1447                           u32 is_add)
1448 {
1449   vlib_main_t * vm = vnm->vlib_main;
1450   ip4_main_t * im = &ip4_main;
1451   ip_lookup_main_t * lm = &im->lookup_main;
1452   u32 ci, cast;
1453   u32 feature_index;
1454
1455   for (cast = 0; cast < VNET_N_CAST; cast++)
1456     {
1457       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1458       vnet_config_main_t * vcm = &cm->config_main;
1459
1460       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1461       ci = cm->config_index_by_sw_if_index[sw_if_index];
1462
1463       if (cast == VNET_UNICAST)
1464         feature_index = im->ip4_unicast_rx_feature_lookup;
1465       else
1466         feature_index = im->ip4_multicast_rx_feature_lookup;
1467
1468       if (is_add)
1469         ci = vnet_config_add_feature (vm, vcm,
1470                                       ci,
1471                                       feature_index,
1472                                       /* config data */ 0,
1473                                       /* # bytes of config data */ 0);
1474       else
1475         ci = vnet_config_del_feature (vm, vcm,
1476                                       ci,
1477                                       feature_index,
1478                                       /* config data */ 0,
1479                                       /* # bytes of config data */ 0);
1480
1481       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1482     }
1483
1484   return /* no error */ 0;
1485 }
1486
1487 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1488
1489 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
1490
1491 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1492   .function = ip4_lookup,
1493   .name = "ip4-lookup",
1494   .vector_size = sizeof (u32),
1495
1496   .format_trace = format_ip4_lookup_trace,
1497
1498   .n_next_nodes = IP4_LOOKUP_N_NEXT,
1499   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1500 };
1501
1502 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
1503
1504 static uword
1505 ip4_indirect (vlib_main_t * vm,
1506                vlib_node_runtime_t * node,
1507                vlib_frame_t * frame)
1508 {
1509   return ip4_lookup_inline (vm, node, frame,
1510                             /* lookup_for_responses_to_locally_received_packets */ 0,
1511                             /* is_indirect */ 1);
1512 }
1513
1514 VLIB_REGISTER_NODE (ip4_indirect_node) = {
1515   .function = ip4_indirect,
1516   .name = "ip4-indirect",
1517   .vector_size = sizeof (u32),
1518   .sibling_of = "ip4-lookup",
1519   .format_trace = format_ip4_lookup_trace,
1520
1521   .n_next_nodes = 0,
1522 };
1523
1524 VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect)
1525
1526
1527 /* Global IP4 main. */
1528 ip4_main_t ip4_main;
1529
1530 clib_error_t *
1531 ip4_lookup_init (vlib_main_t * vm)
1532 {
1533   ip4_main_t * im = &ip4_main;
1534   clib_error_t * error;
1535   uword i;
1536
1537   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1538     {
1539       u32 m;
1540
1541       if (i < 32)
1542         m = pow2_mask (i) << (32 - i);
1543       else 
1544         m = ~0;
1545       im->fib_masks[i] = clib_host_to_net_u32 (m);
1546     }
1547
1548   /* Create FIB with index 0 and table id of 0. */
1549   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1550
1551   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1552
1553   {
1554     pg_node_t * pn;
1555     pn = pg_get_node (ip4_lookup_node.index);
1556     pn->unformat_edit = unformat_pg_ip4_header;
1557   }
1558
1559   {
1560     ethernet_arp_header_t h;
1561
1562     memset (&h, 0, sizeof (h));
1563
1564     /* Set target ethernet address to all zeros. */
1565     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1566
1567 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1568 #define _8(f,v) h.f = v;
1569     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1570     _16 (l3_type, ETHERNET_TYPE_IP4);
1571     _8 (n_l2_address_bytes, 6);
1572     _8 (n_l3_address_bytes, 4);
1573     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1574 #undef _16
1575 #undef _8
1576
1577     vlib_packet_template_init (vm,
1578                                &im->ip4_arp_request_packet_template,
1579                                /* data */ &h,
1580                                sizeof (h),
1581                                /* alloc chunk size */ 8,
1582                                "ip4 arp");
1583   }
1584
1585   error = ip4_feature_init (vm, im);
1586
1587   return error;
1588 }
1589
1590 VLIB_INIT_FUNCTION (ip4_lookup_init);
1591
1592 typedef struct {
1593   /* Adjacency taken. */
1594   u32 adj_index;
1595   u32 flow_hash;
1596   u32 fib_index;
1597
1598   /* Packet data, possibly *after* rewrite. */
1599   u8 packet_data[64 - 1*sizeof(u32)];
1600 } ip4_forward_next_trace_t;
1601
1602 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1603 {
1604   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1605   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1606   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1607   uword indent = format_get_indent (s);
1608   s = format (s, "%U%U",
1609                 format_white_space, indent,
1610                 format_ip4_header, t->packet_data);
1611   return s;
1612 }
1613
1614 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1615 {
1616   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1617   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1618   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1619   vnet_main_t * vnm = vnet_get_main();
1620   ip4_main_t * im = &ip4_main;
1621   uword indent = format_get_indent (s);
1622
1623   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1624               t->fib_index, t->adj_index, format_ip_adjacency,
1625               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1626   s = format (s, "\n%U%U",
1627               format_white_space, indent,
1628               format_ip4_header, t->packet_data);
1629   return s;
1630 }
1631
1632 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1633 {
1634   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1635   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1636   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1637   vnet_main_t * vnm = vnet_get_main();
1638   ip4_main_t * im = &ip4_main;
1639   uword indent = format_get_indent (s);
1640
1641   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
1642               t->fib_index, t->adj_index, format_ip_adjacency,
1643               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1644   s = format (s, "\n%U%U",
1645               format_white_space, indent,
1646               format_ip_adjacency_packet_data,
1647               vnm, &im->lookup_main, t->adj_index,
1648               t->packet_data, sizeof (t->packet_data));
1649   return s;
1650 }
1651
1652 /* Common trace function for all ip4-forward next nodes. */
1653 void
1654 ip4_forward_next_trace (vlib_main_t * vm,
1655                         vlib_node_runtime_t * node,
1656                         vlib_frame_t * frame,
1657                         vlib_rx_or_tx_t which_adj_index)
1658 {
1659   u32 * from, n_left;
1660   ip4_main_t * im = &ip4_main;
1661
1662   n_left = frame->n_vectors;
1663   from = vlib_frame_vector_args (frame);
1664   
1665   while (n_left >= 4)
1666     {
1667       u32 bi0, bi1;
1668       vlib_buffer_t * b0, * b1;
1669       ip4_forward_next_trace_t * t0, * t1;
1670
1671       /* Prefetch next iteration. */
1672       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1673       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1674
1675       bi0 = from[0];
1676       bi1 = from[1];
1677
1678       b0 = vlib_get_buffer (vm, bi0);
1679       b1 = vlib_get_buffer (vm, bi1);
1680
1681       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1682         {
1683           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1684           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1685           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1686           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1687               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1688               vec_elt (im->fib_index_by_sw_if_index,
1689                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1690
1691           clib_memcpy (t0->packet_data,
1692                   vlib_buffer_get_current (b0),
1693                   sizeof (t0->packet_data));
1694         }
1695       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1696         {
1697           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1698           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1699           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1700           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1701               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1702               vec_elt (im->fib_index_by_sw_if_index,
1703                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1704           clib_memcpy (t1->packet_data,
1705                   vlib_buffer_get_current (b1),
1706                   sizeof (t1->packet_data));
1707         }
1708       from += 2;
1709       n_left -= 2;
1710     }
1711
1712   while (n_left >= 1)
1713     {
1714       u32 bi0;
1715       vlib_buffer_t * b0;
1716       ip4_forward_next_trace_t * t0;
1717
1718       bi0 = from[0];
1719
1720       b0 = vlib_get_buffer (vm, bi0);
1721
1722       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1723         {
1724           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1725           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1726           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1727           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1728               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1729               vec_elt (im->fib_index_by_sw_if_index,
1730                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1731           clib_memcpy (t0->packet_data,
1732                   vlib_buffer_get_current (b0),
1733                   sizeof (t0->packet_data));
1734         }
1735       from += 1;
1736       n_left -= 1;
1737     }
1738 }
1739
1740 static uword
1741 ip4_drop_or_punt (vlib_main_t * vm,
1742                   vlib_node_runtime_t * node,
1743                   vlib_frame_t * frame,
1744                   ip4_error_t error_code)
1745 {
1746   u32 * buffers = vlib_frame_vector_args (frame);
1747   uword n_packets = frame->n_vectors;
1748
1749   vlib_error_drop_buffers (vm, node,
1750                            buffers,
1751                            /* stride */ 1,
1752                            n_packets,
1753                            /* next */ 0,
1754                            ip4_input_node.index,
1755                            error_code);
1756
1757   if (node->flags & VLIB_NODE_FLAG_TRACE)
1758     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1759
1760   return n_packets;
1761 }
1762
1763 static uword
1764 ip4_drop (vlib_main_t * vm,
1765           vlib_node_runtime_t * node,
1766           vlib_frame_t * frame)
1767 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1768
1769 static uword
1770 ip4_punt (vlib_main_t * vm,
1771           vlib_node_runtime_t * node,
1772           vlib_frame_t * frame)
1773 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1774
1775 static uword
1776 ip4_miss (vlib_main_t * vm,
1777           vlib_node_runtime_t * node,
1778           vlib_frame_t * frame)
1779 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1780
1781 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1782   .function = ip4_drop,
1783   .name = "ip4-drop",
1784   .vector_size = sizeof (u32),
1785
1786   .format_trace = format_ip4_forward_next_trace,
1787
1788   .n_next_nodes = 1,
1789   .next_nodes = {
1790     [0] = "error-drop",
1791   },
1792 };
1793
1794 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1795
1796 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1797   .function = ip4_punt,
1798   .name = "ip4-punt",
1799   .vector_size = sizeof (u32),
1800
1801   .format_trace = format_ip4_forward_next_trace,
1802
1803   .n_next_nodes = 1,
1804   .next_nodes = {
1805     [0] = "error-punt",
1806   },
1807 };
1808
1809 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1810
1811 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1812   .function = ip4_miss,
1813   .name = "ip4-miss",
1814   .vector_size = sizeof (u32),
1815
1816   .format_trace = format_ip4_forward_next_trace,
1817
1818   .n_next_nodes = 1,
1819   .next_nodes = {
1820     [0] = "error-drop",
1821   },
1822 };
1823
1824 VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss)
1825
1826 /* Compute TCP/UDP/ICMP4 checksum in software. */
1827 u16
1828 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1829                               ip4_header_t * ip0)
1830 {
1831   ip_csum_t sum0;
1832   u32 ip_header_length, payload_length_host_byte_order;
1833   u32 n_this_buffer, n_bytes_left;
1834   u16 sum16;
1835   void * data_this_buffer;
1836   
1837   /* Initialize checksum with ip header. */
1838   ip_header_length = ip4_header_bytes (ip0);
1839   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1840   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1841
1842   if (BITS (uword) == 32)
1843     {
1844       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1845       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1846     }
1847   else
1848     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1849
1850   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1851   data_this_buffer = (void *) ip0 + ip_header_length;
1852   if (n_this_buffer + ip_header_length > p0->current_length)
1853     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1854   while (1)
1855     {
1856       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1857       n_bytes_left -= n_this_buffer;
1858       if (n_bytes_left == 0)
1859         break;
1860
1861       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1862       p0 = vlib_get_buffer (vm, p0->next_buffer);
1863       data_this_buffer = vlib_buffer_get_current (p0);
1864       n_this_buffer = p0->current_length;
1865     }
1866
1867   sum16 = ~ ip_csum_fold (sum0);
1868
1869   return sum16;
1870 }
1871
1872 static u32
1873 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1874 {
1875   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1876   udp_header_t * udp0;
1877   u16 sum16;
1878
1879   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1880           || ip0->protocol == IP_PROTOCOL_UDP);
1881
1882   udp0 = (void *) (ip0 + 1);
1883   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1884     {
1885       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1886                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1887       return p0->flags;
1888     }
1889
1890   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1891
1892   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1893                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1894
1895   return p0->flags;
1896 }
1897
1898 static uword
1899 ip4_local (vlib_main_t * vm,
1900            vlib_node_runtime_t * node,
1901            vlib_frame_t * frame)
1902 {
1903   ip4_main_t * im = &ip4_main;
1904   ip_lookup_main_t * lm = &im->lookup_main;
1905   ip_local_next_t next_index;
1906   u32 * from, * to_next, n_left_from, n_left_to_next;
1907   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1908
1909   from = vlib_frame_vector_args (frame);
1910   n_left_from = frame->n_vectors;
1911   next_index = node->cached_next_index;
1912   
1913   if (node->flags & VLIB_NODE_FLAG_TRACE)
1914     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1915
1916   while (n_left_from > 0)
1917     {
1918       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1919
1920       while (n_left_from >= 4 && n_left_to_next >= 2)
1921         {
1922           vlib_buffer_t * p0, * p1;
1923           ip4_header_t * ip0, * ip1;
1924           udp_header_t * udp0, * udp1;
1925           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1926           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1927           ip_adjacency_t * adj0, * adj1;
1928           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
1929           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
1930           i32 len_diff0, len_diff1;
1931           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1932           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1933           u8 enqueue_code;
1934       
1935           pi0 = to_next[0] = from[0];
1936           pi1 = to_next[1] = from[1];
1937           from += 2;
1938           n_left_from -= 2;
1939           to_next += 2;
1940           n_left_to_next -= 2;
1941       
1942           p0 = vlib_get_buffer (vm, pi0);
1943           p1 = vlib_get_buffer (vm, pi1);
1944
1945           ip0 = vlib_buffer_get_current (p0);
1946           ip1 = vlib_buffer_get_current (p1);
1947
1948           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1949                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1950           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1951                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1952
1953           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1954           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
1955
1956           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1957
1958           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1959           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1960
1961           /* Treat IP frag packets as "experimental" protocol for now
1962              until support of IP frag reassembly is implemented */
1963           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1964           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1965           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1966           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1967           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1968           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1969
1970           flags0 = p0->flags;
1971           flags1 = p1->flags;
1972
1973           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1974           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1975
1976           udp0 = ip4_next_header (ip0);
1977           udp1 = ip4_next_header (ip1);
1978
1979           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1980           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1981           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1982
1983           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1984           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1985
1986           /* Verify UDP length. */
1987           ip_len0 = clib_net_to_host_u16 (ip0->length);
1988           ip_len1 = clib_net_to_host_u16 (ip1->length);
1989           udp_len0 = clib_net_to_host_u16 (udp0->length);
1990           udp_len1 = clib_net_to_host_u16 (udp1->length);
1991
1992           len_diff0 = ip_len0 - udp_len0;
1993           len_diff1 = ip_len1 - udp_len1;
1994
1995           len_diff0 = is_udp0 ? len_diff0 : 0;
1996           len_diff1 = is_udp1 ? len_diff1 : 0;
1997
1998           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1999                                 & good_tcp_udp0 & good_tcp_udp1)))
2000             {
2001               if (is_tcp_udp0)
2002                 {
2003                   if (is_tcp_udp0
2004                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2005                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2006                   good_tcp_udp0 =
2007                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2008                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2009                 }
2010               if (is_tcp_udp1)
2011                 {
2012                   if (is_tcp_udp1
2013                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2014                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
2015                   good_tcp_udp1 =
2016                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2017                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2018                 }
2019             }
2020
2021           good_tcp_udp0 &= len_diff0 >= 0;
2022           good_tcp_udp1 &= len_diff1 >= 0;
2023
2024           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2025           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
2026
2027           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
2028
2029           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2030           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
2031
2032           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2033           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2034                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2035                     : error0);
2036           error1 = (is_tcp_udp1 && ! good_tcp_udp1
2037                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
2038                     : error1);
2039
2040           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2041           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
2042
2043           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2044           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2045
2046           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
2047           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2048
2049           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2050                                                            &ip0->src_address,
2051                                                            /* no_default_route */ 1));
2052           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
2053                                                            &ip1->src_address,
2054                                                            /* no_default_route */ 1));
2055
2056           adj0 = ip_get_adjacency (lm, adj_index0);
2057           adj1 = ip_get_adjacency (lm, adj_index1);
2058
2059           /* 
2060            * Must have a route to source otherwise we drop the packet.
2061            * ip4 broadcasts are accepted, e.g. to make dhcp client work
2062            */
2063           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2064                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2065                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2066                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2067                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2068                     ? IP4_ERROR_SRC_LOOKUP_MISS
2069                     : error0);
2070           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
2071                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2072                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
2073                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2074                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2075                     ? IP4_ERROR_SRC_LOOKUP_MISS
2076                     : error1);
2077
2078           next0 = lm->local_next_by_ip_protocol[proto0];
2079           next1 = lm->local_next_by_ip_protocol[proto1];
2080
2081           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2082           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
2083
2084           p0->error = error0 ? error_node->errors[error0] : 0;
2085           p1->error = error1 ? error_node->errors[error1] : 0;
2086
2087           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
2088
2089           if (PREDICT_FALSE (enqueue_code != 0))
2090             {
2091               switch (enqueue_code)
2092                 {
2093                 case 1:
2094                   /* A B A */
2095                   to_next[-2] = pi1;
2096                   to_next -= 1;
2097                   n_left_to_next += 1;
2098                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2099                   break;
2100
2101                 case 2:
2102                   /* A A B */
2103                   to_next -= 1;
2104                   n_left_to_next += 1;
2105                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2106                   break;
2107
2108                 case 3:
2109                   /* A B B or A B C */
2110                   to_next -= 2;
2111                   n_left_to_next += 2;
2112                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2113                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2114                   if (next0 == next1)
2115                     {
2116                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2117                       next_index = next1;
2118                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2119                     }
2120                   break;
2121                 }
2122             }
2123         }
2124
2125       while (n_left_from > 0 && n_left_to_next > 0)
2126         {
2127           vlib_buffer_t * p0;
2128           ip4_header_t * ip0;
2129           udp_header_t * udp0;
2130           ip4_fib_mtrie_t * mtrie0;
2131           ip4_fib_mtrie_leaf_t leaf0;
2132           ip_adjacency_t * adj0;
2133           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
2134           i32 len_diff0;
2135           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2136       
2137           pi0 = to_next[0] = from[0];
2138           from += 1;
2139           n_left_from -= 1;
2140           to_next += 1;
2141           n_left_to_next -= 1;
2142       
2143           p0 = vlib_get_buffer (vm, pi0);
2144
2145           ip0 = vlib_buffer_get_current (p0);
2146
2147           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2148                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2149
2150           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2151
2152           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2153
2154           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2155
2156           /* Treat IP frag packets as "experimental" protocol for now
2157              until support of IP frag reassembly is implemented */
2158           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
2159           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2160           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2161
2162           flags0 = p0->flags;
2163
2164           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2165
2166           udp0 = ip4_next_header (ip0);
2167
2168           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2169           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2170
2171           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2172
2173           /* Verify UDP length. */
2174           ip_len0 = clib_net_to_host_u16 (ip0->length);
2175           udp_len0 = clib_net_to_host_u16 (udp0->length);
2176
2177           len_diff0 = ip_len0 - udp_len0;
2178
2179           len_diff0 = is_udp0 ? len_diff0 : 0;
2180
2181           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
2182             {
2183               if (is_tcp_udp0)
2184                 {
2185                   if (is_tcp_udp0
2186                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2187                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2188                   good_tcp_udp0 =
2189                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2190                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2191                 }
2192             }
2193
2194           good_tcp_udp0 &= len_diff0 >= 0;
2195
2196           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2197
2198           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2199
2200           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2201
2202           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2203           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2204                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2205                     : error0);
2206
2207           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2208
2209           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2210           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2211
2212           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2213                                                            &ip0->src_address,
2214                                                            /* no_default_route */ 1));
2215
2216           adj0 = ip_get_adjacency (lm, adj_index0);
2217
2218           /* Must have a route to source otherwise we drop the packet. */
2219           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2220                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2221                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2222                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2223                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2224                     ? IP4_ERROR_SRC_LOOKUP_MISS
2225                     : error0);
2226
2227           next0 = lm->local_next_by_ip_protocol[proto0];
2228
2229           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2230
2231           p0->error = error0? error_node->errors[error0] : 0;
2232
2233           if (PREDICT_FALSE (next0 != next_index))
2234             {
2235               n_left_to_next += 1;
2236               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2237
2238               next_index = next0;
2239               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2240               to_next[0] = pi0;
2241               to_next += 1;
2242               n_left_to_next -= 1;
2243             }
2244         }
2245   
2246       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2247     }
2248
2249   return frame->n_vectors;
2250 }
2251
2252 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2253   .function = ip4_local,
2254   .name = "ip4-local",
2255   .vector_size = sizeof (u32),
2256
2257   .format_trace = format_ip4_forward_next_trace,
2258
2259   .n_next_nodes = IP_LOCAL_N_NEXT,
2260   .next_nodes = {
2261     [IP_LOCAL_NEXT_DROP] = "error-drop",
2262     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2263     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2264     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2265   },
2266 };
2267
2268 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
2269
2270 void ip4_register_protocol (u32 protocol, u32 node_index)
2271 {
2272   vlib_main_t * vm = vlib_get_main();
2273   ip4_main_t * im = &ip4_main;
2274   ip_lookup_main_t * lm = &im->lookup_main;
2275
2276   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2277   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2278 }
2279
2280 static clib_error_t *
2281 show_ip_local_command_fn (vlib_main_t * vm,
2282                           unformat_input_t * input,
2283                          vlib_cli_command_t * cmd)
2284 {
2285   ip4_main_t * im = &ip4_main;
2286   ip_lookup_main_t * lm = &im->lookup_main;
2287   int i;
2288
2289   vlib_cli_output (vm, "Protocols handled by ip4_local");
2290   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2291     {
2292       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2293         vlib_cli_output (vm, "%d", i);
2294     }
2295   return 0;
2296 }
2297
2298
2299
2300 VLIB_CLI_COMMAND (show_ip_local, static) = {
2301   .path = "show ip local",
2302   .function = show_ip_local_command_fn,
2303   .short_help = "Show ip local protocol table",
2304 };
2305
2306 static uword
2307 ip4_arp (vlib_main_t * vm,
2308          vlib_node_runtime_t * node,
2309          vlib_frame_t * frame)
2310 {
2311   vnet_main_t * vnm = vnet_get_main();
2312   ip4_main_t * im = &ip4_main;
2313   ip_lookup_main_t * lm = &im->lookup_main;
2314   u32 * from, * to_next_drop;
2315   uword n_left_from, n_left_to_next_drop, next_index;
2316   static f64 time_last_seed_change = -1e100;
2317   static u32 hash_seeds[3];
2318   static uword hash_bitmap[256 / BITS (uword)]; 
2319   f64 time_now;
2320
2321   if (node->flags & VLIB_NODE_FLAG_TRACE)
2322     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2323
2324   time_now = vlib_time_now (vm);
2325   if (time_now - time_last_seed_change > 1e-3)
2326     {
2327       uword i;
2328       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2329                                              sizeof (hash_seeds));
2330       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2331         hash_seeds[i] = r[i];
2332
2333       /* Mark all hash keys as been no-seen before. */
2334       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2335         hash_bitmap[i] = 0;
2336
2337       time_last_seed_change = time_now;
2338     }
2339
2340   from = vlib_frame_vector_args (frame);
2341   n_left_from = frame->n_vectors;
2342   next_index = node->cached_next_index;
2343   if (next_index == IP4_ARP_NEXT_DROP)
2344     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2345
2346   while (n_left_from > 0)
2347     {
2348       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2349                            to_next_drop, n_left_to_next_drop);
2350
2351       while (n_left_from > 0 && n_left_to_next_drop > 0)
2352         {
2353           vlib_buffer_t * p0;
2354           ip4_header_t * ip0;
2355           ethernet_header_t * eh0;
2356           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2357           uword bm0;
2358           ip_adjacency_t * adj0;
2359
2360           pi0 = from[0];
2361
2362           p0 = vlib_get_buffer (vm, pi0);
2363
2364           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2365           adj0 = ip_get_adjacency (lm, adj_index0);
2366           ip0 = vlib_buffer_get_current (p0);
2367
2368           /* If packet destination is not local, send ARP to next hop */
2369           if (adj0->arp.next_hop.ip4.as_u32)
2370             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2371
2372           /* 
2373            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2374            * rewrite to this packet, we need to skip it here.
2375            * Note, to distinguish from src IP addr *.8.6.*, we
2376            * check for a bcast eth dest instead of IPv4 version.
2377            */
2378           eh0 = (ethernet_header_t*)ip0;
2379           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2380             {
2381               u32 vlan_num = 0;
2382               u16 * etype = &eh0->type;
2383               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2384                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2385                 {
2386                   vlan_num += 1;
2387                   etype += 2; //vlan tag also 16 bits, same as etype
2388                 }
2389               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2390                 {
2391                   vlib_buffer_advance (
2392                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2393                   ip0 = vlib_buffer_get_current (p0);
2394                 }
2395             }
2396
2397           a0 = hash_seeds[0];
2398           b0 = hash_seeds[1];
2399           c0 = hash_seeds[2];
2400
2401           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2402           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2403
2404           a0 ^= ip0->dst_address.data_u32;
2405           b0 ^= sw_if_index0;
2406
2407           hash_v3_finalize32 (a0, b0, c0);
2408
2409           c0 &= BITS (hash_bitmap) - 1;
2410           c0 = c0 / BITS (uword);
2411           m0 = (uword) 1 << (c0 % BITS (uword));
2412
2413           bm0 = hash_bitmap[c0];
2414           drop0 = (bm0 & m0) != 0;
2415
2416           /* Mark it as seen. */
2417           hash_bitmap[c0] = bm0 | m0;
2418
2419           from += 1;
2420           n_left_from -= 1;
2421           to_next_drop[0] = pi0;
2422           to_next_drop += 1;
2423           n_left_to_next_drop -= 1;
2424
2425           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2426
2427           if (drop0)
2428             continue;
2429
2430           /* 
2431            * Can happen if the control-plane is programming tables
2432            * with traffic flowing; at least that's today's lame excuse.
2433            */
2434           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2435             {
2436               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2437             }
2438           else
2439           /* Send ARP request. */
2440           {
2441             u32 bi0 = 0;
2442             vlib_buffer_t * b0;
2443             ethernet_arp_header_t * h0;
2444             vnet_hw_interface_t * hw_if0;
2445
2446             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2447
2448             /* Add rewrite/encap string for ARP packet. */
2449             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2450
2451             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2452
2453             /* Src ethernet address in ARP header. */
2454             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2455                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2456
2457             if (ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0)) {
2458                 //No source address available
2459                 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2460                 vlib_buffer_free(vm, &bi0, 1);
2461                 continue;
2462             }
2463
2464             /* Copy in destination address we are requesting. */
2465             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2466
2467             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2468             b0 = vlib_get_buffer (vm, bi0);
2469             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2470
2471             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2472
2473             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2474           }
2475         }
2476
2477       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2478     }
2479
2480   return frame->n_vectors;
2481 }
2482
2483 static char * ip4_arp_error_strings[] = {
2484   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2485   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2486   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2487   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2488   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2489   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2490 };
2491
2492 VLIB_REGISTER_NODE (ip4_arp_node) = {
2493   .function = ip4_arp,
2494   .name = "ip4-arp",
2495   .vector_size = sizeof (u32),
2496
2497   .format_trace = format_ip4_forward_next_trace,
2498
2499   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2500   .error_strings = ip4_arp_error_strings,
2501
2502   .n_next_nodes = IP4_ARP_N_NEXT,
2503   .next_nodes = {
2504     [IP4_ARP_NEXT_DROP] = "error-drop",
2505   },
2506 };
2507
2508 #define foreach_notrace_ip4_arp_error           \
2509 _(DROP)                                         \
2510 _(REQUEST_SENT)                                 \
2511 _(REPLICATE_DROP)                               \
2512 _(REPLICATE_FAIL)
2513
2514 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2515 {
2516   vlib_node_runtime_t *rt = 
2517     vlib_node_get_runtime (vm, ip4_arp_node.index);
2518
2519   /* don't trace ARP request packets */
2520 #define _(a)                                    \
2521     vnet_pcap_drop_trace_filter_add_del         \
2522         (rt->errors[IP4_ARP_ERROR_##a],         \
2523          1 /* is_add */);
2524     foreach_notrace_ip4_arp_error;
2525 #undef _
2526   return 0;
2527 }
2528
2529 VLIB_INIT_FUNCTION(arp_notrace_init);
2530
2531
2532 /* Send an ARP request to see if given destination is reachable on given interface. */
2533 clib_error_t *
2534 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2535 {
2536   vnet_main_t * vnm = vnet_get_main();
2537   ip4_main_t * im = &ip4_main;
2538   ethernet_arp_header_t * h;
2539   ip4_address_t * src;
2540   ip_interface_address_t * ia;
2541   ip_adjacency_t * adj;
2542   vnet_hw_interface_t * hi;
2543   vnet_sw_interface_t * si;
2544   vlib_buffer_t * b;
2545   u32 bi = 0;
2546
2547   si = vnet_get_sw_interface (vnm, sw_if_index);
2548
2549   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2550     {
2551       return clib_error_return (0, "%U: interface %U down",
2552                                 format_ip4_address, dst, 
2553                                 format_vnet_sw_if_index_name, vnm, 
2554                                 sw_if_index);
2555     }
2556
2557   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2558   if (! src)
2559     {
2560       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2561       return clib_error_return 
2562         (0, "no matching interface address for destination %U (interface %U)",
2563          format_ip4_address, dst,
2564          format_vnet_sw_if_index_name, vnm, sw_if_index);
2565     }
2566
2567   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2568
2569   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2570
2571   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2572
2573   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2574
2575   h->ip4_over_ethernet[0].ip4 = src[0];
2576   h->ip4_over_ethernet[1].ip4 = dst[0];
2577
2578   b = vlib_get_buffer (vm, bi);
2579   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2580
2581   /* Add encapsulation string for software interface (e.g. ethernet header). */
2582   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2583   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2584
2585   {
2586     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2587     u32 * to_next = vlib_frame_vector_args (f);
2588     to_next[0] = bi;
2589     f->n_vectors = 1;
2590     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2591   }
2592
2593   return /* no error */ 0;
2594 }
2595
/*
 * Next-node slots of the ip4-rewrite nodes (error-drop, ip4-arp and
 * ip4-icmp-error in the node registration).  DROP has to stay 0:
 * ip4_rewrite_inline tests next0 for non-zero before applying the ARP
 * override.
 */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ARP = 1,
  IP4_REWRITE_NEXT_ICMP_ERROR = 2,
} ip4_rewrite_next_t;
2601
2602 always_inline uword
2603 ip4_rewrite_inline (vlib_main_t * vm,
2604                     vlib_node_runtime_t * node,
2605                     vlib_frame_t * frame,
2606                     int rewrite_for_locally_received_packets)
2607 {
2608   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2609   u32 * from = vlib_frame_vector_args (frame);
2610   u32 n_left_from, n_left_to_next, * to_next, next_index;
2611   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2612   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2613
2614   n_left_from = frame->n_vectors;
2615   next_index = node->cached_next_index;
2616   u32 cpu_index = os_get_cpu_number();
2617   
2618   while (n_left_from > 0)
2619     {
2620       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2621
2622       while (n_left_from >= 4 && n_left_to_next >= 2)
2623         {
2624           ip_adjacency_t * adj0, * adj1;
2625           vlib_buffer_t * p0, * p1;
2626           ip4_header_t * ip0, * ip1;
2627           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2628           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2629           u32 next0_override, next1_override;
2630       
2631           if (rewrite_for_locally_received_packets)
2632               next0_override = next1_override = 0;
2633
2634           /* Prefetch next iteration. */
2635           {
2636             vlib_buffer_t * p2, * p3;
2637
2638             p2 = vlib_get_buffer (vm, from[2]);
2639             p3 = vlib_get_buffer (vm, from[3]);
2640
2641             vlib_prefetch_buffer_header (p2, STORE);
2642             vlib_prefetch_buffer_header (p3, STORE);
2643
2644             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2645             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2646           }
2647
2648           pi0 = to_next[0] = from[0];
2649           pi1 = to_next[1] = from[1];
2650
2651           from += 2;
2652           n_left_from -= 2;
2653           to_next += 2;
2654           n_left_to_next -= 2;
2655       
2656           p0 = vlib_get_buffer (vm, pi0);
2657           p1 = vlib_get_buffer (vm, pi1);
2658
2659           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2660           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2661
2662           /* We should never rewrite a pkt using the MISS adjacency */
2663           ASSERT(adj_index0 && adj_index1);
2664
2665           ip0 = vlib_buffer_get_current (p0);
2666           ip1 = vlib_buffer_get_current (p1);
2667
2668           error0 = error1 = IP4_ERROR_NONE;
2669           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2670
2671           /* Decrement TTL & update checksum.
2672              Works either endian, so no need for byte swap. */
2673           if (! rewrite_for_locally_received_packets)
2674             {
2675               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2676
2677               /* Input node should have reject packets with ttl 0. */
2678               ASSERT (ip0->ttl > 0);
2679               ASSERT (ip1->ttl > 0);
2680
2681               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2682               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2683
2684               checksum0 += checksum0 >= 0xffff;
2685               checksum1 += checksum1 >= 0xffff;
2686
2687               ip0->checksum = checksum0;
2688               ip1->checksum = checksum1;
2689
2690               ttl0 -= 1;
2691               ttl1 -= 1;
2692
2693               ip0->ttl = ttl0;
2694               ip1->ttl = ttl1;
2695
2696               /*
2697                * If the ttl drops below 1 when forwarding, generate
2698                * an ICMP response.
2699                */
2700               if (PREDICT_FALSE(ttl0 <= 0))
2701                 {
2702                   error0 = IP4_ERROR_TIME_EXPIRED;
2703                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2704                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2705                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2706                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2707                 }
2708               if (PREDICT_FALSE(ttl1 <= 0))
2709                 {
2710                   error1 = IP4_ERROR_TIME_EXPIRED;
2711                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2712                   icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2713                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2714                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2715                 }
2716
2717               /* Verify checksum. */
2718               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2719               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2720             }
2721
2722           /* Rewrite packet header and updates lengths. */
2723           adj0 = ip_get_adjacency (lm, adj_index0);
2724           adj1 = ip_get_adjacency (lm, adj_index1);
2725       
2726           if (rewrite_for_locally_received_packets)
2727             {
2728               /*
2729                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2730                * we end up here with a local adjacency in hand
2731                * The local adj rewrite data is 0xfefe on purpose.
2732                * Bad engineer, no donut for you.
2733                */
2734               if (PREDICT_FALSE(adj0->lookup_next_index 
2735                                 == IP_LOOKUP_NEXT_LOCAL))
2736                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2737               if (PREDICT_FALSE(adj0->lookup_next_index
2738                                 == IP_LOOKUP_NEXT_ARP))
2739                 next0_override = IP4_REWRITE_NEXT_ARP;
2740               if (PREDICT_FALSE(adj1->lookup_next_index 
2741                                 == IP_LOOKUP_NEXT_LOCAL))
2742                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2743               if (PREDICT_FALSE(adj1->lookup_next_index
2744                                 == IP_LOOKUP_NEXT_ARP))
2745                 next1_override = IP4_REWRITE_NEXT_ARP;
2746             }
2747
2748           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2749           rw_len0 = adj0[0].rewrite_header.data_bytes;
2750           rw_len1 = adj1[0].rewrite_header.data_bytes;
2751
2752           /* Check MTU of outgoing interface. */
2753           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2754                     ? IP4_ERROR_MTU_EXCEEDED
2755                     : error0);
2756           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2757                     ? IP4_ERROR_MTU_EXCEEDED
2758                     : error1);
2759
2760           next0 = (error0 == IP4_ERROR_NONE)
2761             ? adj0[0].rewrite_header.next_index : next0;
2762
2763           if (rewrite_for_locally_received_packets)
2764               next0 = next0 && next0_override ? next0_override : next0;
2765
2766           next1 = (error1 == IP4_ERROR_NONE)
2767             ? adj1[0].rewrite_header.next_index : next1;
2768
2769           if (rewrite_for_locally_received_packets)
2770               next1 = next1 && next1_override ? next1_override : next1;
2771
2772           /* 
2773            * We've already accounted for an ethernet_header_t elsewhere
2774            */
2775           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2776               vlib_increment_combined_counter 
2777                   (&lm->adjacency_counters,
2778                    cpu_index, adj_index0, 
2779                    /* packet increment */ 0,
2780                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2781
2782           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2783               vlib_increment_combined_counter 
2784                   (&lm->adjacency_counters,
2785                    cpu_index, adj_index1, 
2786                    /* packet increment */ 0,
2787                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2788
2789           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2790            * to see the IP headerr */
2791           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2792             {
2793               p0->current_data -= rw_len0;
2794               p0->current_length += rw_len0;
2795               p0->error = error_node->errors[error0];
2796               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2797                   adj0[0].rewrite_header.sw_if_index;
2798             }
2799           if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2800             {
2801               p1->current_data -= rw_len1;
2802               p1->current_length += rw_len1;
2803               p1->error = error_node->errors[error1];
2804               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2805                   adj1[0].rewrite_header.sw_if_index;
2806             }
2807
2808           /* Guess we are only writing on simple Ethernet header. */
2809           vnet_rewrite_two_headers (adj0[0], adj1[0],
2810                                     ip0, ip1,
2811                                     sizeof (ethernet_header_t));
2812       
2813           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2814                                            to_next, n_left_to_next,
2815                                            pi0, pi1, next0, next1);
2816         }
2817
2818       while (n_left_from > 0 && n_left_to_next > 0)
2819         {
2820           ip_adjacency_t * adj0;
2821           vlib_buffer_t * p0;
2822           ip4_header_t * ip0;
2823           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2824           u32 next0_override;
2825       
2826           if (rewrite_for_locally_received_packets)
2827               next0_override = 0;
2828
2829           pi0 = to_next[0] = from[0];
2830
2831           p0 = vlib_get_buffer (vm, pi0);
2832
2833           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2834
2835           /* We should never rewrite a pkt using the MISS adjacency */
2836           ASSERT(adj_index0);
2837
2838           adj0 = ip_get_adjacency (lm, adj_index0);
2839       
2840           ip0 = vlib_buffer_get_current (p0);
2841
2842           error0 = IP4_ERROR_NONE;
2843           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
2844
2845           /* Decrement TTL & update checksum. */
2846           if (! rewrite_for_locally_received_packets)
2847             {
2848               i32 ttl0 = ip0->ttl;
2849
2850               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2851
2852               checksum0 += checksum0 >= 0xffff;
2853
2854               ip0->checksum = checksum0;
2855
2856               ASSERT (ip0->ttl > 0);
2857
2858               ttl0 -= 1;
2859
2860               ip0->ttl = ttl0;
2861
2862               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2863
2864               if (PREDICT_FALSE(ttl0 <= 0))
2865                 {
2866                   /*
2867                    * If the ttl drops below 1 when forwarding, generate
2868                    * an ICMP response.
2869                    */
2870                   error0 = IP4_ERROR_TIME_EXPIRED;
2871                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2872                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2873                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2874                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2875                 }
2876             }
2877
2878           if (rewrite_for_locally_received_packets)
2879             {
2880               /*
2881                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2882                * we end up here with a local adjacency in hand
2883                * The local adj rewrite data is 0xfefe on purpose.
2884                * Bad engineer, no donut for you.
2885                */
2886               if (PREDICT_FALSE(adj0->lookup_next_index 
2887                                 == IP_LOOKUP_NEXT_LOCAL))
2888                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2889               /* 
2890                * We have to override the next_index in ARP adjacencies,
2891                * because they're set up for ip4-arp, not this node...
2892                */
2893               if (PREDICT_FALSE(adj0->lookup_next_index
2894                                 == IP_LOOKUP_NEXT_ARP))
2895                 next0_override = IP4_REWRITE_NEXT_ARP;
2896             }
2897
2898           /* Guess we are only writing on simple Ethernet header. */
2899           vnet_rewrite_one_header (adj0[0], ip0, 
2900                                    sizeof (ethernet_header_t));
2901           
2902           /* Update packet buffer attributes/set output interface. */
2903           rw_len0 = adj0[0].rewrite_header.data_bytes;
2904           
2905           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2906               vlib_increment_combined_counter 
2907                   (&lm->adjacency_counters,
2908                    cpu_index, adj_index0, 
2909                    /* packet increment */ 0,
2910                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2911           
2912           /* Check MTU of outgoing interface. */
2913           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2914                     > adj0[0].rewrite_header.max_l3_packet_bytes
2915                     ? IP4_ERROR_MTU_EXCEEDED
2916                     : error0);
2917
2918           p0->error = error_node->errors[error0];
2919
2920           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2921            * to see the IP headerr */
2922           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2923             {
2924               p0->current_data -= rw_len0;
2925               p0->current_length += rw_len0;
2926
2927               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2928                   adj0[0].rewrite_header.sw_if_index;
2929               next0 = adj0[0].rewrite_header.next_index;
2930             }
2931
2932           if (rewrite_for_locally_received_packets)
2933               next0 = next0 && next0_override ? next0_override : next0;
2934
2935           from += 1;
2936           n_left_from -= 1;
2937           to_next += 1;
2938           n_left_to_next -= 1;
2939       
2940           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2941                                            to_next, n_left_to_next,
2942                                            pi0, next0);
2943         }
2944   
2945       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2946     }
2947
2948   /* Need to do trace after rewrites to pick up new packet data. */
2949   if (node->flags & VLIB_NODE_FLAG_TRACE)
2950     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2951
2952   return frame->n_vectors;
2953 }
2954
2955
2956 /** \brief IPv4 transit rewrite node.
2957     @node ip4-rewrite-transit
2958
2959     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2960     header checksum, fetch the ip adjacency, check the outbound mtu,
2961     apply the adjacency rewrite, and send pkts to the adjacency
2962     rewrite header's rewrite_next_index.
2963
2964     @param vm vlib_main_t corresponding to the current thread
2965     @param node vlib_node_runtime_t
2966     @param frame vlib_frame_t whose contents should be dispatched
2967
2968     @par Graph mechanics: buffer metadata, next index usage
2969
2970     @em Uses:
2971     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2972         - the rewrite adjacency index
2973     - <code>adj->lookup_next_index</code>
2974         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2975           the packet will be dropped. 
2976     - <code>adj->rewrite_header</code>
2977         - Rewrite string length, rewrite string, next_index
2978
2979     @em Sets:
2980     - <code>b->current_data, b->current_length</code>
2981         - Updated net of applying the rewrite string
2982
2983     <em>Next Indices:</em>
2984     - <code> adj->rewrite_header.next_index </code>
2985       or @c error-drop 
2986 */
2987 static uword
2988 ip4_rewrite_transit (vlib_main_t * vm,
2989                      vlib_node_runtime_t * node,
2990                      vlib_frame_t * frame)
2991 {
2992   return ip4_rewrite_inline (vm, node, frame,
2993                              /* rewrite_for_locally_received_packets */ 0);
2994 }
2995
2996 /** \brief IPv4 local rewrite node.
2997     @node ip4-rewrite-local
2998
2999     This is the IPv4 local rewrite node. Fetch the ip adjacency, check
3000     the outbound interface mtu, apply the adjacency rewrite, and send
3001     pkts to the adjacency rewrite header's rewrite_next_index. Deal
3002     with hemorrhoids of the form "some clown sends an icmp4 w/ src =
3003     dst = interface addr."
3004
3005     @param vm vlib_main_t corresponding to the current thread
3006     @param node vlib_node_runtime_t
3007     @param frame vlib_frame_t whose contents should be dispatched
3008
3009     @par Graph mechanics: buffer metadata, next index usage
3010
3011     @em Uses:
3012     - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
3013         - the rewrite adjacency index
3014     - <code>adj->lookup_next_index</code>
3015         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
3016           the packet will be dropped. 
3017     - <code>adj->rewrite_header</code>
3018         - Rewrite string length, rewrite string, next_index
3019
3020     @em Sets:
3021     - <code>b->current_data, b->current_length</code>
3022         - Updated net of applying the rewrite string
3023
3024     <em>Next Indices:</em>
3025     - <code> adj->rewrite_header.next_index </code>
3026       or @c error-drop 
3027 */
3028
3029 static uword
3030 ip4_rewrite_local (vlib_main_t * vm,
3031                    vlib_node_runtime_t * node,
3032                    vlib_frame_t * frame)
3033 {
3034   return ip4_rewrite_inline (vm, node, frame,
3035                              /* rewrite_for_locally_received_packets */ 1);
3036 }
3037
3038 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
3039   .function = ip4_rewrite_transit,
3040   .name = "ip4-rewrite-transit",
3041   .vector_size = sizeof (u32),
3042
3043   .format_trace = format_ip4_rewrite_trace,
3044
3045   .n_next_nodes = 3,
3046   .next_nodes = {
3047     [IP4_REWRITE_NEXT_DROP] = "error-drop",
3048     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
3049     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3050   },
3051 };
3052
3053 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
3054
3055 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
3056   .function = ip4_rewrite_local,
3057   .name = "ip4-rewrite-local",
3058   .vector_size = sizeof (u32),
3059
3060   .sibling_of = "ip4-rewrite-transit",
3061
3062   .format_trace = format_ip4_rewrite_trace,
3063
3064   .n_next_nodes = 0,
3065 };
3066
3067 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
3068
3069 static clib_error_t *
3070 add_del_interface_table (vlib_main_t * vm,
3071                          unformat_input_t * input,
3072                          vlib_cli_command_t * cmd)
3073 {
3074   vnet_main_t * vnm = vnet_get_main();
3075   clib_error_t * error = 0;
3076   u32 sw_if_index, table_id;
3077
3078   sw_if_index = ~0;
3079
3080   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
3081     {
3082       error = clib_error_return (0, "unknown interface `%U'",
3083                                  format_unformat_error, input);
3084       goto done;
3085     }
3086
3087   if (unformat (input, "%d", &table_id))
3088     ;
3089   else
3090     {
3091       error = clib_error_return (0, "expected table id `%U'",
3092                                  format_unformat_error, input);
3093       goto done;
3094     }
3095
3096   {
3097     ip4_main_t * im = &ip4_main;
3098     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
3099
3100     if (fib) 
3101       {
3102         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
3103         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
3104     }
3105   }
3106
3107  done:
3108   return error;
3109 }
3110
3111 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
3112   .path = "set interface ip table",
3113   .function = add_del_interface_table,
3114   .short_help = "Add/delete FIB table id for interface",
3115 };
3116
3117
3118 static uword
3119 ip4_lookup_multicast (vlib_main_t * vm,
3120                       vlib_node_runtime_t * node,
3121                       vlib_frame_t * frame)
3122 {
3123   ip4_main_t * im = &ip4_main;
3124   ip_lookup_main_t * lm = &im->lookup_main;
3125   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
3126   u32 n_left_from, n_left_to_next, * from, * to_next;
3127   ip_lookup_next_t next;
3128   u32 cpu_index = os_get_cpu_number();
3129
3130   from = vlib_frame_vector_args (frame);
3131   n_left_from = frame->n_vectors;
3132   next = node->cached_next_index;
3133
3134   while (n_left_from > 0)
3135     {
3136       vlib_get_next_frame (vm, node, next,
3137                            to_next, n_left_to_next);
3138
3139       while (n_left_from >= 4 && n_left_to_next >= 2)
3140         {
3141           vlib_buffer_t * p0, * p1;
3142           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
3143           ip_lookup_next_t next0, next1;
3144           ip4_header_t * ip0, * ip1;
3145           ip_adjacency_t * adj0, * adj1;
3146           u32 fib_index0, fib_index1;
3147           u32 flow_hash_config0, flow_hash_config1;
3148
3149           /* Prefetch next iteration. */
3150           {
3151             vlib_buffer_t * p2, * p3;
3152
3153             p2 = vlib_get_buffer (vm, from[2]);
3154             p3 = vlib_get_buffer (vm, from[3]);
3155
3156             vlib_prefetch_buffer_header (p2, LOAD);
3157             vlib_prefetch_buffer_header (p3, LOAD);
3158
3159             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
3160             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
3161           }
3162
3163           pi0 = to_next[0] = from[0];
3164           pi1 = to_next[1] = from[1];
3165
3166           p0 = vlib_get_buffer (vm, pi0);
3167           p1 = vlib_get_buffer (vm, pi1);
3168
3169           ip0 = vlib_buffer_get_current (p0);
3170           ip1 = vlib_buffer_get_current (p1);
3171
3172           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3173           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
3174           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3175             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3176           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
3177             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
3178
3179           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3180                                               &ip0->dst_address, p0);
3181           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
3182                                               &ip1->dst_address, p1);
3183
3184           adj0 = ip_get_adjacency (lm, adj_index0);
3185           adj1 = ip_get_adjacency (lm, adj_index1);
3186
3187           next0 = adj0->lookup_next_index;
3188           next1 = adj1->lookup_next_index;
3189
3190           flow_hash_config0 = 
3191               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3192
3193           flow_hash_config1 = 
3194               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
3195
3196           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
3197               (ip0, flow_hash_config0);
3198                                                                   
3199           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
3200               (ip1, flow_hash_config1);
3201
3202           ASSERT (adj0->n_adj > 0);
3203           ASSERT (adj1->n_adj > 0);
3204           ASSERT (is_pow2 (adj0->n_adj));
3205           ASSERT (is_pow2 (adj1->n_adj));
3206           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3207           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
3208
3209           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3210           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
3211
3212           if (1) /* $$$$$$ HACK FIXME */
3213           vlib_increment_combined_counter 
3214               (cm, cpu_index, adj_index0, 1,
3215                vlib_buffer_length_in_chain (vm, p0));
3216           if (1) /* $$$$$$ HACK FIXME */
3217           vlib_increment_combined_counter 
3218               (cm, cpu_index, adj_index1, 1,
3219                vlib_buffer_length_in_chain (vm, p1));
3220
3221           from += 2;
3222           to_next += 2;
3223           n_left_to_next -= 2;
3224           n_left_from -= 2;
3225
3226           wrong_next = (next0 != next) + 2*(next1 != next);
3227           if (PREDICT_FALSE (wrong_next != 0))
3228             {
3229               switch (wrong_next)
3230                 {
3231                 case 1:
3232                   /* A B A */
3233                   to_next[-2] = pi1;
3234                   to_next -= 1;
3235                   n_left_to_next += 1;
3236                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3237                   break;
3238
3239                 case 2:
3240                   /* A A B */
3241                   to_next -= 1;
3242                   n_left_to_next += 1;
3243                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3244                   break;
3245
3246                 case 3:
3247                   /* A B C */
3248                   to_next -= 2;
3249                   n_left_to_next += 2;
3250                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3251                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3252                   if (next0 == next1)
3253                     {
3254                       /* A B B */
3255                       vlib_put_next_frame (vm, node, next, n_left_to_next);
3256                       next = next1;
3257                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
3258                     }
3259                 }
3260             }
3261         }
3262     
3263       while (n_left_from > 0 && n_left_to_next > 0)
3264         {
3265           vlib_buffer_t * p0;
3266           ip4_header_t * ip0;
3267           u32 pi0, adj_index0;
3268           ip_lookup_next_t next0;
3269           ip_adjacency_t * adj0;
3270           u32 fib_index0;
3271           u32 flow_hash_config0;
3272
3273           pi0 = from[0];
3274           to_next[0] = pi0;
3275
3276           p0 = vlib_get_buffer (vm, pi0);
3277
3278           ip0 = vlib_buffer_get_current (p0);
3279
3280           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
3281                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3282           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3283               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3284           
3285           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3286                                               &ip0->dst_address, p0);
3287
3288           adj0 = ip_get_adjacency (lm, adj_index0);
3289
3290           next0 = adj0->lookup_next_index;
3291
3292           flow_hash_config0 = 
3293               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3294
3295           vnet_buffer (p0)->ip.flow_hash = 
3296             ip4_compute_flow_hash (ip0, flow_hash_config0);
3297
3298           ASSERT (adj0->n_adj > 0);
3299           ASSERT (is_pow2 (adj0->n_adj));
3300           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3301
3302           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3303
3304           if (1) /* $$$$$$ HACK FIXME */
3305               vlib_increment_combined_counter 
3306                   (cm, cpu_index, adj_index0, 1,
3307                    vlib_buffer_length_in_chain (vm, p0));
3308
3309           from += 1;
3310           to_next += 1;
3311           n_left_to_next -= 1;
3312           n_left_from -= 1;
3313
3314           if (PREDICT_FALSE (next0 != next))
3315             {
3316               n_left_to_next += 1;
3317               vlib_put_next_frame (vm, node, next, n_left_to_next);
3318               next = next0;
3319               vlib_get_next_frame (vm, node, next,
3320                                    to_next, n_left_to_next);
3321               to_next[0] = pi0;
3322               to_next += 1;
3323               n_left_to_next -= 1;
3324             }
3325         }
3326
3327       vlib_put_next_frame (vm, node, next, n_left_to_next);
3328     }
3329
3330   if (node->flags & VLIB_NODE_FLAG_TRACE)
3331       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
3332
3333   return frame->n_vectors;
3334 }
3335
3336 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3337   .function = ip4_lookup_multicast,
3338   .name = "ip4-lookup-multicast",
3339   .vector_size = sizeof (u32),
3340   .sibling_of = "ip4-lookup",
3341   .format_trace = format_ip4_lookup_trace,
3342
3343   .n_next_nodes = 0,
3344 };
3345
3346 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
3347
3348 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3349   .function = ip4_drop,
3350   .name = "ip4-multicast",
3351   .vector_size = sizeof (u32),
3352
3353   .format_trace = format_ip4_forward_next_trace,
3354
3355   .n_next_nodes = 1,
3356   .next_nodes = {
3357     [0] = "error-drop",
3358   },
3359 };
3360
3361 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3362 {
3363   ip4_main_t * im = &ip4_main;
3364   ip4_fib_mtrie_t * mtrie0;
3365   ip4_fib_mtrie_leaf_t leaf0;
3366   u32 adj_index0;
3367     
3368   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3369
3370   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3371   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3372   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3373   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3374   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3375   
3376   /* Handle default route. */
3377   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3378   
3379   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3380   
3381   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3382                                                   a, 
3383                                                   /* no_default_route */ 0);
3384 }
3385  
3386 static clib_error_t *
3387 test_lookup_command_fn (vlib_main_t * vm,
3388                         unformat_input_t * input,
3389                         vlib_cli_command_t * cmd)
3390 {
3391   u32 table_id = 0;
3392   f64 count = 1;
3393   u32 n;
3394   int i;
3395   ip4_address_t ip4_base_address;
3396   u64 errors = 0;
3397
3398   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3399       if (unformat (input, "table %d", &table_id))
3400         ;
3401       else if (unformat (input, "count %f", &count))
3402         ;
3403
3404       else if (unformat (input, "%U",
3405                          unformat_ip4_address, &ip4_base_address))
3406         ;
3407       else
3408         return clib_error_return (0, "unknown input `%U'",
3409                                   format_unformat_error, input);
3410   }
3411
3412   n = count;
3413
3414   for (i = 0; i < n; i++)
3415     {
3416       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3417         errors++;
3418
3419       ip4_base_address.as_u32 = 
3420         clib_host_to_net_u32 (1 + 
3421                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3422     }
3423
3424   if (errors) 
3425     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3426   else
3427     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3428
3429   return 0;
3430 }
3431
3432 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3433     .path = "test lookup",
3434     .short_help = "test lookup",
3435     .function = test_lookup_command_fn,
3436 };
3437
3438 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3439 {
3440   ip4_main_t * im4 = &ip4_main;
3441   ip4_fib_t * fib;
3442   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3443
3444   if (p == 0)
3445     return VNET_API_ERROR_NO_SUCH_FIB;
3446
3447   fib = vec_elt_at_index (im4->fibs, p[0]);
3448
3449   fib->flow_hash_config = flow_hash_config;
3450   return 0;
3451 }
3452  
3453 static clib_error_t *
3454 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3455                              unformat_input_t * input,
3456                              vlib_cli_command_t * cmd)
3457 {
3458   int matched = 0;
3459   u32 table_id = 0;
3460   u32 flow_hash_config = 0;
3461   int rv;
3462
3463   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3464     if (unformat (input, "table %d", &table_id))
3465       matched = 1;
3466 #define _(a,v) \
3467     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3468     foreach_flow_hash_bit
3469 #undef _
3470     else break;
3471   }
3472   
3473   if (matched == 0)
3474     return clib_error_return (0, "unknown input `%U'",
3475                               format_unformat_error, input);
3476   
3477   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3478   switch (rv)
3479     {
3480     case 0:
3481       break;
3482       
3483     case VNET_API_ERROR_NO_SUCH_FIB:
3484       return clib_error_return (0, "no such FIB table %d", table_id);
3485       
3486     default:
3487       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3488       break;
3489     }
3490   
3491   return 0;
3492 }
3493  
3494 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3495   .path = "set ip flow-hash",
3496   .short_help = 
3497   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3498   .function = set_ip_flow_hash_command_fn,
3499 };
3500  
3501 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3502                                  u32 table_index)
3503 {
3504   vnet_main_t * vnm = vnet_get_main();
3505   vnet_interface_main_t * im = &vnm->interface_main;
3506   ip4_main_t * ipm = &ip4_main;
3507   ip_lookup_main_t * lm = &ipm->lookup_main;
3508   vnet_classify_main_t * cm = &vnet_classify_main;
3509
3510   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3511     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3512
3513   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3514     return VNET_API_ERROR_NO_SUCH_ENTRY;
3515
3516   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3517   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3518
3519   return 0;
3520 }
3521
3522 static clib_error_t *
3523 set_ip_classify_command_fn (vlib_main_t * vm,
3524                             unformat_input_t * input,
3525                             vlib_cli_command_t * cmd)
3526 {
3527   u32 table_index = ~0;
3528   int table_index_set = 0;
3529   u32 sw_if_index = ~0;
3530   int rv;
3531   
3532   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3533     if (unformat (input, "table-index %d", &table_index))
3534       table_index_set = 1;
3535     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3536                        vnet_get_main(), &sw_if_index))
3537       ;
3538     else
3539       break;
3540   }
3541       
3542   if (table_index_set == 0)
3543     return clib_error_return (0, "classify table-index must be specified");
3544
3545   if (sw_if_index == ~0)
3546     return clib_error_return (0, "interface / subif must be specified");
3547
3548   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3549
3550   switch (rv)
3551     {
3552     case 0:
3553       break;
3554
3555     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3556       return clib_error_return (0, "No such interface");
3557
3558     case VNET_API_ERROR_NO_SUCH_ENTRY:
3559       return clib_error_return (0, "No such classifier table");
3560     }
3561   return 0;
3562 }
3563
3564 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3565     .path = "set ip classify",
3566     .short_help = 
3567     "set ip classify intfc <int> table-index <index>",
3568     .function = set_ip_classify_command_fn,
3569 };
3570