VPP-142 Follow up fix for shared_count of indirect adjacencies
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47
48 /** \file
49     vnet ip4 forwarding 
50 */
51
52 /* This is really, really simple but stupid fib. */
53 u32
54 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
55                            ip4_address_t * dst,
56                            u32 disable_default_route)
57 {
58   ip_lookup_main_t * lm = &im->lookup_main;
59   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
60   uword * p, * hash, key;
61   i32 i, i_min, dst_address, ai;
62
63   i_min = disable_default_route ? 1 : 0;
64   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
65   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
66     {
67       hash = fib->adj_index_by_dst_address[i];
68       if (! hash)
69         continue;
70
71       key = dst_address & im->fib_masks[i];
72       if ((p = hash_get (hash, key)) != 0)
73         {
74           ai = p[0];
75           goto done;
76         }
77     }
78     
79   /* Nothing matches in table. */
80   ai = lm->miss_adj_index;
81
82  done:
83   return ai;
84 }
85
86 static ip4_fib_t *
87 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
88 {
89   ip4_fib_t * fib;
90   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
91   vec_add2 (im->fibs, fib, 1);
92   fib->table_id = table_id;
93   fib->index = fib - im->fibs;
94   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
95   fib->fwd_classify_table_index = ~0;
96   fib->rev_classify_table_index = ~0;
97   ip4_mtrie_init (&fib->mtrie);
98   return fib;
99 }
100
101 ip4_fib_t *
102 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
103                                    u32 table_index_or_id, u32 flags)
104 {
105   uword * p, fib_index;
106
107   fib_index = table_index_or_id;
108   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
109     {
110       if (table_index_or_id == ~0) {
111         table_index_or_id = 0;
112         while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
113           table_index_or_id++;
114         }
115         return create_fib_with_table_id (im, table_index_or_id);
116       }
117
118       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
119       if (! p)
120         return create_fib_with_table_id (im, table_index_or_id);
121       fib_index = p[0];
122     }
123   return vec_elt_at_index (im->fibs, fib_index);
124 }
125
126 static void
127 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
128                                        ip4_fib_t * fib,
129                                        u32 address_length)
130 {
131   hash_t * h;
132   uword max_index;
133
134   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
135   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
136
137   fib->adj_index_by_dst_address[address_length] =
138     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
139
140   hash_set_flags (fib->adj_index_by_dst_address[address_length],
141                   HASH_FLAG_NO_AUTO_SHRINK);
142
143   h = hash_header (fib->adj_index_by_dst_address[address_length]);
144   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
145
146   /* Initialize new/old hash value vectors. */
147   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
148   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
149 }
150
151 static void
152 ip4_fib_set_adj_index (ip4_main_t * im,
153                        ip4_fib_t * fib,
154                        u32 flags,
155                        u32 dst_address_u32,
156                        u32 dst_address_length,
157                        u32 adj_index)
158 {
159   ip_lookup_main_t * lm = &im->lookup_main;
160   uword * hash;
161
162   if (vec_bytes(fib->old_hash_values))
163     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
164   if (vec_bytes(fib->new_hash_values))
165     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
166   fib->new_hash_values[0] = adj_index;
167
168   /* Make sure adj index is valid. */
169   if (CLIB_DEBUG > 0)
170     (void) ip_get_adjacency (lm, adj_index);
171
172   hash = fib->adj_index_by_dst_address[dst_address_length];
173
174   hash = _hash_set3 (hash, dst_address_u32,
175                      fib->new_hash_values,
176                      fib->old_hash_values);
177
178   fib->adj_index_by_dst_address[dst_address_length] = hash;
179
180   if (vec_len (im->add_del_route_callbacks) > 0)
181     {
182       ip4_add_del_route_callback_t * cb;
183       ip4_address_t d;
184       uword * p;
185
186       d.data_u32 = dst_address_u32;
187       vec_foreach (cb, im->add_del_route_callbacks)
188         if ((flags & cb->required_flags) == cb->required_flags)
189           cb->function (im, cb->function_opaque,
190                         fib, flags,
191                         &d, dst_address_length,
192                         fib->old_hash_values,
193                         fib->new_hash_values);
194
195       p = hash_get (hash, dst_address_u32);
196       /* hash_get should never return NULL here */
197       if (p)
198           clib_memcpy (p, fib->new_hash_values, 
199                        vec_bytes (fib->new_hash_values));
200       else
201           ASSERT(0);
202     }
203 }
204
205 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
206 {
207   ip_lookup_main_t * lm = &im->lookup_main;
208   ip4_fib_t * fib;
209   u32 dst_address, dst_address_length, adj_index, old_adj_index;
210   uword * hash, is_del;
211   ip4_add_del_route_callback_t * cb;
212
213   /* Either create new adjacency or use given one depending on arguments. */
214   if (a->n_add_adj > 0)
215     {
216       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
217       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
218     }
219   else
220     adj_index = a->adj_index;
221
222   dst_address = a->dst_address.data_u32;
223   dst_address_length = a->dst_address_length;
224   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
225
226   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
227   dst_address &= im->fib_masks[dst_address_length];
228
229   if (! fib->adj_index_by_dst_address[dst_address_length])
230     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
231
232   hash = fib->adj_index_by_dst_address[dst_address_length];
233
234   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
235
236   if (is_del)
237     {
238       fib->old_hash_values[0] = ~0;
239       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
240       fib->adj_index_by_dst_address[dst_address_length] = hash;
241
242       if (vec_len (im->add_del_route_callbacks) > 0
243           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
244         {
245           fib->new_hash_values[0] = ~0;
246           vec_foreach (cb, im->add_del_route_callbacks)
247             if ((a->flags & cb->required_flags) == cb->required_flags)
248               cb->function (im, cb->function_opaque,
249                             fib, a->flags,
250                             &a->dst_address, dst_address_length,
251                             fib->old_hash_values,
252                             fib->new_hash_values);
253         }
254     }
255   else
256     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
257                            adj_index);
258
259   old_adj_index = fib->old_hash_values[0];
260
261   /* Avoid spurious reference count increments */
262   if (old_adj_index == adj_index
263       && adj_index != ~0
264       && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
265     {
266       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
267       if (adj->share_count > 0)
268         adj->share_count --;
269     }
270
271   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
272                                is_del ? old_adj_index : adj_index,
273                                is_del);
274
275   /* Delete old adjacency index if present and changed. */
276   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
277       && old_adj_index != ~0
278       && old_adj_index != adj_index)
279     ip_del_adjacency (lm, old_adj_index);
280 }
281
282
283 u32
284 ip4_route_get_next_hop_adj (ip4_main_t * im,
285                             u32 fib_index,
286                             ip4_address_t *next_hop,
287                             u32 next_hop_sw_if_index,
288                             u32 explicit_fib_index)
289 {
290   ip_lookup_main_t * lm = &im->lookup_main;
291   vnet_main_t * vnm = vnet_get_main();
292   uword * nh_hash, * nh_result;
293   int is_interface_next_hop;
294   u32 nh_adj_index;
295   ip4_fib_t * fib;
296
297   fib = vec_elt_at_index (im->fibs, fib_index);
298
299   is_interface_next_hop = next_hop->data_u32 == 0;
300   if (is_interface_next_hop)
301     {
302       nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
303       if (nh_result)
304           nh_adj_index = *nh_result;
305       else
306         {
307            ip_adjacency_t * adj;
308            adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
309                                    &nh_adj_index);
310            ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
311            ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
312            hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
313         }
314     }
315   else if (next_hop_sw_if_index == ~0)
316     {
317       /* next-hop is recursive. we always need a indirect adj
318        * for recursive paths. Any LPM we perform now will give
319        * us a valid adj, but without tracking the next-hop we
320        * have no way to keep it valid.
321        */
322       ip_adjacency_t add_adj;
323       memset (&add_adj, 0, sizeof(add_adj));
324       add_adj.n_adj = 1;
325       add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
326       add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
327       add_adj.explicit_fib_index = explicit_fib_index;
328       ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
329     }
330   else
331     {
332       nh_hash = fib->adj_index_by_dst_address[32];
333       nh_result = hash_get (nh_hash, next_hop->data_u32);
334
335       /* Next hop must be known. */
336       if (! nh_result)
337         {
338           ip_adjacency_t * adj;
339
340           /* no /32 exists, get the longest prefix match */
341           nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
342                                                     next_hop, 0);
343           adj = ip_get_adjacency (lm, nh_adj_index);
344           /* if ARP interface adjacency is present, we need to
345              install ARP adjaceny for specific next hop */
346           if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
347               adj->arp.next_hop.ip4.as_u32 == 0)
348             {
349               nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
350             }
351         }
352       else
353         {
354           nh_adj_index = *nh_result;
355         }
356     }
357
358   return (nh_adj_index);
359 }
360
361 void
362 ip4_add_del_route_next_hop (ip4_main_t * im,
363                             u32 flags,
364                             ip4_address_t * dst_address,
365                             u32 dst_address_length,
366                             ip4_address_t * next_hop,
367                             u32 next_hop_sw_if_index,
368                             u32 next_hop_weight, u32 adj_index, 
369                             u32 explicit_fib_index)
370 {
371   vnet_main_t * vnm = vnet_get_main();
372   ip_lookup_main_t * lm = &im->lookup_main;
373   u32 fib_index;
374   ip4_fib_t * fib;
375   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
376   u32 dst_adj_index, nh_adj_index;
377   uword * dst_hash, * dst_result;
378   ip_adjacency_t * dst_adj;
379   ip_multipath_adjacency_t * old_mp, * new_mp;
380   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
381   clib_error_t * error = 0;
382
383   if (explicit_fib_index == (u32)~0)
384       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
385   else
386       fib_index = explicit_fib_index;
387
388   fib = vec_elt_at_index (im->fibs, fib_index);
389
390   /* Lookup next hop to be added or deleted. */
391   if (adj_index == (u32)~0)
392     {
393         nh_adj_index = ip4_route_get_next_hop_adj(im, fib_index,
394                                                   next_hop,
395                                                   next_hop_sw_if_index,
396                                                   explicit_fib_index);
397     }
398   else
399     {
400       nh_adj_index = adj_index;
401     }
402   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
403   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
404
405   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
406   dst_result = hash_get (dst_hash, dst_address_u32);
407   if (dst_result)
408     {
409       dst_adj_index = dst_result[0];
410       dst_adj = ip_get_adjacency (lm, dst_adj_index);
411     }
412   else
413     {
414       /* For deletes destination must be known. */
415       if (is_del)
416         {
417           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
418           error = clib_error_return (0, "unknown destination %U/%d",
419                                      format_ip4_address, dst_address,
420                                      dst_address_length);
421           goto done;
422         }
423
424       dst_adj_index = ~0;
425       dst_adj = 0;
426     }
427
428   /* Ignore adds of X/32 with next hop of X. */
429   if (! is_del
430       && dst_address_length == 32
431       && dst_address->data_u32 == next_hop->data_u32 
432       && adj_index != (u32)~0)
433     {
434       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
435       error = clib_error_return (0, "prefix matches next hop %U/%d",
436                                  format_ip4_address, dst_address,
437                                  dst_address_length);
438       goto done;
439     }
440
441   /* Destination is not known and default weight is set so add route
442      to existing non-multipath adjacency */
443   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
444     {
445       /* create / delete additional mapping of existing adjacency */
446       ip4_add_del_route_args_t a;
447
448       a.table_index_or_table_id = fib_index;
449       a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
450                  | IP4_ROUTE_FLAG_FIB_INDEX
451                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
452                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
453                              | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
454       a.dst_address = dst_address[0];
455       a.dst_address_length = dst_address_length;
456       a.adj_index = nh_adj_index;
457       a.add_adj = 0;
458       a.n_add_adj = 0;
459
460       ip4_add_del_route (im, &a);
461       goto done;
462     }
463
464   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
465
466   if (! ip_multipath_adjacency_add_del_next_hop
467       (lm, is_del,
468        old_mp_adj_index,
469        nh_adj_index,
470        next_hop_weight,
471        &new_mp_adj_index))
472     {
473       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
474       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
475                                  format_ip4_address, next_hop);
476       goto done;
477     }
478   
479   old_mp = new_mp = 0;
480   if (old_mp_adj_index != ~0)
481     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
482   if (new_mp_adj_index != ~0)
483     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
484
485   if (old_mp != new_mp)
486     {
487       ip4_add_del_route_args_t a;
488       ip_adjacency_t * adj;
489
490       a.table_index_or_table_id = fib_index;
491       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
492                  | IP4_ROUTE_FLAG_FIB_INDEX
493                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
494                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
495       a.dst_address = dst_address[0];
496       a.dst_address_length = dst_address_length;
497       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
498       a.add_adj = 0;
499       a.n_add_adj = 0;
500
501       ip4_add_del_route (im, &a);
502
503       adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index);
504       if (adj->n_adj == 1)
505         adj->share_count += is_del ? -1 : 1;
506     }
507
508  done:
509   if (error)
510     clib_error_report (error);
511 }
512
513 void *
514 ip4_get_route (ip4_main_t * im,
515                u32 table_index_or_table_id,
516                u32 flags,
517                u8 * address,
518                u32 address_length)
519 {
520   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
521   u32 dst_address = * (u32 *) address;
522   uword * hash, * p;
523
524   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
525   dst_address &= im->fib_masks[address_length];
526
527   hash = fib->adj_index_by_dst_address[address_length];
528   p = hash_get (hash, dst_address);
529   return (void *) p;
530 }
531
532 void
533 ip4_foreach_matching_route (ip4_main_t * im,
534                             u32 table_index_or_table_id,
535                             u32 flags,
536                             ip4_address_t * address,
537                             u32 address_length,
538                             ip4_address_t ** results,
539                             u8 ** result_lengths)
540 {
541   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
542   u32 dst_address = address->data_u32;
543   u32 this_length = address_length;
544   
545   if (*results)
546     _vec_len (*results) = 0;
547   if (*result_lengths)
548     _vec_len (*result_lengths) = 0;
549
550   while (this_length <= 32 && vec_len (results) == 0)
551     {
552       uword k, v;
553       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
554         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
555           {
556             ip4_address_t a;
557             a.data_u32 = k;
558             vec_add1 (*results, a);
559             vec_add1 (*result_lengths, this_length);
560           }
561       }));
562
563       this_length++;
564     }
565 }
566
567 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
568                                   u32 table_index_or_table_id,
569                                   u32 flags)
570 {
571   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
572   ip_lookup_main_t * lm = &im->lookup_main;
573   u32 i, l;
574   ip4_address_t a;
575   ip4_add_del_route_callback_t * cb;
576   static ip4_address_t * to_delete;
577
578   if (lm->n_adjacency_remaps == 0)
579     return;
580
581   for (l = 0; l <= 32; l++)
582     {
583       hash_pair_t * p;
584       uword * hash = fib->adj_index_by_dst_address[l];
585
586       if (hash_elts (hash) == 0)
587         continue;
588
589       if (to_delete)
590         _vec_len (to_delete) = 0;
591
592       hash_foreach_pair (p, hash, ({
593         u32 adj_index = p->value[0];
594         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
595
596         if (m)
597           {
598             /* Record destination address from hash key. */
599             a.data_u32 = p->key;
600
601             /* New adjacency points to nothing: so delete prefix. */
602             if (m == ~0)
603               vec_add1 (to_delete, a);
604             else
605               {
606                 /* Remap to new adjacency. */
607                 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
608
609                 /* Set new adjacency value. */
610                 fib->new_hash_values[0] = p->value[0] = m - 1;
611
612                 vec_foreach (cb, im->add_del_route_callbacks)
613                   if ((flags & cb->required_flags) == cb->required_flags)
614                     cb->function (im, cb->function_opaque,
615                                   fib, flags | IP4_ROUTE_FLAG_ADD,
616                                   &a, l,
617                                   fib->old_hash_values,
618                                   fib->new_hash_values);
619               }
620           }
621       }));
622
623       fib->new_hash_values[0] = ~0;
624       for (i = 0; i < vec_len (to_delete); i++)
625         {
626           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
627           vec_foreach (cb, im->add_del_route_callbacks)
628             if ((flags & cb->required_flags) == cb->required_flags)
629               cb->function (im, cb->function_opaque,
630                             fib, flags | IP4_ROUTE_FLAG_DEL,
631                             &a, l,
632                             fib->old_hash_values,
633                             fib->new_hash_values);
634         }
635     }
636
637   /* Also remap adjacencies in mtrie. */
638   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
639
640   /* Reset mapping table. */
641   vec_zero (lm->adjacency_remap_table);
642
643   /* All remaps have been performed. */
644   lm->n_adjacency_remaps = 0;
645 }
646
647 void ip4_delete_matching_routes (ip4_main_t * im,
648                                  u32 table_index_or_table_id,
649                                  u32 flags,
650                                  ip4_address_t * address,
651                                  u32 address_length)
652 {
653   static ip4_address_t * matching_addresses;
654   static u8 * matching_address_lengths;
655   u32 l, i;
656   ip4_add_del_route_args_t a;
657
658   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
659   a.table_index_or_table_id = table_index_or_table_id;
660   a.adj_index = ~0;
661   a.add_adj = 0;
662   a.n_add_adj = 0;
663
664   for (l = address_length + 1; l <= 32; l++)
665     {
666       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
667                                   address,
668                                   l,
669                                   &matching_addresses,
670                                   &matching_address_lengths);
671       for (i = 0; i < vec_len (matching_addresses); i++)
672         {
673           a.dst_address = matching_addresses[i];
674           a.dst_address_length = matching_address_lengths[i];
675           ip4_add_del_route (im, &a);
676         }
677     }
678
679   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
680 }
681
/* Forward declaration only; the definition is not visible in this part of
   the file.  NOTE(review): judging by the name this presumably records
   trace data for forwarded packets, with which_adj_index selecting the RX
   or TX adjacency index -- confirm against the definition. */
void
ip4_forward_next_trace (vlib_main_t * vm,
                        vlib_node_runtime_t * node,
                        vlib_frame_t * frame,
                        vlib_rx_or_tx_t which_adj_index);
687
688 always_inline uword
689 ip4_lookup_inline (vlib_main_t * vm,
690                    vlib_node_runtime_t * node,
691                    vlib_frame_t * frame,
692                    int lookup_for_responses_to_locally_received_packets,
693                    int is_indirect)
694 {
695   ip4_main_t * im = &ip4_main;
696   ip_lookup_main_t * lm = &im->lookup_main;
697   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
698   u32 n_left_from, n_left_to_next, * from, * to_next;
699   ip_lookup_next_t next;
700   u32 cpu_index = os_get_cpu_number();
701
702   from = vlib_frame_vector_args (frame);
703   n_left_from = frame->n_vectors;
704   next = node->cached_next_index;
705
706   while (n_left_from > 0)
707     {
708       vlib_get_next_frame (vm, node, next,
709                            to_next, n_left_to_next);
710
711       while (n_left_from >= 4 && n_left_to_next >= 2)
712         {
713           vlib_buffer_t * p0, * p1;
714           ip4_header_t * ip0, * ip1;
715           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
716           ip_lookup_next_t next0, next1;
717           ip_adjacency_t * adj0, * adj1;
718           ip4_fib_mtrie_t * mtrie0, * mtrie1;
719           ip4_fib_mtrie_leaf_t leaf0, leaf1;
720           ip4_address_t * dst_addr0, *dst_addr1;
721           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
722           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
723           u32 flow_hash_config0, flow_hash_config1;
724           u32 hash_c0, hash_c1;
725           u32 wrong_next;
726
727           /* Prefetch next iteration. */
728           {
729             vlib_buffer_t * p2, * p3;
730
731             p2 = vlib_get_buffer (vm, from[2]);
732             p3 = vlib_get_buffer (vm, from[3]);
733
734             vlib_prefetch_buffer_header (p2, LOAD);
735             vlib_prefetch_buffer_header (p3, LOAD);
736
737             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
738             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
739           }
740
741           pi0 = to_next[0] = from[0];
742           pi1 = to_next[1] = from[1];
743
744           p0 = vlib_get_buffer (vm, pi0);
745           p1 = vlib_get_buffer (vm, pi1);
746
747           ip0 = vlib_buffer_get_current (p0);
748           ip1 = vlib_buffer_get_current (p1);
749
750           if (is_indirect)
751             {
752               ip_adjacency_t * iadj0, * iadj1;
753               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
754               iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
755               dst_addr0 = &iadj0->indirect.next_hop.ip4;
756               dst_addr1 = &iadj1->indirect.next_hop.ip4;
757             }
758           else
759             {
760               dst_addr0 = &ip0->dst_address;
761               dst_addr1 = &ip1->dst_address;
762             }
763
764           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
765           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
766           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
767             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
768           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
769             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
770
771
772           if (! lookup_for_responses_to_locally_received_packets)
773             {
774               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
775               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
776
777               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
778
779               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
780               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
781             }
782
783           tcp0 = (void *) (ip0 + 1);
784           tcp1 = (void *) (ip1 + 1);
785
786           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
787                          || ip0->protocol == IP_PROTOCOL_UDP);
788           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
789                          || ip1->protocol == IP_PROTOCOL_UDP);
790
791           if (! lookup_for_responses_to_locally_received_packets)
792             {
793               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
794               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
795             }
796
797           if (! lookup_for_responses_to_locally_received_packets)
798             {
799               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
800               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
801             }
802
803           if (! lookup_for_responses_to_locally_received_packets)
804             {
805               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
806               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
807             }
808
809           if (lookup_for_responses_to_locally_received_packets)
810             {
811               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
812               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
813             }
814           else
815             {
816               /* Handle default route. */
817               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
818               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
819
820               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
821               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
822             }
823
824           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
825                                                            dst_addr0,
826                                                            /* no_default_route */ 0));
827           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
828                                                            dst_addr1,
829                                                            /* no_default_route */ 0));
830           adj0 = ip_get_adjacency (lm, adj_index0);
831           adj1 = ip_get_adjacency (lm, adj_index1);
832
833           next0 = adj0->lookup_next_index;
834           next1 = adj1->lookup_next_index;
835
836           /* Use flow hash to compute multipath adjacency. */
837           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
838           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
839           if (PREDICT_FALSE (adj0->n_adj > 1))
840             {
841               flow_hash_config0 = 
842                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
843               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
844                 ip4_compute_flow_hash (ip0, flow_hash_config0);
845             }
846           if (PREDICT_FALSE(adj1->n_adj > 1))
847             {
848               flow_hash_config1 = 
849                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
850               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
851                 ip4_compute_flow_hash (ip1, flow_hash_config1);
852             }
853
854           ASSERT (adj0->n_adj > 0);
855           ASSERT (adj1->n_adj > 0);
856           ASSERT (is_pow2 (adj0->n_adj));
857           ASSERT (is_pow2 (adj1->n_adj));
858           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
859           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
860
861           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
862           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
863
864           vlib_increment_combined_counter 
865               (cm, cpu_index, adj_index0, 1,
866                vlib_buffer_length_in_chain (vm, p0) 
867                + sizeof(ethernet_header_t));
868           vlib_increment_combined_counter 
869               (cm, cpu_index, adj_index1, 1,
870                vlib_buffer_length_in_chain (vm, p1)
871                + sizeof(ethernet_header_t));
872
873           from += 2;
874           to_next += 2;
875           n_left_to_next -= 2;
876           n_left_from -= 2;
877
878           wrong_next = (next0 != next) + 2*(next1 != next);
879           if (PREDICT_FALSE (wrong_next != 0))
880             {
881               switch (wrong_next)
882                 {
883                 case 1:
884                   /* A B A */
885                   to_next[-2] = pi1;
886                   to_next -= 1;
887                   n_left_to_next += 1;
888                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
889                   break;
890
891                 case 2:
892                   /* A A B */
893                   to_next -= 1;
894                   n_left_to_next += 1;
895                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
896                   break;
897
898                 case 3:
899                   /* A B C */
900                   to_next -= 2;
901                   n_left_to_next += 2;
902                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
903                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
904                   if (next0 == next1)
905                     {
906                       /* A B B */
907                       vlib_put_next_frame (vm, node, next, n_left_to_next);
908                       next = next1;
909                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
910                     }
911                 }
912             }
913         }
914     
915       while (n_left_from > 0 && n_left_to_next > 0)
916         {
917           vlib_buffer_t * p0;
918           ip4_header_t * ip0;
919           __attribute__((unused)) tcp_header_t * tcp0;
920           ip_lookup_next_t next0;
921           ip_adjacency_t * adj0;
922           ip4_fib_mtrie_t * mtrie0;
923           ip4_fib_mtrie_leaf_t leaf0;
924           ip4_address_t * dst_addr0;
925           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
926           u32 flow_hash_config0, hash_c0;
927
928           pi0 = from[0];
929           to_next[0] = pi0;
930
931           p0 = vlib_get_buffer (vm, pi0);
932
933           ip0 = vlib_buffer_get_current (p0);
934
935           if (is_indirect)
936             {
937               ip_adjacency_t * iadj0;
938               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
939               dst_addr0 = &iadj0->indirect.next_hop.ip4;
940             }
941           else
942             {
943               dst_addr0 = &ip0->dst_address;
944             }
945
946           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
947           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
948             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
949
950           if (! lookup_for_responses_to_locally_received_packets)
951             {
952               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
953
954               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
955
956               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
957             }
958
959           tcp0 = (void *) (ip0 + 1);
960
961           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
962                          || ip0->protocol == IP_PROTOCOL_UDP);
963
964           if (! lookup_for_responses_to_locally_received_packets)
965             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
966
967           if (! lookup_for_responses_to_locally_received_packets)
968             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
969
970           if (! lookup_for_responses_to_locally_received_packets)
971             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
972
973           if (lookup_for_responses_to_locally_received_packets)
974             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
975           else
976             {
977               /* Handle default route. */
978               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
979               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
980             }
981
982           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
983                                                            dst_addr0,
984                                                            /* no_default_route */ 0));
985
986           adj0 = ip_get_adjacency (lm, adj_index0);
987
988           next0 = adj0->lookup_next_index;
989
990           /* Use flow hash to compute multipath adjacency. */
991           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
992           if (PREDICT_FALSE(adj0->n_adj > 1))
993             {
994               flow_hash_config0 = 
995                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
996
997               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
998                 ip4_compute_flow_hash (ip0, flow_hash_config0);
999             }
1000
1001           ASSERT (adj0->n_adj > 0);
1002           ASSERT (is_pow2 (adj0->n_adj));
1003           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
1004
1005           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1006
1007           vlib_increment_combined_counter 
1008               (cm, cpu_index, adj_index0, 1,
1009                vlib_buffer_length_in_chain (vm, p0)
1010                + sizeof(ethernet_header_t));
1011
1012           from += 1;
1013           to_next += 1;
1014           n_left_to_next -= 1;
1015           n_left_from -= 1;
1016
1017           if (PREDICT_FALSE (next0 != next))
1018             {
1019               n_left_to_next += 1;
1020               vlib_put_next_frame (vm, node, next, n_left_to_next);
1021               next = next0;
1022               vlib_get_next_frame (vm, node, next,
1023                                    to_next, n_left_to_next);
1024               to_next[0] = pi0;
1025               to_next += 1;
1026               n_left_to_next -= 1;
1027             }
1028         }
1029
1030       vlib_put_next_frame (vm, node, next, n_left_to_next);
1031     }
1032
1033   if (node->flags & VLIB_NODE_FLAG_TRACE)
1034     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
1035
1036   return frame->n_vectors;
1037 }
1038
1039 /** @brief IPv4 lookup node.
1040     @node ip4-lookup
1041
1042     This is the main IPv4 lookup dispatch node.
1043
1044     @param vm vlib_main_t corresponding to the current thread
1045     @param node vlib_node_runtime_t
1046     @param frame vlib_frame_t whose contents should be dispatched
1047
1048     @par Graph mechanics: buffer metadata, next index usage
1049
1050     @em Uses:
1051     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
1052         - Indicates the @c sw_if_index value of the interface that the
1053           packet was received on.
1054     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
1055         - When the value is @c ~0 then the node performs a longest prefix
1056           match (LPM) for the packet destination address in the FIB attached
1057           to the receive interface.
1058         - Otherwise perform LPM for the packet destination address in the
1059           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
1060           value (0, 1, ...) and not a VRF id.
1061
1062     @em Sets:
1063     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
1064         - The lookup result adjacency index.
1065
1066     <em>Next Index:</em>
1067     - Dispatches the packet to the node index found in
1068       ip_adjacency_t @c adj->lookup_next_index
1069       (where @c adj is the lookup result adjacency).
1070 */
1071 static uword
1072 ip4_lookup (vlib_main_t * vm,
1073             vlib_node_runtime_t * node,
1074             vlib_frame_t * frame)
1075 {
1076   return ip4_lookup_inline (vm, node, frame,
1077                             /* lookup_for_responses_to_locally_received_packets */ 0,
1078                             /* is_indirect */ 0);
1079
1080 }
1081
1082 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
1083                                         ip_adjacency_t * adj,
1084                                         u32 sw_if_index,
1085                                         u32 if_address_index)
1086 {
1087   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1088   ip_lookup_next_t n;
1089   vnet_l3_packet_type_t packet_type;
1090   u32 node_index;
1091
1092   if (hw->hw_class_index == ethernet_hw_interface_class.index
1093       || hw->hw_class_index == srp_hw_interface_class.index)
1094     {
1095       /* 
1096        * We have a bit of a problem in this case. ip4-arp uses
1097        * the rewrite_header.next_index to hand pkts to the
1098        * indicated inteface output node. We can end up in
1099        * ip4_rewrite_local, too, which also pays attention to 
1100        * rewrite_header.next index. Net result: a hack in
1101        * ip4_rewrite_local...
1102        */
1103       n = IP_LOOKUP_NEXT_ARP;
1104       node_index = ip4_arp_node.index;
1105       adj->if_address_index = if_address_index;
1106       adj->arp.next_hop.ip4.as_u32 = 0;
1107       ip46_address_reset(&adj->arp.next_hop);
1108       packet_type = VNET_L3_PACKET_TYPE_ARP;
1109     }
1110   else
1111     {
1112       n = IP_LOOKUP_NEXT_REWRITE;
1113       node_index = ip4_rewrite_node.index;
1114       packet_type = VNET_L3_PACKET_TYPE_IP4;
1115     }
1116
1117   adj->lookup_next_index = n;
1118   vnet_rewrite_for_sw_interface
1119     (vnm,
1120      packet_type,
1121      sw_if_index,
1122      node_index,
1123      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1124      &adj->rewrite_header,
1125      sizeof (adj->rewrite_data));
1126 }
1127
1128 static void
1129 ip4_add_interface_routes (u32 sw_if_index,
1130                           ip4_main_t * im, u32 fib_index,
1131                           ip_interface_address_t * a)
1132 {
1133   vnet_main_t * vnm = vnet_get_main();
1134   ip_lookup_main_t * lm = &im->lookup_main;
1135   ip_adjacency_t * adj;
1136   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1137   ip4_add_del_route_args_t x;
1138   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1139   u32 classify_table_index;
1140
1141   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1142   x.table_index_or_table_id = fib_index;
1143   x.flags = (IP4_ROUTE_FLAG_ADD
1144              | IP4_ROUTE_FLAG_FIB_INDEX
1145              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1146   x.dst_address = address[0];
1147   x.dst_address_length = a->address_length;
1148   x.n_add_adj = 0;
1149   x.add_adj = 0;
1150
1151   a->neighbor_probe_adj_index = ~0;
1152   if (a->address_length < 32)
1153     {
1154       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1155                               &x.adj_index);
1156       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1157       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1158       ip4_add_del_route (im, &x);
1159       a->neighbor_probe_adj_index = x.adj_index;
1160     }
1161   
1162   /* Add e.g. 1.1.1.1/32 as local to this host. */
1163   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1164                           &x.adj_index);
1165   
1166   classify_table_index = ~0;
1167   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1168     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1169   if (classify_table_index != (u32) ~0)
1170     {
1171       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1172       adj->classify.table_index = classify_table_index;
1173     }
1174   else
1175     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1176   
1177   adj->if_address_index = a - lm->if_address_pool;
1178   adj->rewrite_header.sw_if_index = sw_if_index;
1179   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1180   /* 
1181    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1182    * fail an RPF-ish check, but still go thru the rewrite code...
1183    */
1184   adj->rewrite_header.data_bytes = 0;
1185
1186   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1187   x.dst_address_length = 32;
1188   ip4_add_del_route (im, &x);
1189 }
1190
1191 static void
1192 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1193 {
1194   ip4_add_del_route_args_t x;
1195
1196   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1197   x.table_index_or_table_id = fib_index;
1198   x.flags = (IP4_ROUTE_FLAG_DEL
1199              | IP4_ROUTE_FLAG_FIB_INDEX
1200              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1201   x.dst_address = address[0];
1202   x.dst_address_length = address_length;
1203   x.adj_index = ~0;
1204   x.n_add_adj = 0;
1205   x.add_adj = 0;
1206
1207   if (address_length < 32)
1208     ip4_add_del_route (im, &x);
1209
1210   x.dst_address_length = 32;
1211   ip4_add_del_route (im, &x);
1212
1213   ip4_delete_matching_routes (im,
1214                               fib_index,
1215                               IP4_ROUTE_FLAG_FIB_INDEX,
1216                               address,
1217                               address_length);
1218 }
1219
/*
 * An IPv4 address paired with the software interface it belongs to.
 * NOTE(review): not referenced in the portion of the file reviewed
 * here; confirm it is still used before relying on it.
 */
typedef struct {
    /* Software interface index. */
    u32 sw_if_index;
    /* The IPv4 address. */
    ip4_address_t address;
    /* Presumably the prefix length of the address -- TODO confirm. */
    u32 length;
} ip4_interface_address_t;
1225
/*
 * Forward declaration of the static worker defined immediately below.
 * Note the definition names the address parameters "address" and
 * "address_length" rather than "new_address" / "new_length".
 */
static clib_error_t *
ip4_add_del_interface_address_internal (vlib_main_t * vm,
                                        u32 sw_if_index,
                                        ip4_address_t * new_address,
                                        u32 new_length,
                                        u32 redistribute,
                                        u32 insert_routes,
                                        u32 is_del);
1234
1235 static clib_error_t *
1236 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1237                                         u32 sw_if_index,
1238                                         ip4_address_t * address,
1239                                         u32 address_length,
1240                                         u32 redistribute,
1241                                         u32 insert_routes,
1242                                         u32 is_del)
1243 {
1244   vnet_main_t * vnm = vnet_get_main();
1245   ip4_main_t * im = &ip4_main;
1246   ip_lookup_main_t * lm = &im->lookup_main;
1247   clib_error_t * error = 0;
1248   u32 if_address_index, elts_before;
1249   ip4_address_fib_t ip4_af, * addr_fib = 0;
1250
1251   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1252   ip4_addr_fib_init (&ip4_af, address,
1253                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1254   vec_add1 (addr_fib, ip4_af);
1255
1256   /* When adding an address check that it does not conflict with an existing address. */
1257   if (! is_del)
1258     {
1259       ip_interface_address_t * ia;
1260       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1261                                     0 /* honor unnumbered */,
1262       ({
1263         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1264
1265         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1266             || ip4_destination_matches_route (im, x, address, address_length))
1267           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1268                                     format_ip4_address_and_length, address, address_length,
1269                                     format_ip4_address_and_length, x, ia->address_length,
1270                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1271       }));
1272     }
1273
1274   elts_before = pool_elts (lm->if_address_pool);
1275
1276   error = ip_interface_address_add_del
1277     (lm,
1278      sw_if_index,
1279      addr_fib,
1280      address_length,
1281      is_del,
1282      &if_address_index);
1283   if (error)
1284     goto done;
1285   
1286   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1287     {
1288       if (is_del)
1289         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1290                                   address_length);
1291       
1292       else
1293           ip4_add_interface_routes (sw_if_index,
1294                                     im, ip4_af.fib_index,
1295                                     pool_elt_at_index 
1296                                     (lm->if_address_pool, if_address_index));
1297     }
1298
1299   /* If pool did not grow/shrink: add duplicate address. */
1300   if (elts_before != pool_elts (lm->if_address_pool))
1301     {
1302       ip4_add_del_interface_address_callback_t * cb;
1303       vec_foreach (cb, im->add_del_interface_address_callbacks)
1304         cb->function (im, cb->function_opaque, sw_if_index,
1305                       address, address_length,
1306                       if_address_index,
1307                       is_del);
1308     }
1309
1310  done:
1311   vec_free (addr_fib);
1312   return error;
1313 }
1314
1315 clib_error_t *
1316 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1317                                ip4_address_t * address, u32 address_length,
1318                                u32 is_del)
1319 {
1320   return ip4_add_del_interface_address_internal
1321     (vm, sw_if_index, address, address_length,
1322      /* redistribute */ 1,
1323      /* insert_routes */ 1,
1324      is_del);
1325 }
1326
1327 static clib_error_t *
1328 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1329                                 u32 sw_if_index,
1330                                 u32 flags)
1331 {
1332   ip4_main_t * im = &ip4_main;
1333   ip_interface_address_t * ia;
1334   ip4_address_t * a;
1335   u32 is_admin_up, fib_index;
1336   
1337   /* Fill in lookup tables with default table (0). */
1338   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1339   
1340   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1341   
1342   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1343   
1344   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1345
1346   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1347                                 0 /* honor unnumbered */,
1348   ({
1349     a = ip_interface_address_get_address (&im->lookup_main, ia);
1350     if (is_admin_up)
1351       ip4_add_interface_routes (sw_if_index,
1352                                 im, fib_index,
1353                                 ia);
1354     else
1355       ip4_del_interface_routes (im, fib_index,
1356                                 a, ia->address_length);
1357   }));
1358
1359   return 0;
1360 }
1361  
1362 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1363
/*
 * Built-in ip4 unicast rx feature path definition.
 *
 * Each registration names a graph node, the feature(s) it must run
 * before, and the ip4_main field given as its feature_index.  Taken
 * together the runs_before constraints below order the unicast
 * features as:
 *
 *   ip4-inacl -> ip4-source-check-via-rx -> ip4-source-check-via-any
 *     -> ip4-policer-classify -> ipsec-input-ip4 -> vpath-input-ip4
 *     -> ip4-lookup
 */
VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
  .node_name = "ip4-inacl",
  .runs_before = {"ip4-source-check-via-rx", 0},
  .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
};

VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
  .node_name = "ip4-source-check-via-rx",
  .runs_before = {"ip4-source-check-via-any", 0},
  .feature_index =
  &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
};

VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
  .node_name = "ip4-source-check-via-any",
  .runs_before = {"ip4-policer-classify", 0},
  .feature_index =
  &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
};

VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
  .node_name = "ip4-policer-classify",
  .runs_before = {"ipsec-input-ip4", 0},
  .feature_index =
  &ip4_main.ip4_unicast_rx_feature_policer_classify,
};

VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
  .node_name = "ipsec-input-ip4",
  .runs_before = {"vpath-input-ip4", 0},
  .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
};

VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
  .node_name = "vpath-input-ip4",
  .runs_before = {"ip4-lookup", 0},
  .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
};

/* Last unicast feature: nothing is constrained to run after it. */
VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
  .node_name = "ip4-lookup",
  .runs_before = {0}, /* not before any other features */
  .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
};

/*
 * Built-in ip4 multicast rx feature path definition:
 *   vpath-input-ip4 -> ip4-lookup-multicast
 */
VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
  .node_name = "vpath-input-ip4",
  .runs_before = {"ip4-lookup-multicast", 0},
  .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
};

/* Last multicast feature. */
VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
  .node_name = "ip4-lookup-multicast",
  .runs_before = {0}, /* not before any other features */
  .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
};
1422
1423 static char * feature_start_nodes[] = 
1424   { "ip4-input", "ip4-input-no-checksum"};
1425
1426 static clib_error_t *
1427 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
1428 {
1429   ip_lookup_main_t * lm = &im->lookup_main;
1430   clib_error_t * error;
1431   vnet_cast_t cast;
1432
1433   for (cast = 0; cast < VNET_N_CAST; cast++)
1434     {
1435       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1436       vnet_config_main_t * vcm = &cm->config_main;
1437
1438       if ((error = ip_feature_init_cast (vm, cm, vcm, 
1439                                          feature_start_nodes,
1440                                          ARRAY_LEN(feature_start_nodes),
1441                                          cast,
1442                                          1 /* is_ip4 */)))
1443         return error;
1444     }
1445   return 0;
1446 }
1447
1448 static clib_error_t *
1449 ip4_sw_interface_add_del (vnet_main_t * vnm,
1450                           u32 sw_if_index,
1451                           u32 is_add)
1452 {
1453   vlib_main_t * vm = vnm->vlib_main;
1454   ip4_main_t * im = &ip4_main;
1455   ip_lookup_main_t * lm = &im->lookup_main;
1456   u32 ci, cast;
1457   u32 feature_index;
1458
1459   for (cast = 0; cast < VNET_N_CAST; cast++)
1460     {
1461       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1462       vnet_config_main_t * vcm = &cm->config_main;
1463
1464       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1465       ci = cm->config_index_by_sw_if_index[sw_if_index];
1466
1467       if (cast == VNET_UNICAST)
1468         feature_index = im->ip4_unicast_rx_feature_lookup;
1469       else
1470         feature_index = im->ip4_multicast_rx_feature_lookup;
1471
1472       if (is_add)
1473         ci = vnet_config_add_feature (vm, vcm,
1474                                       ci,
1475                                       feature_index,
1476                                       /* config data */ 0,
1477                                       /* # bytes of config data */ 0);
1478       else
1479         ci = vnet_config_del_feature (vm, vcm,
1480                                       ci,
1481                                       feature_index,
1482                                       /* config data */ 0,
1483                                       /* # bytes of config data */ 0);
1484
1485       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1486     }
1487
1488   return /* no error */ 0;
1489 }
1490
1491 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1492
/* Trace formatter defined further down; declared here for the node registrations. */
static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);

/*
 * Graph node registration for "ip4-lookup".  The node runs
 * ip4_lookup() on vectors of u32 elements and takes its next-node
 * table from the IP4_LOOKUP_N_NEXT / IP4_LOOKUP_NEXT_NODES definitions.
 */
VLIB_REGISTER_NODE (ip4_lookup_node) = {
  .function = ip4_lookup,
  .name = "ip4-lookup",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_lookup_trace,

  .n_next_nodes = IP4_LOOKUP_N_NEXT,
  .next_nodes = IP4_LOOKUP_NEXT_NODES,
};

VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
1507
1508 static uword
1509 ip4_indirect (vlib_main_t * vm,
1510                vlib_node_runtime_t * node,
1511                vlib_frame_t * frame)
1512 {
1513   return ip4_lookup_inline (vm, node, frame,
1514                             /* lookup_for_responses_to_locally_received_packets */ 0,
1515                             /* is_indirect */ 1);
1516 }
1517
/*
 * Graph node registration for "ip4-indirect".  It is declared a sibling
 * of "ip4-lookup" and registers no next nodes of its own
 * (n_next_nodes is 0); it reuses the ip4-lookup trace formatter.
 */
VLIB_REGISTER_NODE (ip4_indirect_node) = {
  .function = ip4_indirect,
  .name = "ip4-indirect",
  .vector_size = sizeof (u32),
  .sibling_of = "ip4-lookup",
  .format_trace = format_ip4_lookup_trace,

  .n_next_nodes = 0,
};

VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect)
1529
1530
/* Global IP4 main: the single ip4_main_t instance the functions in this file reach via &ip4_main. */
ip4_main_t ip4_main;
1533
1534 clib_error_t *
1535 ip4_lookup_init (vlib_main_t * vm)
1536 {
1537   ip4_main_t * im = &ip4_main;
1538   clib_error_t * error;
1539   uword i;
1540
1541   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1542     {
1543       u32 m;
1544
1545       if (i < 32)
1546         m = pow2_mask (i) << (32 - i);
1547       else 
1548         m = ~0;
1549       im->fib_masks[i] = clib_host_to_net_u32 (m);
1550     }
1551
1552   /* Create FIB with index 0 and table id of 0. */
1553   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1554
1555   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1556
1557   {
1558     pg_node_t * pn;
1559     pn = pg_get_node (ip4_lookup_node.index);
1560     pn->unformat_edit = unformat_pg_ip4_header;
1561   }
1562
1563   {
1564     ethernet_arp_header_t h;
1565
1566     memset (&h, 0, sizeof (h));
1567
1568     /* Set target ethernet address to all zeros. */
1569     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1570
1571 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1572 #define _8(f,v) h.f = v;
1573     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1574     _16 (l3_type, ETHERNET_TYPE_IP4);
1575     _8 (n_l2_address_bytes, 6);
1576     _8 (n_l3_address_bytes, 4);
1577     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1578 #undef _16
1579 #undef _8
1580
1581     vlib_packet_template_init (vm,
1582                                &im->ip4_arp_request_packet_template,
1583                                /* data */ &h,
1584                                sizeof (h),
1585                                /* alloc chunk size */ 8,
1586                                "ip4 arp");
1587   }
1588
1589   error = ip4_feature_init (vm, im);
1590
1591   return error;
1592 }
1593
1594 VLIB_INIT_FUNCTION (ip4_lookup_init);
1595
/*
 * Per-packet trace record written by ip4_forward_next_trace() and
 * decoded by the format_ip4_*_trace() functions in this file.
 */
typedef struct {
  /* Adjacency taken. */
  u32 adj_index;
  /* Flow hash copied from the buffer's ip.flow_hash. */
  u32 flow_hash;
  /*
   * The buffer's sw_if_index[VLIB_TX] when that is not ~0, otherwise the
   * FIB index of the rx interface.  format_ip4_rewrite_trace() prints
   * this field as "tx_sw_if_index".
   */
  u32 fib_index;

  /*
   * Packet data, possibly *after* rewrite.
   * NOTE(review): the size subtracts a single u32 although three u32
   * fields precede this array; confirm whether a 64-byte record was
   * intended.
   */
  u8 packet_data[64 - 1*sizeof(u32)];
} ip4_forward_next_trace_t;
1605
1606 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1607 {
1608   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1609   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1610   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1611   uword indent = format_get_indent (s);
1612   s = format (s, "%U%U",
1613                 format_white_space, indent,
1614                 format_ip4_header, t->packet_data);
1615   return s;
1616 }
1617
1618 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1619 {
1620   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1621   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1622   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1623   vnet_main_t * vnm = vnet_get_main();
1624   ip4_main_t * im = &ip4_main;
1625   uword indent = format_get_indent (s);
1626
1627   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1628               t->fib_index, t->adj_index, format_ip_adjacency,
1629               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1630   s = format (s, "\n%U%U",
1631               format_white_space, indent,
1632               format_ip4_header, t->packet_data);
1633   return s;
1634 }
1635
1636 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1637 {
1638   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1639   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1640   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1641   vnet_main_t * vnm = vnet_get_main();
1642   ip4_main_t * im = &ip4_main;
1643   uword indent = format_get_indent (s);
1644
1645   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
1646               t->fib_index, t->adj_index, format_ip_adjacency,
1647               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1648   s = format (s, "\n%U%U",
1649               format_white_space, indent,
1650               format_ip_adjacency_packet_data,
1651               vnm, &im->lookup_main, t->adj_index,
1652               t->packet_data, sizeof (t->packet_data));
1653   return s;
1654 }
1655
1656 /* Common trace function for all ip4-forward next nodes. */
1657 void
1658 ip4_forward_next_trace (vlib_main_t * vm,
1659                         vlib_node_runtime_t * node,
1660                         vlib_frame_t * frame,
1661                         vlib_rx_or_tx_t which_adj_index)
1662 {
1663   u32 * from, n_left;
1664   ip4_main_t * im = &ip4_main;
1665
1666   n_left = frame->n_vectors;
1667   from = vlib_frame_vector_args (frame);
1668   
1669   while (n_left >= 4)
1670     {
1671       u32 bi0, bi1;
1672       vlib_buffer_t * b0, * b1;
1673       ip4_forward_next_trace_t * t0, * t1;
1674
1675       /* Prefetch next iteration. */
1676       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1677       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1678
1679       bi0 = from[0];
1680       bi1 = from[1];
1681
1682       b0 = vlib_get_buffer (vm, bi0);
1683       b1 = vlib_get_buffer (vm, bi1);
1684
1685       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1686         {
1687           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1688           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1689           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1690           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1691               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1692               vec_elt (im->fib_index_by_sw_if_index,
1693                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1694
1695           clib_memcpy (t0->packet_data,
1696                   vlib_buffer_get_current (b0),
1697                   sizeof (t0->packet_data));
1698         }
1699       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1700         {
1701           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1702           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1703           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1704           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1705               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1706               vec_elt (im->fib_index_by_sw_if_index,
1707                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1708           clib_memcpy (t1->packet_data,
1709                   vlib_buffer_get_current (b1),
1710                   sizeof (t1->packet_data));
1711         }
1712       from += 2;
1713       n_left -= 2;
1714     }
1715
1716   while (n_left >= 1)
1717     {
1718       u32 bi0;
1719       vlib_buffer_t * b0;
1720       ip4_forward_next_trace_t * t0;
1721
1722       bi0 = from[0];
1723
1724       b0 = vlib_get_buffer (vm, bi0);
1725
1726       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1727         {
1728           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1729           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1730           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1731           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1732               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1733               vec_elt (im->fib_index_by_sw_if_index,
1734                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1735           clib_memcpy (t0->packet_data,
1736                   vlib_buffer_get_current (b0),
1737                   sizeof (t0->packet_data));
1738         }
1739       from += 1;
1740       n_left -= 1;
1741     }
1742 }
1743
1744 static uword
1745 ip4_drop_or_punt (vlib_main_t * vm,
1746                   vlib_node_runtime_t * node,
1747                   vlib_frame_t * frame,
1748                   ip4_error_t error_code)
1749 {
1750   u32 * buffers = vlib_frame_vector_args (frame);
1751   uword n_packets = frame->n_vectors;
1752
1753   vlib_error_drop_buffers (vm, node,
1754                            buffers,
1755                            /* stride */ 1,
1756                            n_packets,
1757                            /* next */ 0,
1758                            ip4_input_node.index,
1759                            error_code);
1760
1761   if (node->flags & VLIB_NODE_FLAG_TRACE)
1762     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1763
1764   return n_packets;
1765 }
1766
1767 static uword
1768 ip4_drop (vlib_main_t * vm,
1769           vlib_node_runtime_t * node,
1770           vlib_frame_t * frame)
1771 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1772
1773 static uword
1774 ip4_punt (vlib_main_t * vm,
1775           vlib_node_runtime_t * node,
1776           vlib_frame_t * frame)
1777 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1778
1779 static uword
1780 ip4_miss (vlib_main_t * vm,
1781           vlib_node_runtime_t * node,
1782           vlib_frame_t * frame)
1783 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1784
1785 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1786   .function = ip4_drop,
1787   .name = "ip4-drop",
1788   .vector_size = sizeof (u32),
1789
1790   .format_trace = format_ip4_forward_next_trace,
1791
1792   .n_next_nodes = 1,
1793   .next_nodes = {
1794     [0] = "error-drop",
1795   },
1796 };
1797
1798 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1799
1800 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1801   .function = ip4_punt,
1802   .name = "ip4-punt",
1803   .vector_size = sizeof (u32),
1804
1805   .format_trace = format_ip4_forward_next_trace,
1806
1807   .n_next_nodes = 1,
1808   .next_nodes = {
1809     [0] = "error-punt",
1810   },
1811 };
1812
1813 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1814
1815 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1816   .function = ip4_miss,
1817   .name = "ip4-miss",
1818   .vector_size = sizeof (u32),
1819
1820   .format_trace = format_ip4_forward_next_trace,
1821
1822   .n_next_nodes = 1,
1823   .next_nodes = {
1824     [0] = "error-drop",
1825   },
1826 };
1827
1828 VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss)
1829
1830 /* Compute TCP/UDP/ICMP4 checksum in software. */
1831 u16
1832 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1833                               ip4_header_t * ip0)
1834 {
1835   ip_csum_t sum0;
1836   u32 ip_header_length, payload_length_host_byte_order;
1837   u32 n_this_buffer, n_bytes_left;
1838   u16 sum16;
1839   void * data_this_buffer;
1840   
1841   /* Initialize checksum with ip header. */
1842   ip_header_length = ip4_header_bytes (ip0);
1843   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1844   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1845
1846   if (BITS (uword) == 32)
1847     {
1848       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1849       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1850     }
1851   else
1852     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1853
1854   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1855   data_this_buffer = (void *) ip0 + ip_header_length;
1856   if (n_this_buffer + ip_header_length > p0->current_length)
1857     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1858   while (1)
1859     {
1860       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1861       n_bytes_left -= n_this_buffer;
1862       if (n_bytes_left == 0)
1863         break;
1864
1865       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1866       p0 = vlib_get_buffer (vm, p0->next_buffer);
1867       data_this_buffer = vlib_buffer_get_current (p0);
1868       n_this_buffer = p0->current_length;
1869     }
1870
1871   sum16 = ~ ip_csum_fold (sum0);
1872
1873   return sum16;
1874 }
1875
1876 static u32
1877 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1878 {
1879   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1880   udp_header_t * udp0;
1881   u16 sum16;
1882
1883   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1884           || ip0->protocol == IP_PROTOCOL_UDP);
1885
1886   udp0 = (void *) (ip0 + 1);
1887   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1888     {
1889       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1890                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1891       return p0->flags;
1892     }
1893
1894   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1895
1896   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1897                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1898
1899   return p0->flags;
1900 }
1901
1902 static uword
1903 ip4_local (vlib_main_t * vm,
1904            vlib_node_runtime_t * node,
1905            vlib_frame_t * frame)
1906 {
1907   ip4_main_t * im = &ip4_main;
1908   ip_lookup_main_t * lm = &im->lookup_main;
1909   ip_local_next_t next_index;
1910   u32 * from, * to_next, n_left_from, n_left_to_next;
1911   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1912
1913   from = vlib_frame_vector_args (frame);
1914   n_left_from = frame->n_vectors;
1915   next_index = node->cached_next_index;
1916   
1917   if (node->flags & VLIB_NODE_FLAG_TRACE)
1918     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1919
1920   while (n_left_from > 0)
1921     {
1922       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1923
1924       while (n_left_from >= 4 && n_left_to_next >= 2)
1925         {
1926           vlib_buffer_t * p0, * p1;
1927           ip4_header_t * ip0, * ip1;
1928           udp_header_t * udp0, * udp1;
1929           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1930           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1931           ip_adjacency_t * adj0, * adj1;
1932           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
1933           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
1934           i32 len_diff0, len_diff1;
1935           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1936           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1937           u8 enqueue_code;
1938       
1939           pi0 = to_next[0] = from[0];
1940           pi1 = to_next[1] = from[1];
1941           from += 2;
1942           n_left_from -= 2;
1943           to_next += 2;
1944           n_left_to_next -= 2;
1945       
1946           p0 = vlib_get_buffer (vm, pi0);
1947           p1 = vlib_get_buffer (vm, pi1);
1948
1949           ip0 = vlib_buffer_get_current (p0);
1950           ip1 = vlib_buffer_get_current (p1);
1951
1952           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1953                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1954           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1955                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1956
1957           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1958           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
1959
1960           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1961
1962           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1963           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1964
1965           /* Treat IP frag packets as "experimental" protocol for now
1966              until support of IP frag reassembly is implemented */
1967           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1968           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1969           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1970           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1971           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1972           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1973
1974           flags0 = p0->flags;
1975           flags1 = p1->flags;
1976
1977           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1978           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1979
1980           udp0 = ip4_next_header (ip0);
1981           udp1 = ip4_next_header (ip1);
1982
1983           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1984           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1985           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1986
1987           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1988           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1989
1990           /* Verify UDP length. */
1991           ip_len0 = clib_net_to_host_u16 (ip0->length);
1992           ip_len1 = clib_net_to_host_u16 (ip1->length);
1993           udp_len0 = clib_net_to_host_u16 (udp0->length);
1994           udp_len1 = clib_net_to_host_u16 (udp1->length);
1995
1996           len_diff0 = ip_len0 - udp_len0;
1997           len_diff1 = ip_len1 - udp_len1;
1998
1999           len_diff0 = is_udp0 ? len_diff0 : 0;
2000           len_diff1 = is_udp1 ? len_diff1 : 0;
2001
2002           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
2003                                 & good_tcp_udp0 & good_tcp_udp1)))
2004             {
2005               if (is_tcp_udp0)
2006                 {
2007                   if (is_tcp_udp0
2008                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2009                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2010                   good_tcp_udp0 =
2011                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2012                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2013                 }
2014               if (is_tcp_udp1)
2015                 {
2016                   if (is_tcp_udp1
2017                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2018                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
2019                   good_tcp_udp1 =
2020                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2021                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2022                 }
2023             }
2024
2025           good_tcp_udp0 &= len_diff0 >= 0;
2026           good_tcp_udp1 &= len_diff1 >= 0;
2027
2028           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2029           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
2030
2031           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
2032
2033           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2034           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
2035
2036           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2037           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2038                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2039                     : error0);
2040           error1 = (is_tcp_udp1 && ! good_tcp_udp1
2041                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
2042                     : error1);
2043
2044           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2045           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
2046
2047           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2048           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2049
2050           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
2051           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2052
2053           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2054                                                            &ip0->src_address,
2055                                                            /* no_default_route */ 1));
2056           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
2057                                                            &ip1->src_address,
2058                                                            /* no_default_route */ 1));
2059
2060           adj0 = ip_get_adjacency (lm, adj_index0);
2061           adj1 = ip_get_adjacency (lm, adj_index1);
2062
2063           /* 
2064            * Must have a route to source otherwise we drop the packet.
2065            * ip4 broadcasts are accepted, e.g. to make dhcp client work
2066            */
2067           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2068                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2069                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2070                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2071                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2072                     ? IP4_ERROR_SRC_LOOKUP_MISS
2073                     : error0);
2074           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
2075                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2076                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
2077                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2078                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2079                     ? IP4_ERROR_SRC_LOOKUP_MISS
2080                     : error1);
2081
2082           next0 = lm->local_next_by_ip_protocol[proto0];
2083           next1 = lm->local_next_by_ip_protocol[proto1];
2084
2085           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2086           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
2087
2088           p0->error = error0 ? error_node->errors[error0] : 0;
2089           p1->error = error1 ? error_node->errors[error1] : 0;
2090
2091           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
2092
2093           if (PREDICT_FALSE (enqueue_code != 0))
2094             {
2095               switch (enqueue_code)
2096                 {
2097                 case 1:
2098                   /* A B A */
2099                   to_next[-2] = pi1;
2100                   to_next -= 1;
2101                   n_left_to_next += 1;
2102                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2103                   break;
2104
2105                 case 2:
2106                   /* A A B */
2107                   to_next -= 1;
2108                   n_left_to_next += 1;
2109                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2110                   break;
2111
2112                 case 3:
2113                   /* A B B or A B C */
2114                   to_next -= 2;
2115                   n_left_to_next += 2;
2116                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2117                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2118                   if (next0 == next1)
2119                     {
2120                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2121                       next_index = next1;
2122                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2123                     }
2124                   break;
2125                 }
2126             }
2127         }
2128
2129       while (n_left_from > 0 && n_left_to_next > 0)
2130         {
2131           vlib_buffer_t * p0;
2132           ip4_header_t * ip0;
2133           udp_header_t * udp0;
2134           ip4_fib_mtrie_t * mtrie0;
2135           ip4_fib_mtrie_leaf_t leaf0;
2136           ip_adjacency_t * adj0;
2137           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
2138           i32 len_diff0;
2139           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2140       
2141           pi0 = to_next[0] = from[0];
2142           from += 1;
2143           n_left_from -= 1;
2144           to_next += 1;
2145           n_left_to_next -= 1;
2146       
2147           p0 = vlib_get_buffer (vm, pi0);
2148
2149           ip0 = vlib_buffer_get_current (p0);
2150
2151           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2152                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2153
2154           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2155
2156           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2157
2158           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2159
2160           /* Treat IP frag packets as "experimental" protocol for now
2161              until support of IP frag reassembly is implemented */
2162           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
2163           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2164           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2165
2166           flags0 = p0->flags;
2167
2168           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2169
2170           udp0 = ip4_next_header (ip0);
2171
2172           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2173           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2174
2175           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2176
2177           /* Verify UDP length. */
2178           ip_len0 = clib_net_to_host_u16 (ip0->length);
2179           udp_len0 = clib_net_to_host_u16 (udp0->length);
2180
2181           len_diff0 = ip_len0 - udp_len0;
2182
2183           len_diff0 = is_udp0 ? len_diff0 : 0;
2184
2185           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
2186             {
2187               if (is_tcp_udp0)
2188                 {
2189                   if (is_tcp_udp0
2190                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2191                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2192                   good_tcp_udp0 =
2193                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2194                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2195                 }
2196             }
2197
2198           good_tcp_udp0 &= len_diff0 >= 0;
2199
2200           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2201
2202           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2203
2204           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2205
2206           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2207           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2208                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2209                     : error0);
2210
2211           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2212
2213           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2214           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2215
2216           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2217                                                            &ip0->src_address,
2218                                                            /* no_default_route */ 1));
2219
2220           adj0 = ip_get_adjacency (lm, adj_index0);
2221
2222           /* Must have a route to source otherwise we drop the packet. */
2223           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2224                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2225                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2226                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2227                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2228                     ? IP4_ERROR_SRC_LOOKUP_MISS
2229                     : error0);
2230
2231           next0 = lm->local_next_by_ip_protocol[proto0];
2232
2233           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2234
2235           p0->error = error0? error_node->errors[error0] : 0;
2236
2237           if (PREDICT_FALSE (next0 != next_index))
2238             {
2239               n_left_to_next += 1;
2240               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2241
2242               next_index = next0;
2243               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2244               to_next[0] = pi0;
2245               to_next += 1;
2246               n_left_to_next -= 1;
2247             }
2248         }
2249   
2250       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2251     }
2252
2253   return frame->n_vectors;
2254 }
2255
2256 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2257   .function = ip4_local,
2258   .name = "ip4-local",
2259   .vector_size = sizeof (u32),
2260
2261   .format_trace = format_ip4_forward_next_trace,
2262
2263   .n_next_nodes = IP_LOCAL_N_NEXT,
2264   .next_nodes = {
2265     [IP_LOCAL_NEXT_DROP] = "error-drop",
2266     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2267     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2268     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2269   },
2270 };
2271
2272 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
2273
2274 void ip4_register_protocol (u32 protocol, u32 node_index)
2275 {
2276   vlib_main_t * vm = vlib_get_main();
2277   ip4_main_t * im = &ip4_main;
2278   ip_lookup_main_t * lm = &im->lookup_main;
2279
2280   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2281   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2282 }
2283
2284 static clib_error_t *
2285 show_ip_local_command_fn (vlib_main_t * vm,
2286                           unformat_input_t * input,
2287                          vlib_cli_command_t * cmd)
2288 {
2289   ip4_main_t * im = &ip4_main;
2290   ip_lookup_main_t * lm = &im->lookup_main;
2291   int i;
2292
2293   vlib_cli_output (vm, "Protocols handled by ip4_local");
2294   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2295     {
2296       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2297         vlib_cli_output (vm, "%d", i);
2298     }
2299   return 0;
2300 }
2301
2302
2303
2304 VLIB_CLI_COMMAND (show_ip_local, static) = {
2305   .path = "show ip local",
2306   .function = show_ip_local_command_fn,
2307   .short_help = "Show ip local protocol table",
2308 };
2309
2310 static uword
2311 ip4_arp (vlib_main_t * vm,
2312          vlib_node_runtime_t * node,
2313          vlib_frame_t * frame)
2314 {
2315   vnet_main_t * vnm = vnet_get_main();
2316   ip4_main_t * im = &ip4_main;
2317   ip_lookup_main_t * lm = &im->lookup_main;
2318   u32 * from, * to_next_drop;
2319   uword n_left_from, n_left_to_next_drop, next_index;
2320   static f64 time_last_seed_change = -1e100;
2321   static u32 hash_seeds[3];
2322   static uword hash_bitmap[256 / BITS (uword)]; 
2323   f64 time_now;
2324
2325   if (node->flags & VLIB_NODE_FLAG_TRACE)
2326     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2327
2328   time_now = vlib_time_now (vm);
2329   if (time_now - time_last_seed_change > 1e-3)
2330     {
2331       uword i;
2332       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2333                                              sizeof (hash_seeds));
2334       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2335         hash_seeds[i] = r[i];
2336
2337       /* Mark all hash keys as been no-seen before. */
2338       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2339         hash_bitmap[i] = 0;
2340
2341       time_last_seed_change = time_now;
2342     }
2343
2344   from = vlib_frame_vector_args (frame);
2345   n_left_from = frame->n_vectors;
2346   next_index = node->cached_next_index;
2347   if (next_index == IP4_ARP_NEXT_DROP)
2348     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2349
2350   while (n_left_from > 0)
2351     {
2352       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2353                            to_next_drop, n_left_to_next_drop);
2354
2355       while (n_left_from > 0 && n_left_to_next_drop > 0)
2356         {
2357           vlib_buffer_t * p0;
2358           ip4_header_t * ip0;
2359           ethernet_header_t * eh0;
2360           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2361           uword bm0;
2362           ip_adjacency_t * adj0;
2363
2364           pi0 = from[0];
2365
2366           p0 = vlib_get_buffer (vm, pi0);
2367
2368           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2369           adj0 = ip_get_adjacency (lm, adj_index0);
2370           ip0 = vlib_buffer_get_current (p0);
2371
2372           /* If packet destination is not local, send ARP to next hop */
2373           if (adj0->arp.next_hop.ip4.as_u32)
2374             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2375
2376           /* 
2377            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2378            * rewrite to this packet, we need to skip it here.
2379            * Note, to distinguish from src IP addr *.8.6.*, we
2380            * check for a bcast eth dest instead of IPv4 version.
2381            */
2382           eh0 = (ethernet_header_t*)ip0;
2383           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2384             {
2385               u32 vlan_num = 0;
2386               u16 * etype = &eh0->type;
2387               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2388                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2389                 {
2390                   vlan_num += 1;
2391                   etype += 2; //vlan tag also 16 bits, same as etype
2392                 }
2393               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2394                 {
2395                   vlib_buffer_advance (
2396                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2397                   ip0 = vlib_buffer_get_current (p0);
2398                 }
2399             }
2400
2401           a0 = hash_seeds[0];
2402           b0 = hash_seeds[1];
2403           c0 = hash_seeds[2];
2404
2405           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2406           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2407
2408           a0 ^= ip0->dst_address.data_u32;
2409           b0 ^= sw_if_index0;
2410
2411           hash_v3_finalize32 (a0, b0, c0);
2412
2413           c0 &= BITS (hash_bitmap) - 1;
2414           c0 = c0 / BITS (uword);
2415           m0 = (uword) 1 << (c0 % BITS (uword));
2416
2417           bm0 = hash_bitmap[c0];
2418           drop0 = (bm0 & m0) != 0;
2419
2420           /* Mark it as seen. */
2421           hash_bitmap[c0] = bm0 | m0;
2422
2423           from += 1;
2424           n_left_from -= 1;
2425           to_next_drop[0] = pi0;
2426           to_next_drop += 1;
2427           n_left_to_next_drop -= 1;
2428
2429           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2430
2431           if (drop0)
2432             continue;
2433
2434           /* 
2435            * Can happen if the control-plane is programming tables
2436            * with traffic flowing; at least that's today's lame excuse.
2437            */
2438           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2439             {
2440               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2441             }
2442           else
2443           /* Send ARP request. */
2444           {
2445             u32 bi0 = 0;
2446             vlib_buffer_t * b0;
2447             ethernet_arp_header_t * h0;
2448             vnet_hw_interface_t * hw_if0;
2449
2450             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2451
2452             /* Add rewrite/encap string for ARP packet. */
2453             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2454
2455             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2456
2457             /* Src ethernet address in ARP header. */
2458             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2459                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2460
2461             if (ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0)) {
2462                 //No source address available
2463                 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2464                 vlib_buffer_free(vm, &bi0, 1);
2465                 continue;
2466             }
2467
2468             /* Copy in destination address we are requesting. */
2469             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2470
2471             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2472             b0 = vlib_get_buffer (vm, bi0);
2473             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2474
2475             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2476
2477             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2478           }
2479         }
2480
2481       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2482     }
2483
2484   return frame->n_vectors;
2485 }
2486
2487 static char * ip4_arp_error_strings[] = {
2488   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2489   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2490   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2491   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2492   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2493   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2494 };
2495
2496 VLIB_REGISTER_NODE (ip4_arp_node) = {
2497   .function = ip4_arp,
2498   .name = "ip4-arp",
2499   .vector_size = sizeof (u32),
2500
2501   .format_trace = format_ip4_forward_next_trace,
2502
2503   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2504   .error_strings = ip4_arp_error_strings,
2505
2506   .n_next_nodes = IP4_ARP_N_NEXT,
2507   .next_nodes = {
2508     [IP4_ARP_NEXT_DROP] = "error-drop",
2509   },
2510 };
2511
2512 #define foreach_notrace_ip4_arp_error           \
2513 _(DROP)                                         \
2514 _(REQUEST_SENT)                                 \
2515 _(REPLICATE_DROP)                               \
2516 _(REPLICATE_FAIL)
2517
2518 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2519 {
2520   vlib_node_runtime_t *rt = 
2521     vlib_node_get_runtime (vm, ip4_arp_node.index);
2522
2523   /* don't trace ARP request packets */
2524 #define _(a)                                    \
2525     vnet_pcap_drop_trace_filter_add_del         \
2526         (rt->errors[IP4_ARP_ERROR_##a],         \
2527          1 /* is_add */);
2528     foreach_notrace_ip4_arp_error;
2529 #undef _
2530   return 0;
2531 }
2532
2533 VLIB_INIT_FUNCTION(arp_notrace_init);
2534
2535
2536 /* Send an ARP request to see if given destination is reachable on given interface. */
2537 clib_error_t *
2538 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2539 {
2540   vnet_main_t * vnm = vnet_get_main();
2541   ip4_main_t * im = &ip4_main;
2542   ethernet_arp_header_t * h;
2543   ip4_address_t * src;
2544   ip_interface_address_t * ia;
2545   ip_adjacency_t * adj;
2546   vnet_hw_interface_t * hi;
2547   vnet_sw_interface_t * si;
2548   vlib_buffer_t * b;
2549   u32 bi = 0;
2550
2551   si = vnet_get_sw_interface (vnm, sw_if_index);
2552
2553   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2554     {
2555       return clib_error_return (0, "%U: interface %U down",
2556                                 format_ip4_address, dst, 
2557                                 format_vnet_sw_if_index_name, vnm, 
2558                                 sw_if_index);
2559     }
2560
2561   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2562   if (! src)
2563     {
2564       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2565       return clib_error_return 
2566         (0, "no matching interface address for destination %U (interface %U)",
2567          format_ip4_address, dst,
2568          format_vnet_sw_if_index_name, vnm, sw_if_index);
2569     }
2570
2571   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2572
2573   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2574
2575   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2576
2577   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2578
2579   h->ip4_over_ethernet[0].ip4 = src[0];
2580   h->ip4_over_ethernet[1].ip4 = dst[0];
2581
2582   b = vlib_get_buffer (vm, bi);
2583   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2584
2585   /* Add encapsulation string for software interface (e.g. ethernet header). */
2586   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2587   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2588
2589   {
2590     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2591     u32 * to_next = vlib_frame_vector_args (f);
2592     to_next[0] = bi;
2593     f->n_vectors = 1;
2594     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2595   }
2596
2597   return /* no error */ 0;
2598 }
2599
/*
 * Next-node indices of the ip4 rewrite nodes.
 * DROP is 0; the rewrite code tests a next index for being non-zero
 * before applying the ARP override.
 */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,        /* error-drop */
  IP4_REWRITE_NEXT_ARP = 1,         /* ip4-arp */
  IP4_REWRITE_NEXT_ICMP_ERROR = 2,  /* ip4-icmp-error */
} ip4_rewrite_next_t;
2605
2606 always_inline uword
2607 ip4_rewrite_inline (vlib_main_t * vm,
2608                     vlib_node_runtime_t * node,
2609                     vlib_frame_t * frame,
2610                     int rewrite_for_locally_received_packets)
2611 {
2612   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2613   u32 * from = vlib_frame_vector_args (frame);
2614   u32 n_left_from, n_left_to_next, * to_next, next_index;
2615   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2616   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2617
2618   n_left_from = frame->n_vectors;
2619   next_index = node->cached_next_index;
2620   u32 cpu_index = os_get_cpu_number();
2621   
2622   while (n_left_from > 0)
2623     {
2624       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2625
2626       while (n_left_from >= 4 && n_left_to_next >= 2)
2627         {
2628           ip_adjacency_t * adj0, * adj1;
2629           vlib_buffer_t * p0, * p1;
2630           ip4_header_t * ip0, * ip1;
2631           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2632           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2633           u32 next0_override, next1_override;
2634       
2635           if (rewrite_for_locally_received_packets)
2636               next0_override = next1_override = 0;
2637
2638           /* Prefetch next iteration. */
2639           {
2640             vlib_buffer_t * p2, * p3;
2641
2642             p2 = vlib_get_buffer (vm, from[2]);
2643             p3 = vlib_get_buffer (vm, from[3]);
2644
2645             vlib_prefetch_buffer_header (p2, STORE);
2646             vlib_prefetch_buffer_header (p3, STORE);
2647
2648             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2649             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2650           }
2651
2652           pi0 = to_next[0] = from[0];
2653           pi1 = to_next[1] = from[1];
2654
2655           from += 2;
2656           n_left_from -= 2;
2657           to_next += 2;
2658           n_left_to_next -= 2;
2659       
2660           p0 = vlib_get_buffer (vm, pi0);
2661           p1 = vlib_get_buffer (vm, pi1);
2662
2663           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2664           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2665
2666           /* We should never rewrite a pkt using the MISS adjacency */
2667           ASSERT(adj_index0 && adj_index1);
2668
2669           ip0 = vlib_buffer_get_current (p0);
2670           ip1 = vlib_buffer_get_current (p1);
2671
2672           error0 = error1 = IP4_ERROR_NONE;
2673           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2674
2675           /* Decrement TTL & update checksum.
2676              Works either endian, so no need for byte swap. */
2677           if (! rewrite_for_locally_received_packets)
2678             {
2679               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2680
2681               /* Input node should have reject packets with ttl 0. */
2682               ASSERT (ip0->ttl > 0);
2683               ASSERT (ip1->ttl > 0);
2684
2685               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2686               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2687
2688               checksum0 += checksum0 >= 0xffff;
2689               checksum1 += checksum1 >= 0xffff;
2690
2691               ip0->checksum = checksum0;
2692               ip1->checksum = checksum1;
2693
2694               ttl0 -= 1;
2695               ttl1 -= 1;
2696
2697               ip0->ttl = ttl0;
2698               ip1->ttl = ttl1;
2699
2700               /*
2701                * If the ttl drops below 1 when forwarding, generate
2702                * an ICMP response.
2703                */
2704               if (PREDICT_FALSE(ttl0 <= 0))
2705                 {
2706                   error0 = IP4_ERROR_TIME_EXPIRED;
2707                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2708                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2709                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2710                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2711                 }
2712               if (PREDICT_FALSE(ttl1 <= 0))
2713                 {
2714                   error1 = IP4_ERROR_TIME_EXPIRED;
2715                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2716                   icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2717                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2718                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2719                 }
2720
2721               /* Verify checksum. */
2722               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2723               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2724             }
2725
2726           /* Rewrite packet header and updates lengths. */
2727           adj0 = ip_get_adjacency (lm, adj_index0);
2728           adj1 = ip_get_adjacency (lm, adj_index1);
2729       
2730           if (rewrite_for_locally_received_packets)
2731             {
2732               /*
2733                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2734                * we end up here with a local adjacency in hand
2735                * The local adj rewrite data is 0xfefe on purpose.
2736                * Bad engineer, no donut for you.
2737                */
2738               if (PREDICT_FALSE(adj0->lookup_next_index 
2739                                 == IP_LOOKUP_NEXT_LOCAL))
2740                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2741               if (PREDICT_FALSE(adj0->lookup_next_index
2742                                 == IP_LOOKUP_NEXT_ARP))
2743                 next0_override = IP4_REWRITE_NEXT_ARP;
2744               if (PREDICT_FALSE(adj1->lookup_next_index 
2745                                 == IP_LOOKUP_NEXT_LOCAL))
2746                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2747               if (PREDICT_FALSE(adj1->lookup_next_index
2748                                 == IP_LOOKUP_NEXT_ARP))
2749                 next1_override = IP4_REWRITE_NEXT_ARP;
2750             }
2751
2752           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2753           rw_len0 = adj0[0].rewrite_header.data_bytes;
2754           rw_len1 = adj1[0].rewrite_header.data_bytes;
2755
2756           /* Check MTU of outgoing interface. */
2757           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2758                     ? IP4_ERROR_MTU_EXCEEDED
2759                     : error0);
2760           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2761                     ? IP4_ERROR_MTU_EXCEEDED
2762                     : error1);
2763
2764           next0 = (error0 == IP4_ERROR_NONE)
2765             ? adj0[0].rewrite_header.next_index : next0;
2766
2767           if (rewrite_for_locally_received_packets)
2768               next0 = next0 && next0_override ? next0_override : next0;
2769
2770           next1 = (error1 == IP4_ERROR_NONE)
2771             ? adj1[0].rewrite_header.next_index : next1;
2772
2773           if (rewrite_for_locally_received_packets)
2774               next1 = next1 && next1_override ? next1_override : next1;
2775
2776           /* 
2777            * We've already accounted for an ethernet_header_t elsewhere
2778            */
2779           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2780               vlib_increment_combined_counter 
2781                   (&lm->adjacency_counters,
2782                    cpu_index, adj_index0, 
2783                    /* packet increment */ 0,
2784                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2785
2786           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2787               vlib_increment_combined_counter 
2788                   (&lm->adjacency_counters,
2789                    cpu_index, adj_index1, 
2790                    /* packet increment */ 0,
2791                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2792
2793           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2794            * to see the IP headerr */
2795           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2796             {
2797               p0->current_data -= rw_len0;
2798               p0->current_length += rw_len0;
2799               p0->error = error_node->errors[error0];
2800               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2801                   adj0[0].rewrite_header.sw_if_index;
2802             }
2803           if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2804             {
2805               p1->current_data -= rw_len1;
2806               p1->current_length += rw_len1;
2807               p1->error = error_node->errors[error1];
2808               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2809                   adj1[0].rewrite_header.sw_if_index;
2810             }
2811
2812           /* Guess we are only writing on simple Ethernet header. */
2813           vnet_rewrite_two_headers (adj0[0], adj1[0],
2814                                     ip0, ip1,
2815                                     sizeof (ethernet_header_t));
2816       
2817           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2818                                            to_next, n_left_to_next,
2819                                            pi0, pi1, next0, next1);
2820         }
2821
2822       while (n_left_from > 0 && n_left_to_next > 0)
2823         {
2824           ip_adjacency_t * adj0;
2825           vlib_buffer_t * p0;
2826           ip4_header_t * ip0;
2827           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2828           u32 next0_override;
2829       
2830           if (rewrite_for_locally_received_packets)
2831               next0_override = 0;
2832
2833           pi0 = to_next[0] = from[0];
2834
2835           p0 = vlib_get_buffer (vm, pi0);
2836
2837           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2838
2839           /* We should never rewrite a pkt using the MISS adjacency */
2840           ASSERT(adj_index0);
2841
2842           adj0 = ip_get_adjacency (lm, adj_index0);
2843       
2844           ip0 = vlib_buffer_get_current (p0);
2845
2846           error0 = IP4_ERROR_NONE;
2847           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
2848
2849           /* Decrement TTL & update checksum. */
2850           if (! rewrite_for_locally_received_packets)
2851             {
2852               i32 ttl0 = ip0->ttl;
2853
2854               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2855
2856               checksum0 += checksum0 >= 0xffff;
2857
2858               ip0->checksum = checksum0;
2859
2860               ASSERT (ip0->ttl > 0);
2861
2862               ttl0 -= 1;
2863
2864               ip0->ttl = ttl0;
2865
2866               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2867
2868               if (PREDICT_FALSE(ttl0 <= 0))
2869                 {
2870                   /*
2871                    * If the ttl drops below 1 when forwarding, generate
2872                    * an ICMP response.
2873                    */
2874                   error0 = IP4_ERROR_TIME_EXPIRED;
2875                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2876                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2877                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2878                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2879                 }
2880             }
2881
2882           if (rewrite_for_locally_received_packets)
2883             {
2884               /*
2885                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2886                * we end up here with a local adjacency in hand
2887                * The local adj rewrite data is 0xfefe on purpose.
2888                * Bad engineer, no donut for you.
2889                */
2890               if (PREDICT_FALSE(adj0->lookup_next_index 
2891                                 == IP_LOOKUP_NEXT_LOCAL))
2892                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2893               /* 
2894                * We have to override the next_index in ARP adjacencies,
2895                * because they're set up for ip4-arp, not this node...
2896                */
2897               if (PREDICT_FALSE(adj0->lookup_next_index
2898                                 == IP_LOOKUP_NEXT_ARP))
2899                 next0_override = IP4_REWRITE_NEXT_ARP;
2900             }
2901
2902           /* Guess we are only writing on simple Ethernet header. */
2903           vnet_rewrite_one_header (adj0[0], ip0, 
2904                                    sizeof (ethernet_header_t));
2905           
2906           /* Update packet buffer attributes/set output interface. */
2907           rw_len0 = adj0[0].rewrite_header.data_bytes;
2908           
2909           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2910               vlib_increment_combined_counter 
2911                   (&lm->adjacency_counters,
2912                    cpu_index, adj_index0, 
2913                    /* packet increment */ 0,
2914                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2915           
2916           /* Check MTU of outgoing interface. */
2917           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2918                     > adj0[0].rewrite_header.max_l3_packet_bytes
2919                     ? IP4_ERROR_MTU_EXCEEDED
2920                     : error0);
2921
2922           p0->error = error_node->errors[error0];
2923
2924           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2925            * to see the IP headerr */
2926           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2927             {
2928               p0->current_data -= rw_len0;
2929               p0->current_length += rw_len0;
2930
2931               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2932                   adj0[0].rewrite_header.sw_if_index;
2933               next0 = adj0[0].rewrite_header.next_index;
2934             }
2935
2936           if (rewrite_for_locally_received_packets)
2937               next0 = next0 && next0_override ? next0_override : next0;
2938
2939           from += 1;
2940           n_left_from -= 1;
2941           to_next += 1;
2942           n_left_to_next -= 1;
2943       
2944           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2945                                            to_next, n_left_to_next,
2946                                            pi0, next0);
2947         }
2948   
2949       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2950     }
2951
2952   /* Need to do trace after rewrites to pick up new packet data. */
2953   if (node->flags & VLIB_NODE_FLAG_TRACE)
2954     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2955
2956   return frame->n_vectors;
2957 }
2958
2959
2960 /** @brief IPv4 transit rewrite node.
2961     @node ip4-rewrite-transit
2962
2963     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2964     header checksum, fetch the ip adjacency, check the outbound mtu,
2965     apply the adjacency rewrite, and send pkts to the adjacency
2966     rewrite header's rewrite_next_index.
2967
2968     @param vm vlib_main_t corresponding to the current thread
2969     @param node vlib_node_runtime_t
2970     @param frame vlib_frame_t whose contents should be dispatched
2971
2972     @par Graph mechanics: buffer metadata, next index usage
2973
2974     @em Uses:
2975     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2976         - the rewrite adjacency index
2977     - <code>adj->lookup_next_index</code>
2978         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2979           the packet will be dropped. 
2980     - <code>adj->rewrite_header</code>
2981         - Rewrite string length, rewrite string, next_index
2982
2983     @em Sets:
2984     - <code>b->current_data, b->current_length</code>
2985         - Updated net of applying the rewrite string
2986
2987     <em>Next Indices:</em>
2988     - <code> adj->rewrite_header.next_index </code>
2989       or @c error-drop 
2990 */
2991 static uword
2992 ip4_rewrite_transit (vlib_main_t * vm,
2993                      vlib_node_runtime_t * node,
2994                      vlib_frame_t * frame)
2995 {
2996   return ip4_rewrite_inline (vm, node, frame,
2997                              /* rewrite_for_locally_received_packets */ 0);
2998 }
2999
3000 /** @brief IPv4 local rewrite node.
3001     @node ip4-rewrite-local
3002
3003     This is the IPv4 local rewrite node. Fetch the ip adjacency, check
3004     the outbound interface mtu, apply the adjacency rewrite, and send
3005     pkts to the adjacency rewrite header's rewrite_next_index. Deal
3006     with hemorrhoids of the form "some clown sends an icmp4 w/ src =
3007     dst = interface addr."
3008
3009     @param vm vlib_main_t corresponding to the current thread
3010     @param node vlib_node_runtime_t
3011     @param frame vlib_frame_t whose contents should be dispatched
3012
3013     @par Graph mechanics: buffer metadata, next index usage
3014
3015     @em Uses:
3016     - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
3017         - the rewrite adjacency index
3018     - <code>adj->lookup_next_index</code>
3019         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
3020           the packet will be dropped. 
3021     - <code>adj->rewrite_header</code>
3022         - Rewrite string length, rewrite string, next_index
3023
3024     @em Sets:
3025     - <code>b->current_data, b->current_length</code>
3026         - Updated net of applying the rewrite string
3027
3028     <em>Next Indices:</em>
3029     - <code> adj->rewrite_header.next_index </code>
3030       or @c error-drop 
3031 */
3032
3033 static uword
3034 ip4_rewrite_local (vlib_main_t * vm,
3035                    vlib_node_runtime_t * node,
3036                    vlib_frame_t * frame)
3037 {
3038   return ip4_rewrite_inline (vm, node, frame,
3039                              /* rewrite_for_locally_received_packets */ 1);
3040 }
3041
3042 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
3043   .function = ip4_rewrite_transit,
3044   .name = "ip4-rewrite-transit",
3045   .vector_size = sizeof (u32),
3046
3047   .format_trace = format_ip4_rewrite_trace,
3048
3049   .n_next_nodes = 3,
3050   .next_nodes = {
3051     [IP4_REWRITE_NEXT_DROP] = "error-drop",
3052     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
3053     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3054   },
3055 };
3056
3057 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
3058
3059 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
3060   .function = ip4_rewrite_local,
3061   .name = "ip4-rewrite-local",
3062   .vector_size = sizeof (u32),
3063
3064   .sibling_of = "ip4-rewrite-transit",
3065
3066   .format_trace = format_ip4_rewrite_trace,
3067
3068   .n_next_nodes = 0,
3069 };
3070
3071 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
3072
3073 static clib_error_t *
3074 add_del_interface_table (vlib_main_t * vm,
3075                          unformat_input_t * input,
3076                          vlib_cli_command_t * cmd)
3077 {
3078   vnet_main_t * vnm = vnet_get_main();
3079   clib_error_t * error = 0;
3080   u32 sw_if_index, table_id;
3081
3082   sw_if_index = ~0;
3083
3084   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
3085     {
3086       error = clib_error_return (0, "unknown interface `%U'",
3087                                  format_unformat_error, input);
3088       goto done;
3089     }
3090
3091   if (unformat (input, "%d", &table_id))
3092     ;
3093   else
3094     {
3095       error = clib_error_return (0, "expected table id `%U'",
3096                                  format_unformat_error, input);
3097       goto done;
3098     }
3099
3100   {
3101     ip4_main_t * im = &ip4_main;
3102     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
3103
3104     if (fib) 
3105       {
3106         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
3107         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
3108     }
3109   }
3110
3111  done:
3112   return error;
3113 }
3114
3115 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
3116   .path = "set interface ip table",
3117   .function = add_del_interface_table,
3118   .short_help = "Add/delete FIB table id for interface",
3119 };
3120
3121
3122 static uword
3123 ip4_lookup_multicast (vlib_main_t * vm,
3124                       vlib_node_runtime_t * node,
3125                       vlib_frame_t * frame)
3126 {
3127   ip4_main_t * im = &ip4_main;
3128   ip_lookup_main_t * lm = &im->lookup_main;
3129   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
3130   u32 n_left_from, n_left_to_next, * from, * to_next;
3131   ip_lookup_next_t next;
3132   u32 cpu_index = os_get_cpu_number();
3133
3134   from = vlib_frame_vector_args (frame);
3135   n_left_from = frame->n_vectors;
3136   next = node->cached_next_index;
3137
3138   while (n_left_from > 0)
3139     {
3140       vlib_get_next_frame (vm, node, next,
3141                            to_next, n_left_to_next);
3142
3143       while (n_left_from >= 4 && n_left_to_next >= 2)
3144         {
3145           vlib_buffer_t * p0, * p1;
3146           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
3147           ip_lookup_next_t next0, next1;
3148           ip4_header_t * ip0, * ip1;
3149           ip_adjacency_t * adj0, * adj1;
3150           u32 fib_index0, fib_index1;
3151           u32 flow_hash_config0, flow_hash_config1;
3152
3153           /* Prefetch next iteration. */
3154           {
3155             vlib_buffer_t * p2, * p3;
3156
3157             p2 = vlib_get_buffer (vm, from[2]);
3158             p3 = vlib_get_buffer (vm, from[3]);
3159
3160             vlib_prefetch_buffer_header (p2, LOAD);
3161             vlib_prefetch_buffer_header (p3, LOAD);
3162
3163             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
3164             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
3165           }
3166
3167           pi0 = to_next[0] = from[0];
3168           pi1 = to_next[1] = from[1];
3169
3170           p0 = vlib_get_buffer (vm, pi0);
3171           p1 = vlib_get_buffer (vm, pi1);
3172
3173           ip0 = vlib_buffer_get_current (p0);
3174           ip1 = vlib_buffer_get_current (p1);
3175
3176           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3177           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
3178           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3179             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3180           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
3181             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
3182
3183           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3184                                               &ip0->dst_address, p0);
3185           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
3186                                               &ip1->dst_address, p1);
3187
3188           adj0 = ip_get_adjacency (lm, adj_index0);
3189           adj1 = ip_get_adjacency (lm, adj_index1);
3190
3191           next0 = adj0->lookup_next_index;
3192           next1 = adj1->lookup_next_index;
3193
3194           flow_hash_config0 = 
3195               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3196
3197           flow_hash_config1 = 
3198               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
3199
3200           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
3201               (ip0, flow_hash_config0);
3202                                                                   
3203           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
3204               (ip1, flow_hash_config1);
3205
3206           ASSERT (adj0->n_adj > 0);
3207           ASSERT (adj1->n_adj > 0);
3208           ASSERT (is_pow2 (adj0->n_adj));
3209           ASSERT (is_pow2 (adj1->n_adj));
3210           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3211           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
3212
3213           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3214           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
3215
3216           if (1) /* $$$$$$ HACK FIXME */
3217           vlib_increment_combined_counter 
3218               (cm, cpu_index, adj_index0, 1,
3219                vlib_buffer_length_in_chain (vm, p0));
3220           if (1) /* $$$$$$ HACK FIXME */
3221           vlib_increment_combined_counter 
3222               (cm, cpu_index, adj_index1, 1,
3223                vlib_buffer_length_in_chain (vm, p1));
3224
3225           from += 2;
3226           to_next += 2;
3227           n_left_to_next -= 2;
3228           n_left_from -= 2;
3229
3230           wrong_next = (next0 != next) + 2*(next1 != next);
3231           if (PREDICT_FALSE (wrong_next != 0))
3232             {
3233               switch (wrong_next)
3234                 {
3235                 case 1:
3236                   /* A B A */
3237                   to_next[-2] = pi1;
3238                   to_next -= 1;
3239                   n_left_to_next += 1;
3240                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3241                   break;
3242
3243                 case 2:
3244                   /* A A B */
3245                   to_next -= 1;
3246                   n_left_to_next += 1;
3247                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3248                   break;
3249
3250                 case 3:
3251                   /* A B C */
3252                   to_next -= 2;
3253                   n_left_to_next += 2;
3254                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3255                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3256                   if (next0 == next1)
3257                     {
3258                       /* A B B */
3259                       vlib_put_next_frame (vm, node, next, n_left_to_next);
3260                       next = next1;
3261                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
3262                     }
3263                 }
3264             }
3265         }
3266     
3267       while (n_left_from > 0 && n_left_to_next > 0)
3268         {
3269           vlib_buffer_t * p0;
3270           ip4_header_t * ip0;
3271           u32 pi0, adj_index0;
3272           ip_lookup_next_t next0;
3273           ip_adjacency_t * adj0;
3274           u32 fib_index0;
3275           u32 flow_hash_config0;
3276
3277           pi0 = from[0];
3278           to_next[0] = pi0;
3279
3280           p0 = vlib_get_buffer (vm, pi0);
3281
3282           ip0 = vlib_buffer_get_current (p0);
3283
3284           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
3285                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3286           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3287               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3288           
3289           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3290                                               &ip0->dst_address, p0);
3291
3292           adj0 = ip_get_adjacency (lm, adj_index0);
3293
3294           next0 = adj0->lookup_next_index;
3295
3296           flow_hash_config0 = 
3297               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3298
3299           vnet_buffer (p0)->ip.flow_hash = 
3300             ip4_compute_flow_hash (ip0, flow_hash_config0);
3301
3302           ASSERT (adj0->n_adj > 0);
3303           ASSERT (is_pow2 (adj0->n_adj));
3304           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3305
3306           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3307
3308           if (1) /* $$$$$$ HACK FIXME */
3309               vlib_increment_combined_counter 
3310                   (cm, cpu_index, adj_index0, 1,
3311                    vlib_buffer_length_in_chain (vm, p0));
3312
3313           from += 1;
3314           to_next += 1;
3315           n_left_to_next -= 1;
3316           n_left_from -= 1;
3317
3318           if (PREDICT_FALSE (next0 != next))
3319             {
3320               n_left_to_next += 1;
3321               vlib_put_next_frame (vm, node, next, n_left_to_next);
3322               next = next0;
3323               vlib_get_next_frame (vm, node, next,
3324                                    to_next, n_left_to_next);
3325               to_next[0] = pi0;
3326               to_next += 1;
3327               n_left_to_next -= 1;
3328             }
3329         }
3330
3331       vlib_put_next_frame (vm, node, next, n_left_to_next);
3332     }
3333
3334   if (node->flags & VLIB_NODE_FLAG_TRACE)
3335       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
3336
3337   return frame->n_vectors;
3338 }
3339
3340 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3341   .function = ip4_lookup_multicast,
3342   .name = "ip4-lookup-multicast",
3343   .vector_size = sizeof (u32),
3344   .sibling_of = "ip4-lookup",
3345   .format_trace = format_ip4_lookup_trace,
3346
3347   .n_next_nodes = 0,
3348 };
3349
3350 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
3351
3352 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3353   .function = ip4_drop,
3354   .name = "ip4-multicast",
3355   .vector_size = sizeof (u32),
3356
3357   .format_trace = format_ip4_forward_next_trace,
3358
3359   .n_next_nodes = 1,
3360   .next_nodes = {
3361     [0] = "error-drop",
3362   },
3363 };
3364
3365 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3366 {
3367   ip4_main_t * im = &ip4_main;
3368   ip4_fib_mtrie_t * mtrie0;
3369   ip4_fib_mtrie_leaf_t leaf0;
3370   u32 adj_index0;
3371     
3372   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3373
3374   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3375   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3376   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3377   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3378   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3379   
3380   /* Handle default route. */
3381   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3382   
3383   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3384   
3385   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3386                                                   a, 
3387                                                   /* no_default_route */ 0);
3388 }
3389  
3390 static clib_error_t *
3391 test_lookup_command_fn (vlib_main_t * vm,
3392                         unformat_input_t * input,
3393                         vlib_cli_command_t * cmd)
3394 {
3395   u32 table_id = 0;
3396   f64 count = 1;
3397   u32 n;
3398   int i;
3399   ip4_address_t ip4_base_address;
3400   u64 errors = 0;
3401
3402   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3403       if (unformat (input, "table %d", &table_id))
3404         ;
3405       else if (unformat (input, "count %f", &count))
3406         ;
3407
3408       else if (unformat (input, "%U",
3409                          unformat_ip4_address, &ip4_base_address))
3410         ;
3411       else
3412         return clib_error_return (0, "unknown input `%U'",
3413                                   format_unformat_error, input);
3414   }
3415
3416   n = count;
3417
3418   for (i = 0; i < n; i++)
3419     {
3420       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3421         errors++;
3422
3423       ip4_base_address.as_u32 = 
3424         clib_host_to_net_u32 (1 + 
3425                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3426     }
3427
3428   if (errors) 
3429     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3430   else
3431     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3432
3433   return 0;
3434 }
3435
3436 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3437     .path = "test lookup",
3438     .short_help = "test lookup",
3439     .function = test_lookup_command_fn,
3440 };
3441
3442 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3443 {
3444   ip4_main_t * im4 = &ip4_main;
3445   ip4_fib_t * fib;
3446   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3447
3448   if (p == 0)
3449     return VNET_API_ERROR_NO_SUCH_FIB;
3450
3451   fib = vec_elt_at_index (im4->fibs, p[0]);
3452
3453   fib->flow_hash_config = flow_hash_config;
3454   return 0;
3455 }
3456  
3457 static clib_error_t *
3458 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3459                              unformat_input_t * input,
3460                              vlib_cli_command_t * cmd)
3461 {
3462   int matched = 0;
3463   u32 table_id = 0;
3464   u32 flow_hash_config = 0;
3465   int rv;
3466
3467   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3468     if (unformat (input, "table %d", &table_id))
3469       matched = 1;
3470 #define _(a,v) \
3471     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3472     foreach_flow_hash_bit
3473 #undef _
3474     else break;
3475   }
3476   
3477   if (matched == 0)
3478     return clib_error_return (0, "unknown input `%U'",
3479                               format_unformat_error, input);
3480   
3481   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3482   switch (rv)
3483     {
3484     case 0:
3485       break;
3486       
3487     case VNET_API_ERROR_NO_SUCH_FIB:
3488       return clib_error_return (0, "no such FIB table %d", table_id);
3489       
3490     default:
3491       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3492       break;
3493     }
3494   
3495   return 0;
3496 }
3497  
3498 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3499   .path = "set ip flow-hash",
3500   .short_help = 
3501   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3502   .function = set_ip_flow_hash_command_fn,
3503 };
3504  
3505 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3506                                  u32 table_index)
3507 {
3508   vnet_main_t * vnm = vnet_get_main();
3509   vnet_interface_main_t * im = &vnm->interface_main;
3510   ip4_main_t * ipm = &ip4_main;
3511   ip_lookup_main_t * lm = &ipm->lookup_main;
3512   vnet_classify_main_t * cm = &vnet_classify_main;
3513
3514   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3515     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3516
3517   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3518     return VNET_API_ERROR_NO_SUCH_ENTRY;
3519
3520   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3521   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3522
3523   return 0;
3524 }
3525
3526 static clib_error_t *
3527 set_ip_classify_command_fn (vlib_main_t * vm,
3528                             unformat_input_t * input,
3529                             vlib_cli_command_t * cmd)
3530 {
3531   u32 table_index = ~0;
3532   int table_index_set = 0;
3533   u32 sw_if_index = ~0;
3534   int rv;
3535   
3536   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3537     if (unformat (input, "table-index %d", &table_index))
3538       table_index_set = 1;
3539     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3540                        vnet_get_main(), &sw_if_index))
3541       ;
3542     else
3543       break;
3544   }
3545       
3546   if (table_index_set == 0)
3547     return clib_error_return (0, "classify table-index must be specified");
3548
3549   if (sw_if_index == ~0)
3550     return clib_error_return (0, "interface / subif must be specified");
3551
3552   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3553
3554   switch (rv)
3555     {
3556     case 0:
3557       break;
3558
3559     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3560       return clib_error_return (0, "No such interface");
3561
3562     case VNET_API_ERROR_NO_SUCH_ENTRY:
3563       return clib_error_return (0, "No such classifier table");
3564     }
3565   return 0;
3566 }
3567
3568 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3569     .path = "set ip classify",
3570     .short_help = 
3571     "set ip classify intfc <int> table-index <index>",
3572     .function = set_ip_classify_command_fn,
3573 };
3574