VPP-166 Documentation changes for ip4_forward.c
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 /** for ethernet_header_t */
43 #include <vnet/ethernet/ethernet.h>
44 /** for ethernet_arp_header_t */
45 #include <vnet/ethernet/arp_packet.h>   
46 #include <vnet/ppp/ppp.h>
47 /** for srp_hw_interface_class */
48 #include <vnet/srp/srp.h>
49 /** for API error numbers */
50 #include <vnet/api_errno.h>     
51
52 /** @file
53     vnet ip4 forwarding
54 */
55
56 /* This is really, really simple but stupid fib. */
57 u32
58 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
59                            ip4_address_t * dst,
60                            u32 disable_default_route)
61 {
62   ip_lookup_main_t * lm = &im->lookup_main;
63   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
64   uword * p, * hash, key;
65   i32 i, i_min, dst_address, ai;
66
67   i_min = disable_default_route ? 1 : 0;
68   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
69   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
70     {
71       hash = fib->adj_index_by_dst_address[i];
72       if (! hash)
73         continue;
74
75       key = dst_address & im->fib_masks[i];
76       if ((p = hash_get (hash, key)) != 0)
77         {
78           ai = p[0];
79           goto done;
80         }
81     }
82
83   /* Nothing matches in table. */
84   ai = lm->miss_adj_index;
85
86  done:
87   return ai;
88 }
89
90 /** @brief Create FIB from table ID and init all hashing.
91     @param im - @ref ip4_main_t
92     @param table_id - table ID
93     @return fib - @ref ip4_fib_t
94 */
95 static ip4_fib_t *
96 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
97 {
98   ip4_fib_t * fib;
99   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
100   vec_add2 (im->fibs, fib, 1);
101   fib->table_id = table_id;
102   fib->index = fib - im->fibs;
103   /* IP_FLOW_HASH_DEFAULT is net value of 5 tuple flags without "reverse" bit */
104   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
105   fib->fwd_classify_table_index = ~0;
106   fib->rev_classify_table_index = ~0;
107   ip4_mtrie_init (&fib->mtrie);
108   return fib;
109 }
110
111 /** @brief Find existing or Create new FIB based on index
112     @param im @ref ip4_main_t
113     @param table_index_or_id - overloaded parameter referring
114            to the table or a table's index in the FIB vector
115     @param flags - used to check if table_index_or_id was a table or
116            an index (detected by @ref IP4_ROUTE_FLAG_FIB_INDEX)
117     @return either the existing or a new ip4_fib_t entry
118 */
119 ip4_fib_t *
120 find_ip4_fib_by_table_index_or_id (ip4_main_t * im,
121                                    u32 table_index_or_id, u32 flags)
122 {
123   uword * p, fib_index;
124
125   fib_index = table_index_or_id;
126   /* If this isn't a FIB_INDEX ... */
127   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
128     {
129       /* If passed ~0 then request the next table available */
130       if (table_index_or_id == ~0) {
131         table_index_or_id = 0;
132         while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
133           table_index_or_id++;
134         }
135         /* Create the next table and return the ip4_fib_t associated with it */
136         return create_fib_with_table_id (im, table_index_or_id);
137       }
138       /* A specific table_id was requested.. */
139       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
140       /* ... and if it doesn't exist create it else grab its index */
141       if (! p)
142         return create_fib_with_table_id (im, table_index_or_id);
143       fib_index = p[0];
144     }
145   /* Return the ip4_fib_t associated with this index */
146   return vec_elt_at_index (im->fibs, fib_index);
147 }
148
149 static void
150 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
151                                        ip4_fib_t * fib,
152                                        u32 address_length)
153 {
154   hash_t * h;
155   uword max_index;
156
157   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
158   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
159
160   fib->adj_index_by_dst_address[address_length] =
161     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
162
163   hash_set_flags (fib->adj_index_by_dst_address[address_length],
164                   HASH_FLAG_NO_AUTO_SHRINK);
165
166   h = hash_header (fib->adj_index_by_dst_address[address_length]);
167   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
168
169   /* Initialize new/old hash value vectors. */
170   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
171   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
172 }
173
174 static void
175 ip4_fib_set_adj_index (ip4_main_t * im,
176                        ip4_fib_t * fib,
177                        u32 flags,
178                        u32 dst_address_u32,
179                        u32 dst_address_length,
180                        u32 adj_index)
181 {
182   ip_lookup_main_t * lm = &im->lookup_main;
183   uword * hash;
184
185   if (vec_bytes(fib->old_hash_values))
186     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
187   if (vec_bytes(fib->new_hash_values))
188     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
189   fib->new_hash_values[0] = adj_index;
190
191   /* Make sure adj index is valid. */
192   if (CLIB_DEBUG > 0)
193     (void) ip_get_adjacency (lm, adj_index);
194
195   hash = fib->adj_index_by_dst_address[dst_address_length];
196
197   hash = _hash_set3 (hash, dst_address_u32,
198                      fib->new_hash_values,
199                      fib->old_hash_values);
200
201   fib->adj_index_by_dst_address[dst_address_length] = hash;
202
203   if (vec_len (im->add_del_route_callbacks) > 0)
204     {
205       ip4_add_del_route_callback_t * cb;
206       ip4_address_t d;
207       uword * p;
208
209       d.data_u32 = dst_address_u32;
210       vec_foreach (cb, im->add_del_route_callbacks)
211         if ((flags & cb->required_flags) == cb->required_flags)
212           cb->function (im, cb->function_opaque,
213                         fib, flags,
214                         &d, dst_address_length,
215                         fib->old_hash_values,
216                         fib->new_hash_values);
217
218       p = hash_get (hash, dst_address_u32);
219       /* hash_get should never return NULL here */
220       if (p)
221           clib_memcpy (p, fib->new_hash_values, 
222                        vec_bytes (fib->new_hash_values));
223       else
224           ASSERT(0);
225     }
226 }
227
228 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
229 {
230   ip_lookup_main_t * lm = &im->lookup_main;
231   ip4_fib_t * fib;
232   u32 dst_address, dst_address_length, adj_index, old_adj_index;
233   uword * hash, is_del;
234   ip4_add_del_route_callback_t * cb;
235
236   /* Either create new adjacency or use given one depending on arguments. */
237   if (a->n_add_adj > 0)
238     {
239       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
240       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
241     }
242   else
243     adj_index = a->adj_index;
244
245   dst_address = a->dst_address.data_u32;
246   dst_address_length = a->dst_address_length;
247   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
248
249   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
250   dst_address &= im->fib_masks[dst_address_length];
251
252   if (! fib->adj_index_by_dst_address[dst_address_length])
253     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
254
255   hash = fib->adj_index_by_dst_address[dst_address_length];
256
257   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
258
259   if (is_del)
260     {
261       fib->old_hash_values[0] = ~0;
262       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
263       fib->adj_index_by_dst_address[dst_address_length] = hash;
264
265       if (vec_len (im->add_del_route_callbacks) > 0
266           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
267         {
268           fib->new_hash_values[0] = ~0;
269           vec_foreach (cb, im->add_del_route_callbacks)
270             if ((a->flags & cb->required_flags) == cb->required_flags)
271               cb->function (im, cb->function_opaque,
272                             fib, a->flags,
273                             &a->dst_address, dst_address_length,
274                             fib->old_hash_values,
275                             fib->new_hash_values);
276         }
277     }
278   else
279     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
280                            adj_index);
281
282   old_adj_index = fib->old_hash_values[0];
283
284   /* Avoid spurious reference count increments */
285   if (old_adj_index == adj_index
286       && adj_index != ~0
287       && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
288     {
289       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
290       if (adj->share_count > 0)
291         adj->share_count --;
292     }
293
294   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
295                                is_del ? old_adj_index : adj_index,
296                                is_del);
297
298   /* Delete old adjacency index if present and changed. */
299   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
300       && old_adj_index != ~0
301       && old_adj_index != adj_index)
302     ip_del_adjacency (lm, old_adj_index);
303 }
304
305
306 u32
307 ip4_route_get_next_hop_adj (ip4_main_t * im,
308                             u32 fib_index,
309                             ip4_address_t *next_hop,
310                             u32 next_hop_sw_if_index,
311                             u32 explicit_fib_index)
312 {
313   ip_lookup_main_t * lm = &im->lookup_main;
314   vnet_main_t * vnm = vnet_get_main();
315   uword * nh_hash, * nh_result;
316   int is_interface_next_hop;
317   u32 nh_adj_index;
318   ip4_fib_t * fib;
319
320   fib = vec_elt_at_index (im->fibs, fib_index);
321
322   is_interface_next_hop = next_hop->data_u32 == 0;
323   if (is_interface_next_hop)
324     {
325       nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
326       if (nh_result)
327           nh_adj_index = *nh_result;
328       else
329         {
330            ip_adjacency_t * adj;
331            adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
332                                    &nh_adj_index);
333            ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
334            ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
335            hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
336         }
337     }
338   else if (next_hop_sw_if_index == ~0)
339     {
340       /* next-hop is recursive. we always need a indirect adj
341        * for recursive paths. Any LPM we perform now will give
342        * us a valid adj, but without tracking the next-hop we
343        * have no way to keep it valid.
344        */
345       ip_adjacency_t add_adj;
346       memset (&add_adj, 0, sizeof(add_adj));
347       add_adj.n_adj = 1;
348       add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
349       add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
350       add_adj.explicit_fib_index = explicit_fib_index;
351       ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
352     }
353   else
354     {
355       nh_hash = fib->adj_index_by_dst_address[32];
356       nh_result = hash_get (nh_hash, next_hop->data_u32);
357
358       /* Next hop must be known. */
359       if (! nh_result)
360         {
361           ip_adjacency_t * adj;
362
363           /* no /32 exists, get the longest prefix match */
364           nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
365                                                     next_hop, 0);
366           adj = ip_get_adjacency (lm, nh_adj_index);
367           /* if ARP interface adjacency is present, we need to
368              install ARP adjaceny for specific next hop */
369           if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
370               adj->arp.next_hop.ip4.as_u32 == 0)
371             {
372               nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
373             }
374         }
375       else
376         {
377           nh_adj_index = *nh_result;
378         }
379     }
380
381   return (nh_adj_index);
382 }
383
384 void
385 ip4_add_del_route_next_hop (ip4_main_t * im,
386                             u32 flags,
387                             ip4_address_t * dst_address,
388                             u32 dst_address_length,
389                             ip4_address_t * next_hop,
390                             u32 next_hop_sw_if_index,
391                             u32 next_hop_weight, u32 adj_index, 
392                             u32 explicit_fib_index)
393 {
394   vnet_main_t * vnm = vnet_get_main();
395   ip_lookup_main_t * lm = &im->lookup_main;
396   u32 fib_index;
397   ip4_fib_t * fib;
398   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
399   u32 dst_adj_index, nh_adj_index;
400   uword * dst_hash, * dst_result;
401   ip_adjacency_t * dst_adj;
402   ip_multipath_adjacency_t * old_mp, * new_mp;
403   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
404   clib_error_t * error = 0;
405
406   if (explicit_fib_index == (u32)~0)
407       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
408   else
409       fib_index = explicit_fib_index;
410
411   fib = vec_elt_at_index (im->fibs, fib_index);
412
413   /* Lookup next hop to be added or deleted. */
414   if (adj_index == (u32)~0)
415     {
416         nh_adj_index = ip4_route_get_next_hop_adj(im, fib_index,
417                                                   next_hop,
418                                                   next_hop_sw_if_index,
419                                                   explicit_fib_index);
420     }
421   else
422     {
423       nh_adj_index = adj_index;
424     }
425   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
426   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
427
428   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
429   dst_result = hash_get (dst_hash, dst_address_u32);
430   if (dst_result)
431     {
432       dst_adj_index = dst_result[0];
433       dst_adj = ip_get_adjacency (lm, dst_adj_index);
434     }
435   else
436     {
437       /* For deletes destination must be known. */
438       if (is_del)
439         {
440           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
441           error = clib_error_return (0, "unknown destination %U/%d",
442                                      format_ip4_address, dst_address,
443                                      dst_address_length);
444           goto done;
445         }
446
447       dst_adj_index = ~0;
448       dst_adj = 0;
449     }
450
451   /* Ignore adds of X/32 with next hop of X. */
452   if (! is_del
453       && dst_address_length == 32
454       && dst_address->data_u32 == next_hop->data_u32 
455       && adj_index != (u32)~0)
456     {
457       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
458       error = clib_error_return (0, "prefix matches next hop %U/%d",
459                                  format_ip4_address, dst_address,
460                                  dst_address_length);
461       goto done;
462     }
463
464   /* Destination is not known and default weight is set so add route
465      to existing non-multipath adjacency */
466   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
467     {
468       /* create / delete additional mapping of existing adjacency */
469       ip4_add_del_route_args_t a;
470
471       a.table_index_or_table_id = fib_index;
472       a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
473                  | IP4_ROUTE_FLAG_FIB_INDEX
474                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
475                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
476                              | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
477       a.dst_address = dst_address[0];
478       a.dst_address_length = dst_address_length;
479       a.adj_index = nh_adj_index;
480       a.add_adj = 0;
481       a.n_add_adj = 0;
482
483       ip4_add_del_route (im, &a);
484       goto done;
485     }
486
487   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
488
489   if (! ip_multipath_adjacency_add_del_next_hop
490       (lm, is_del,
491        old_mp_adj_index,
492        nh_adj_index,
493        next_hop_weight,
494        &new_mp_adj_index))
495     {
496       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
497       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
498                                  format_ip4_address, next_hop);
499       goto done;
500     }
501   
502   old_mp = new_mp = 0;
503   if (old_mp_adj_index != ~0)
504     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
505   if (new_mp_adj_index != ~0)
506     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
507
508   if (old_mp != new_mp)
509     {
510       ip4_add_del_route_args_t a;
511       ip_adjacency_t * adj;
512
513       a.table_index_or_table_id = fib_index;
514       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
515                  | IP4_ROUTE_FLAG_FIB_INDEX
516                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
517                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
518       a.dst_address = dst_address[0];
519       a.dst_address_length = dst_address_length;
520       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
521       a.add_adj = 0;
522       a.n_add_adj = 0;
523
524       ip4_add_del_route (im, &a);
525
526       adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index);
527       if (adj->n_adj == 1)
528         adj->share_count += is_del ? -1 : 1;
529     }
530
531  done:
532   if (error)
533     clib_error_report (error);
534 }
535
536 void *
537 ip4_get_route (ip4_main_t * im,
538                u32 table_index_or_table_id,
539                u32 flags,
540                u8 * address,
541                u32 address_length)
542 {
543   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
544   u32 dst_address = * (u32 *) address;
545   uword * hash, * p;
546
547   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
548   dst_address &= im->fib_masks[address_length];
549
550   hash = fib->adj_index_by_dst_address[address_length];
551   p = hash_get (hash, dst_address);
552   return (void *) p;
553 }
554
555 void
556 ip4_foreach_matching_route (ip4_main_t * im,
557                             u32 table_index_or_table_id,
558                             u32 flags,
559                             ip4_address_t * address,
560                             u32 address_length,
561                             ip4_address_t ** results,
562                             u8 ** result_lengths)
563 {
564   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
565   u32 dst_address = address->data_u32;
566   u32 this_length = address_length;
567   
568   if (*results)
569     _vec_len (*results) = 0;
570   if (*result_lengths)
571     _vec_len (*result_lengths) = 0;
572
573   while (this_length <= 32 && vec_len (results) == 0)
574     {
575       uword k, v;
576       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
577         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
578           {
579             ip4_address_t a;
580             a.data_u32 = k;
581             vec_add1 (*results, a);
582             vec_add1 (*result_lengths, this_length);
583           }
584       }));
585
586       this_length++;
587     }
588 }
589
590 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
591                                   u32 table_index_or_table_id,
592                                   u32 flags)
593 {
594   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
595   ip_lookup_main_t * lm = &im->lookup_main;
596   u32 i, l;
597   ip4_address_t a;
598   ip4_add_del_route_callback_t * cb;
599   static ip4_address_t * to_delete;
600
601   if (lm->n_adjacency_remaps == 0)
602     return;
603
604   for (l = 0; l <= 32; l++)
605     {
606       hash_pair_t * p;
607       uword * hash = fib->adj_index_by_dst_address[l];
608
609       if (hash_elts (hash) == 0)
610         continue;
611
612       if (to_delete)
613         _vec_len (to_delete) = 0;
614
615       hash_foreach_pair (p, hash, ({
616         u32 adj_index = p->value[0];
617         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
618
619         if (m)
620           {
621             /* Record destination address from hash key. */
622             a.data_u32 = p->key;
623
624             /* New adjacency points to nothing: so delete prefix. */
625             if (m == ~0)
626               vec_add1 (to_delete, a);
627             else
628               {
629                 /* Remap to new adjacency. */
630                 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
631
632                 /* Set new adjacency value. */
633                 fib->new_hash_values[0] = p->value[0] = m - 1;
634
635                 vec_foreach (cb, im->add_del_route_callbacks)
636                   if ((flags & cb->required_flags) == cb->required_flags)
637                     cb->function (im, cb->function_opaque,
638                                   fib, flags | IP4_ROUTE_FLAG_ADD,
639                                   &a, l,
640                                   fib->old_hash_values,
641                                   fib->new_hash_values);
642               }
643           }
644       }));
645
646       fib->new_hash_values[0] = ~0;
647       for (i = 0; i < vec_len (to_delete); i++)
648         {
649           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
650           vec_foreach (cb, im->add_del_route_callbacks)
651             if ((flags & cb->required_flags) == cb->required_flags)
652               cb->function (im, cb->function_opaque,
653                             fib, flags | IP4_ROUTE_FLAG_DEL,
654                             &a, l,
655                             fib->old_hash_values,
656                             fib->new_hash_values);
657         }
658     }
659
660   /* Also remap adjacencies in mtrie. */
661   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
662
663   /* Reset mapping table. */
664   vec_zero (lm->adjacency_remap_table);
665
666   /* All remaps have been performed. */
667   lm->n_adjacency_remaps = 0;
668 }
669
670 void ip4_delete_matching_routes (ip4_main_t * im,
671                                  u32 table_index_or_table_id,
672                                  u32 flags,
673                                  ip4_address_t * address,
674                                  u32 address_length)
675 {
676   static ip4_address_t * matching_addresses;
677   static u8 * matching_address_lengths;
678   u32 l, i;
679   ip4_add_del_route_args_t a;
680
681   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
682   a.table_index_or_table_id = table_index_or_table_id;
683   a.adj_index = ~0;
684   a.add_adj = 0;
685   a.n_add_adj = 0;
686
687   for (l = address_length + 1; l <= 32; l++)
688     {
689       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
690                                   address,
691                                   l,
692                                   &matching_addresses,
693                                   &matching_address_lengths);
694       for (i = 0; i < vec_len (matching_addresses); i++)
695         {
696           a.dst_address = matching_addresses[i];
697           a.dst_address_length = matching_address_lengths[i];
698           ip4_add_del_route (im, &a);
699         }
700     }
701
702   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
703 }
704
705 void
706 ip4_forward_next_trace (vlib_main_t * vm,
707                         vlib_node_runtime_t * node,
708                         vlib_frame_t * frame,
709                         vlib_rx_or_tx_t which_adj_index);
710
711 always_inline uword
712 ip4_lookup_inline (vlib_main_t * vm,
713                    vlib_node_runtime_t * node,
714                    vlib_frame_t * frame,
715                    int lookup_for_responses_to_locally_received_packets,
716                    int is_indirect)
717 {
718   ip4_main_t * im = &ip4_main;
719   ip_lookup_main_t * lm = &im->lookup_main;
720   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
721   u32 n_left_from, n_left_to_next, * from, * to_next;
722   ip_lookup_next_t next;
723   u32 cpu_index = os_get_cpu_number();
724
725   from = vlib_frame_vector_args (frame);
726   n_left_from = frame->n_vectors;
727   next = node->cached_next_index;
728
729   while (n_left_from > 0)
730     {
731       vlib_get_next_frame (vm, node, next,
732                            to_next, n_left_to_next);
733
734       while (n_left_from >= 4 && n_left_to_next >= 2)
735         {
736           vlib_buffer_t * p0, * p1;
737           ip4_header_t * ip0, * ip1;
738           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
739           ip_lookup_next_t next0, next1;
740           ip_adjacency_t * adj0, * adj1;
741           ip4_fib_mtrie_t * mtrie0, * mtrie1;
742           ip4_fib_mtrie_leaf_t leaf0, leaf1;
743           ip4_address_t * dst_addr0, *dst_addr1;
744           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
745           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
746           u32 flow_hash_config0, flow_hash_config1;
747           u32 hash_c0, hash_c1;
748           u32 wrong_next;
749
750           /* Prefetch next iteration. */
751           {
752             vlib_buffer_t * p2, * p3;
753
754             p2 = vlib_get_buffer (vm, from[2]);
755             p3 = vlib_get_buffer (vm, from[3]);
756
757             vlib_prefetch_buffer_header (p2, LOAD);
758             vlib_prefetch_buffer_header (p3, LOAD);
759
760             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
761             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
762           }
763
764           pi0 = to_next[0] = from[0];
765           pi1 = to_next[1] = from[1];
766
767           p0 = vlib_get_buffer (vm, pi0);
768           p1 = vlib_get_buffer (vm, pi1);
769
770           ip0 = vlib_buffer_get_current (p0);
771           ip1 = vlib_buffer_get_current (p1);
772
773           if (is_indirect)
774             {
775               ip_adjacency_t * iadj0, * iadj1;
776               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
777               iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
778               dst_addr0 = &iadj0->indirect.next_hop.ip4;
779               dst_addr1 = &iadj1->indirect.next_hop.ip4;
780             }
781           else
782             {
783               dst_addr0 = &ip0->dst_address;
784               dst_addr1 = &ip1->dst_address;
785             }
786
787           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
788           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
789           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
790             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
791           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
792             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
793
794
795           if (! lookup_for_responses_to_locally_received_packets)
796             {
797               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
798               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
799
800               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
801
802               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
803               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
804             }
805
806           tcp0 = (void *) (ip0 + 1);
807           tcp1 = (void *) (ip1 + 1);
808
809           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
810                          || ip0->protocol == IP_PROTOCOL_UDP);
811           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
812                          || ip1->protocol == IP_PROTOCOL_UDP);
813
814           if (! lookup_for_responses_to_locally_received_packets)
815             {
816               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
817               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
818             }
819
820           if (! lookup_for_responses_to_locally_received_packets)
821             {
822               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
823               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
824             }
825
826           if (! lookup_for_responses_to_locally_received_packets)
827             {
828               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
829               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
830             }
831
832           if (lookup_for_responses_to_locally_received_packets)
833             {
834               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
835               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
836             }
837           else
838             {
839               /* Handle default route. */
840               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
841               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
842
843               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
844               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
845             }
846
847           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
848                                                            dst_addr0,
849                                                            /* no_default_route */ 0));
850           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
851                                                            dst_addr1,
852                                                            /* no_default_route */ 0));
853           adj0 = ip_get_adjacency (lm, adj_index0);
854           adj1 = ip_get_adjacency (lm, adj_index1);
855
856           next0 = adj0->lookup_next_index;
857           next1 = adj1->lookup_next_index;
858
859           /* Use flow hash to compute multipath adjacency. */
860           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
861           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
862           if (PREDICT_FALSE (adj0->n_adj > 1))
863             {
864               flow_hash_config0 = 
865                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
866               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
867                 ip4_compute_flow_hash (ip0, flow_hash_config0);
868             }
869           if (PREDICT_FALSE(adj1->n_adj > 1))
870             {
871               flow_hash_config1 = 
872                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
873               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
874                 ip4_compute_flow_hash (ip1, flow_hash_config1);
875             }
876
877           ASSERT (adj0->n_adj > 0);
878           ASSERT (adj1->n_adj > 0);
879           ASSERT (is_pow2 (adj0->n_adj));
880           ASSERT (is_pow2 (adj1->n_adj));
881           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
882           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
883
884           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
885           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
886
887           vlib_increment_combined_counter 
888               (cm, cpu_index, adj_index0, 1,
889                vlib_buffer_length_in_chain (vm, p0) 
890                + sizeof(ethernet_header_t));
891           vlib_increment_combined_counter 
892               (cm, cpu_index, adj_index1, 1,
893                vlib_buffer_length_in_chain (vm, p1)
894                + sizeof(ethernet_header_t));
895
896           from += 2;
897           to_next += 2;
898           n_left_to_next -= 2;
899           n_left_from -= 2;
900
901           wrong_next = (next0 != next) + 2*(next1 != next);
902           if (PREDICT_FALSE (wrong_next != 0))
903             {
904               switch (wrong_next)
905                 {
906                 case 1:
907                   /* A B A */
908                   to_next[-2] = pi1;
909                   to_next -= 1;
910                   n_left_to_next += 1;
911                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
912                   break;
913
914                 case 2:
915                   /* A A B */
916                   to_next -= 1;
917                   n_left_to_next += 1;
918                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
919                   break;
920
921                 case 3:
922                   /* A B C */
923                   to_next -= 2;
924                   n_left_to_next += 2;
925                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
926                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
927                   if (next0 == next1)
928                     {
929                       /* A B B */
930                       vlib_put_next_frame (vm, node, next, n_left_to_next);
931                       next = next1;
932                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
933                     }
934                 }
935             }
936         }
937     
938       while (n_left_from > 0 && n_left_to_next > 0)
939         {
940           vlib_buffer_t * p0;
941           ip4_header_t * ip0;
942           __attribute__((unused)) tcp_header_t * tcp0;
943           ip_lookup_next_t next0;
944           ip_adjacency_t * adj0;
945           ip4_fib_mtrie_t * mtrie0;
946           ip4_fib_mtrie_leaf_t leaf0;
947           ip4_address_t * dst_addr0;
948           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
949           u32 flow_hash_config0, hash_c0;
950
951           pi0 = from[0];
952           to_next[0] = pi0;
953
954           p0 = vlib_get_buffer (vm, pi0);
955
956           ip0 = vlib_buffer_get_current (p0);
957
958           if (is_indirect)
959             {
960               ip_adjacency_t * iadj0;
961               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
962               dst_addr0 = &iadj0->indirect.next_hop.ip4;
963             }
964           else
965             {
966               dst_addr0 = &ip0->dst_address;
967             }
968
969           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
970           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
971             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
972
973           if (! lookup_for_responses_to_locally_received_packets)
974             {
975               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
976
977               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
978
979               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
980             }
981
982           tcp0 = (void *) (ip0 + 1);
983
984           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
985                          || ip0->protocol == IP_PROTOCOL_UDP);
986
987           if (! lookup_for_responses_to_locally_received_packets)
988             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
989
990           if (! lookup_for_responses_to_locally_received_packets)
991             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
992
993           if (! lookup_for_responses_to_locally_received_packets)
994             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
995
996           if (lookup_for_responses_to_locally_received_packets)
997             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
998           else
999             {
1000               /* Handle default route. */
1001               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1002               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1003             }
1004
1005           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1006                                                            dst_addr0,
1007                                                            /* no_default_route */ 0));
1008
1009           adj0 = ip_get_adjacency (lm, adj_index0);
1010
1011           next0 = adj0->lookup_next_index;
1012
1013           /* Use flow hash to compute multipath adjacency. */
1014           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
1015           if (PREDICT_FALSE(adj0->n_adj > 1))
1016             {
1017               flow_hash_config0 = 
1018                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
1019
1020               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
1021                 ip4_compute_flow_hash (ip0, flow_hash_config0);
1022             }
1023
1024           ASSERT (adj0->n_adj > 0);
1025           ASSERT (is_pow2 (adj0->n_adj));
1026           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
1027
1028           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1029
1030           vlib_increment_combined_counter 
1031               (cm, cpu_index, adj_index0, 1,
1032                vlib_buffer_length_in_chain (vm, p0)
1033                + sizeof(ethernet_header_t));
1034
1035           from += 1;
1036           to_next += 1;
1037           n_left_to_next -= 1;
1038           n_left_from -= 1;
1039
1040           if (PREDICT_FALSE (next0 != next))
1041             {
1042               n_left_to_next += 1;
1043               vlib_put_next_frame (vm, node, next, n_left_to_next);
1044               next = next0;
1045               vlib_get_next_frame (vm, node, next,
1046                                    to_next, n_left_to_next);
1047               to_next[0] = pi0;
1048               to_next += 1;
1049               n_left_to_next -= 1;
1050             }
1051         }
1052
1053       vlib_put_next_frame (vm, node, next, n_left_to_next);
1054     }
1055
1056   if (node->flags & VLIB_NODE_FLAG_TRACE)
1057     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
1058
1059   return frame->n_vectors;
1060 }
1061
1062 /** @brief IPv4 lookup node.
1063     @node ip4-lookup
1064
1065     This is the main IPv4 lookup dispatch node.
1066
1067     @param vm vlib_main_t corresponding to the current thread
1068     @param node vlib_node_runtime_t
1069     @param frame vlib_frame_t whose contents should be dispatched
1070
1071     @par Graph mechanics: buffer metadata, next index usage
1072
1073     @em Uses:
1074     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
1075         - Indicates the @c sw_if_index value of the interface that the
1076           packet was received on.
1077     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
1078         - When the value is @c ~0 then the node performs a longest prefix
1079           match (LPM) for the packet destination address in the FIB attached
1080           to the receive interface.
1081         - Otherwise perform LPM for the packet destination address in the
1082           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
1083           value (0, 1, ...) and not a VRF id.
1084
1085     @em Sets:
1086     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
1087         - The lookup result adjacency index.
1088
1089     <em>Next Index:</em>
1090     - Dispatches the packet to the node index found in
1091       ip_adjacency_t @c adj->lookup_next_index
1092       (where @c adj is the lookup result adjacency).

    @return The value produced by ip4_lookup_inline() when it is invoked
            with both of its trailing flag arguments set to 0.
1093 */
1094 static uword
1095 ip4_lookup (vlib_main_t * vm,
1096             vlib_node_runtime_t * node,
1097             vlib_frame_t * frame)
1098 {
  /* All of the work happens in the shared inline body; this wrapper only
     fixes the two flag arguments to 0. */
1099   return ip4_lookup_inline (vm, node, frame,
1100                             /* lookup_for_responses_to_locally_received_packets */ 0,
1101                             /* is_indirect */ 0);
1102
1103 }
1104
1105 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
1106                                         ip_adjacency_t * adj,
1107                                         u32 sw_if_index,
1108                                         u32 if_address_index)
1109 {
1110   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1111   ip_lookup_next_t n;
1112   vnet_l3_packet_type_t packet_type;
1113   u32 node_index;
1114
1115   if (hw->hw_class_index == ethernet_hw_interface_class.index
1116       || hw->hw_class_index == srp_hw_interface_class.index)
1117     {
1118       /* 
1119        * We have a bit of a problem in this case. ip4-arp uses
1120        * the rewrite_header.next_index to hand pkts to the
1121        * indicated inteface output node. We can end up in
1122        * ip4_rewrite_local, too, which also pays attention to 
1123        * rewrite_header.next index. Net result: a hack in
1124        * ip4_rewrite_local...
1125        */
1126       n = IP_LOOKUP_NEXT_ARP;
1127       node_index = ip4_arp_node.index;
1128       adj->if_address_index = if_address_index;
1129       adj->arp.next_hop.ip4.as_u32 = 0;
1130       ip46_address_reset(&adj->arp.next_hop);
1131       packet_type = VNET_L3_PACKET_TYPE_ARP;
1132     }
1133   else
1134     {
1135       n = IP_LOOKUP_NEXT_REWRITE;
1136       node_index = ip4_rewrite_node.index;
1137       packet_type = VNET_L3_PACKET_TYPE_IP4;
1138     }
1139
1140   adj->lookup_next_index = n;
1141   vnet_rewrite_for_sw_interface
1142     (vnm,
1143      packet_type,
1144      sw_if_index,
1145      node_index,
1146      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1147      &adj->rewrite_header,
1148      sizeof (adj->rewrite_data));
1149 }
1150
1151 static void
1152 ip4_add_interface_routes (u32 sw_if_index,
1153                           ip4_main_t * im, u32 fib_index,
1154                           ip_interface_address_t * a)
1155 {
1156   vnet_main_t * vnm = vnet_get_main();
1157   ip_lookup_main_t * lm = &im->lookup_main;
1158   ip_adjacency_t * adj;
1159   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1160   ip4_add_del_route_args_t x;
1161   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1162   u32 classify_table_index;
1163
1164   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1165   x.table_index_or_table_id = fib_index;
1166   x.flags = (IP4_ROUTE_FLAG_ADD
1167              | IP4_ROUTE_FLAG_FIB_INDEX
1168              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1169   x.dst_address = address[0];
1170   x.dst_address_length = a->address_length;
1171   x.n_add_adj = 0;
1172   x.add_adj = 0;
1173
1174   a->neighbor_probe_adj_index = ~0;
1175   if (a->address_length < 32)
1176     {
1177       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1178                               &x.adj_index);
1179       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1180       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1181       ip4_add_del_route (im, &x);
1182       a->neighbor_probe_adj_index = x.adj_index;
1183     }
1184   
1185   /* Add e.g. 1.1.1.1/32 as local to this host. */
1186   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1187                           &x.adj_index);
1188   
1189   classify_table_index = ~0;
1190   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1191     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1192   if (classify_table_index != (u32) ~0)
1193     {
1194       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1195       adj->classify.table_index = classify_table_index;
1196     }
1197   else
1198     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1199   
1200   adj->if_address_index = a - lm->if_address_pool;
1201   adj->rewrite_header.sw_if_index = sw_if_index;
1202   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1203   /* 
1204    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1205    * fail an RPF-ish check, but still go thru the rewrite code...
1206    */
1207   adj->rewrite_header.data_bytes = 0;
1208
1209   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1210   x.dst_address_length = 32;
1211   ip4_add_del_route (im, &x);
1212 }
1213
1214 static void
1215 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1216 {
1217   ip4_add_del_route_args_t x;
1218
1219   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1220   x.table_index_or_table_id = fib_index;
1221   x.flags = (IP4_ROUTE_FLAG_DEL
1222              | IP4_ROUTE_FLAG_FIB_INDEX
1223              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1224   x.dst_address = address[0];
1225   x.dst_address_length = address_length;
1226   x.adj_index = ~0;
1227   x.n_add_adj = 0;
1228   x.add_adj = 0;
1229
1230   if (address_length < 32)
1231     ip4_add_del_route (im, &x);
1232
1233   x.dst_address_length = 32;
1234   ip4_add_del_route (im, &x);
1235
1236   ip4_delete_matching_routes (im,
1237                               fib_index,
1238                               IP4_ROUTE_FLAG_FIB_INDEX,
1239                               address,
1240                               address_length);
1241 }
1242
/* One IPv4 interface address: the software interface index, the address
 * and a length (presumably the prefix length in bits -- confirm against
 * the users of this type; none are visible in this part of the file). */
1243 typedef struct {
1244     u32 sw_if_index;
1245     ip4_address_t address;
1246     u32 length;
1247 } ip4_interface_address_t;
1248
1249 static clib_error_t *
1250 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1251                                         u32 sw_if_index,
1252                                         ip4_address_t * new_address,
1253                                         u32 new_length,
1254                                         u32 redistribute,
1255                                         u32 insert_routes,
1256                                         u32 is_del);
1257
1258 static clib_error_t *
1259 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1260                                         u32 sw_if_index,
1261                                         ip4_address_t * address,
1262                                         u32 address_length,
1263                                         u32 redistribute,
1264                                         u32 insert_routes,
1265                                         u32 is_del)
1266 {
1267   vnet_main_t * vnm = vnet_get_main();
1268   ip4_main_t * im = &ip4_main;
1269   ip_lookup_main_t * lm = &im->lookup_main;
1270   clib_error_t * error = 0;
1271   u32 if_address_index, elts_before;
1272   ip4_address_fib_t ip4_af, * addr_fib = 0;
1273
1274   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1275   ip4_addr_fib_init (&ip4_af, address,
1276                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1277   vec_add1 (addr_fib, ip4_af);
1278
1279   /* When adding an address check that it does not conflict with an existing address. */
1280   if (! is_del)
1281     {
1282       ip_interface_address_t * ia;
1283       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1284                                     0 /* honor unnumbered */,
1285       ({
1286         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1287
1288         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1289             || ip4_destination_matches_route (im, x, address, address_length))
1290           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1291                                     format_ip4_address_and_length, address, address_length,
1292                                     format_ip4_address_and_length, x, ia->address_length,
1293                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1294       }));
1295     }
1296
1297   elts_before = pool_elts (lm->if_address_pool);
1298
1299   error = ip_interface_address_add_del
1300     (lm,
1301      sw_if_index,
1302      addr_fib,
1303      address_length,
1304      is_del,
1305      &if_address_index);
1306   if (error)
1307     goto done;
1308   
1309   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1310     {
1311       if (is_del)
1312         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1313                                   address_length);
1314       
1315       else
1316           ip4_add_interface_routes (sw_if_index,
1317                                     im, ip4_af.fib_index,
1318                                     pool_elt_at_index 
1319                                     (lm->if_address_pool, if_address_index));
1320     }
1321
1322   /* If pool did not grow/shrink: add duplicate address. */
1323   if (elts_before != pool_elts (lm->if_address_pool))
1324     {
1325       ip4_add_del_interface_address_callback_t * cb;
1326       vec_foreach (cb, im->add_del_interface_address_callbacks)
1327         cb->function (im, cb->function_opaque, sw_if_index,
1328                       address, address_length,
1329                       if_address_index,
1330                       is_del);
1331     }
1332
1333  done:
1334   vec_free (addr_fib);
1335   return error;
1336 }
1337
/* Public entry point for adding (is_del == 0) or deleting (is_del != 0) an
 * IPv4 address on a software interface.  Forwards to
 * ip4_add_del_interface_address_internal() with both the redistribute and
 * insert_routes flags set to 1 and returns its error pointer (0 when no
 * error occurred). */
1338 clib_error_t *
1339 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1340                                ip4_address_t * address, u32 address_length,
1341                                u32 is_del)
1342 {
1343   return ip4_add_del_interface_address_internal
1344     (vm, sw_if_index, address, address_length,
1345      /* redistribute */ 1,
1346      /* insert_routes */ 1,
1347      is_del);
1348 }
1349
1350 static clib_error_t *
1351 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1352                                 u32 sw_if_index,
1353                                 u32 flags)
1354 {
1355   ip4_main_t * im = &ip4_main;
1356   ip_interface_address_t * ia;
1357   ip4_address_t * a;
1358   u32 is_admin_up, fib_index;
1359   
1360   /* Fill in lookup tables with default table (0). */
1361   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1362   
1363   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1364   
1365   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1366   
1367   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1368
1369   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1370                                 0 /* honor unnumbered */,
1371   ({
1372     a = ip_interface_address_get_address (&im->lookup_main, ia);
1373     if (is_admin_up)
1374       ip4_add_interface_routes (sw_if_index,
1375                                 im, fib_index,
1376                                 ia);
1377     else
1378       ip4_del_interface_routes (im, fib_index,
1379                                 a, ia->address_length);
1380   }));
1381
1382   return 0;
1383 }
1384  
1385 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1386
1387 /* Built-in ip4 unicast rx feature path definition */
/*
 * Every entry names a graph node (.node_name), gives a 0-terminated list
 * of node names it has to run before (.runs_before), and points at the
 * ip4_main field that presumably receives the feature's assigned index
 * (.feature_index).  Following the runs_before links below yields:
 *
 *   ip4-inacl -> ip4-source-check-via-rx -> ip4-source-check-via-any
 *     -> ip4-policer-classify -> ipsec-input-ip4 -> vpath-input-ip4
 *     -> ip4-lookup
 *
 * ip4-source-and-port-range-check is additionally placed ahead of
 * ip4-policer-classify; its order relative to the source-check nodes is
 * not constrained here.
 */
1388 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
1389   .node_name = "ip4-inacl", 
1390   .runs_before = {"ip4-source-check-via-rx", 0}, 
1391   .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
1392 };
1393
1394 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
1395   .node_name = "ip4-source-check-via-rx",
1396   .runs_before = {"ip4-source-check-via-any", 0},
1397   .feature_index = 
1398   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
1399 };
1400
1401 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
1402   .node_name = "ip4-source-check-via-any",
1403   .runs_before = {"ip4-policer-classify", 0},
1404   .feature_index = 
1405   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
1406 };
1407
1408 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check, static) = {
1409   .node_name = "ip4-source-and-port-range-check",
1410   .runs_before = {"ip4-policer-classify", 0},
1411   .feature_index =
1412   &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
1413 };
1414
1415 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
1416   .node_name = "ip4-policer-classify",
1417   .runs_before = {"ipsec-input-ip4", 0},
1418   .feature_index =
1419   &ip4_main.ip4_unicast_rx_feature_policer_classify,
1420 };
1421
1422 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
1423   .node_name = "ipsec-input-ip4",
1424   .runs_before = {"vpath-input-ip4", 0},
1425   .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
1426 };
1427
1428 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
1429   .node_name = "vpath-input-ip4",
1430   .runs_before = {"ip4-lookup", 0},
1431   .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
1432 };
1433
/* ip4-lookup is the last feature on the unicast path. */
1434 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
1435   .node_name = "ip4-lookup",
1436   .runs_before = {0}, /* not before any other features */
1437   .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
1438 };
1439
1440 /* Built-in ip4 multicast rx feature path definition */
/*
 * Same entry layout as the unicast registrations.  Only two features are
 * registered here: vpath-input-ip4, which must run before
 * ip4-lookup-multicast, and ip4-lookup-multicast itself, which has no
 * runs_before constraint.
 */
1441 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
1442   .node_name = "vpath-input-ip4",
1443   .runs_before = {"ip4-lookup-multicast", 0},
1444   .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
1445 };
1446
1447 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
1448   .node_name = "ip4-lookup-multicast",
1449   .runs_before = {0}, /* not before any other features */
1450   .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
1451 };
1452
1453 static char * feature_start_nodes[] = 
1454   { "ip4-input", "ip4-input-no-checksum"};
1455
1456 static clib_error_t *
1457 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
1458 {
1459   ip_lookup_main_t * lm = &im->lookup_main;
1460   clib_error_t * error;
1461   vnet_cast_t cast;
1462
1463   for (cast = 0; cast < VNET_N_CAST; cast++)
1464     {
1465       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1466       vnet_config_main_t * vcm = &cm->config_main;
1467
1468       if ((error = ip_feature_init_cast (vm, cm, vcm, 
1469                                          feature_start_nodes,
1470                                          ARRAY_LEN(feature_start_nodes),
1471                                          cast,
1472                                          1 /* is_ip4 */)))
1473         return error;
1474     }
1475   return 0;
1476 }
1477
1478 static clib_error_t *
1479 ip4_sw_interface_add_del (vnet_main_t * vnm,
1480                           u32 sw_if_index,
1481                           u32 is_add)
1482 {
1483   vlib_main_t * vm = vnm->vlib_main;
1484   ip4_main_t * im = &ip4_main;
1485   ip_lookup_main_t * lm = &im->lookup_main;
1486   u32 ci, cast;
1487   u32 feature_index;
1488
1489   for (cast = 0; cast < VNET_N_CAST; cast++)
1490     {
1491       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1492       vnet_config_main_t * vcm = &cm->config_main;
1493
1494       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1495       ci = cm->config_index_by_sw_if_index[sw_if_index];
1496
1497       if (cast == VNET_UNICAST)
1498         feature_index = im->ip4_unicast_rx_feature_lookup;
1499       else
1500         feature_index = im->ip4_multicast_rx_feature_lookup;
1501
1502       if (is_add)
1503         ci = vnet_config_add_feature (vm, vcm,
1504                                       ci,
1505                                       feature_index,
1506                                       /* config data */ 0,
1507                                       /* # bytes of config data */ 0);
1508       else
1509         ci = vnet_config_del_feature (vm, vcm,
1510                                       ci,
1511                                       feature_index,
1512                                       /* config data */ 0,
1513                                       /* # bytes of config data */ 0);
1514
1515       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1516     }
1517
1518   return /* no error */ 0;
1519 }
1520
1521 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1522
/* Forward declaration: the formatter is defined further down in this file
 * but is referenced by the node registrations that follow. */
1523 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
1524
/* Registration of the "ip4-lookup" graph node: it runs ip4_lookup() over
 * vectors of u32 elements, formats its trace records with
 * format_ip4_lookup_trace() and takes its next-node list from the
 * IP4_LOOKUP_NEXT_NODES / IP4_LOOKUP_N_NEXT definitions. */
1525 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1526   .function = ip4_lookup,
1527   .name = "ip4-lookup",
1528   .vector_size = sizeof (u32),
1529
1530   .format_trace = format_ip4_lookup_trace,
1531
1532   .n_next_nodes = IP4_LOOKUP_N_NEXT,
1533   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1534 };
1535
1536 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
1537
/* Node function for "ip4-indirect".  Same inline body as ip4_lookup(), but
 * with is_indirect set to 1; in that mode the visible part of the inline
 * loop looks up the next-hop address stored in the adjacency referenced by
 * the buffer's ip.adj_index[VLIB_TX] instead of the packet's destination
 * address. */
1538 static uword
1539 ip4_indirect (vlib_main_t * vm,
1540                vlib_node_runtime_t * node,
1541                vlib_frame_t * frame)
1542 {
1543   return ip4_lookup_inline (vm, node, frame,
1544                             /* lookup_for_responses_to_locally_received_packets */ 0,
1545                             /* is_indirect */ 1);
1546 }
1547
/* Registration of the "ip4-indirect" graph node.  It declares no next
 * nodes of its own (n_next_nodes = 0) and is marked as a sibling of
 * "ip4-lookup" -- presumably so that it shares that node's next-node
 * arcs; confirm against the vlib node documentation. */
1548 VLIB_REGISTER_NODE (ip4_indirect_node) = {
1549   .function = ip4_indirect,
1550   .name = "ip4-indirect",
1551   .vector_size = sizeof (u32),
1552   .sibling_of = "ip4-lookup",
1553   .format_trace = format_ip4_lookup_trace,
1554
1555   .n_next_nodes = 0,
1556 };
1557
1558 VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect)
1559
1560
1561 /* Global IP4 main. */
/* Single instance of the IPv4 state; the functions in this file reach it
 * through &ip4_main (FIB index vectors, fib_masks, lookup_main, callbacks,
 * the ARP request packet template, feature indices). */
1562 ip4_main_t ip4_main;
1563
1564 clib_error_t *
1565 ip4_lookup_init (vlib_main_t * vm)
1566 {
1567   ip4_main_t * im = &ip4_main;
1568   clib_error_t * error;
1569   uword i;
1570
1571   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1572     {
1573       u32 m;
1574
1575       if (i < 32)
1576         m = pow2_mask (i) << (32 - i);
1577       else 
1578         m = ~0;
1579       im->fib_masks[i] = clib_host_to_net_u32 (m);
1580     }
1581
1582   /* Create FIB with index 0 and table id of 0. */
1583   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1584
1585   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1586
1587   {
1588     pg_node_t * pn;
1589     pn = pg_get_node (ip4_lookup_node.index);
1590     pn->unformat_edit = unformat_pg_ip4_header;
1591   }
1592
1593   {
1594     ethernet_arp_header_t h;
1595
1596     memset (&h, 0, sizeof (h));
1597
1598     /* Set target ethernet address to all zeros. */
1599     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1600
1601 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1602 #define _8(f,v) h.f = v;
1603     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1604     _16 (l3_type, ETHERNET_TYPE_IP4);
1605     _8 (n_l2_address_bytes, 6);
1606     _8 (n_l3_address_bytes, 4);
1607     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1608 #undef _16
1609 #undef _8
1610
1611     vlib_packet_template_init (vm,
1612                                &im->ip4_arp_request_packet_template,
1613                                /* data */ &h,
1614                                sizeof (h),
1615                                /* alloc chunk size */ 8,
1616                                "ip4 arp");
1617   }
1618
1619   error = ip4_feature_init (vm, im);
1620
1621   return error;
1622 }
1623
1624 VLIB_INIT_FUNCTION (ip4_lookup_init);
1625
/* Trace record consumed by the three format_ip4_*_trace() functions below. */
1626 typedef struct {
1627   /* Adjacency taken. */
1628   u32 adj_index;
  /* Printed as "flow hash" by the lookup and rewrite formatters. */
1629   u32 flow_hash;
  /* Printed as "fib" by format_ip4_lookup_trace and as "tx_sw_if_index" by
     format_ip4_rewrite_trace. */
1630   u32 fib_index;
1631
1632   /* Packet data, possibly *after* rewrite. */
  /* NOTE(review): the size subtracts a single u32 although three u32 fields
     precede this array; if the record is meant to total 64 bytes this
     would need 3*sizeof(u32) -- confirm the intended size. */
1633   u8 packet_data[64 - 1*sizeof(u32)];
1634 } ip4_forward_next_trace_t;
1635
1636 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1637 {
1638   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1639   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1640   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1641   uword indent = format_get_indent (s);
1642   s = format (s, "%U%U",
1643                 format_white_space, indent,
1644                 format_ip4_header, t->packet_data);
1645   return s;
1646 }
1647
1648 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1649 {
1650   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1651   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1652   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1653   vnet_main_t * vnm = vnet_get_main();
1654   ip4_main_t * im = &ip4_main;
1655   uword indent = format_get_indent (s);
1656
1657   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1658               t->fib_index, t->adj_index, format_ip_adjacency,
1659               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1660   s = format (s, "\n%U%U",
1661               format_white_space, indent,
1662               format_ip4_header, t->packet_data);
1663   return s;
1664 }
1665
1666 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1667 {
1668   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1669   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1670   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1671   vnet_main_t * vnm = vnet_get_main();
1672   ip4_main_t * im = &ip4_main;
1673   uword indent = format_get_indent (s);
1674
1675   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
1676               t->fib_index, t->adj_index, format_ip_adjacency,
1677               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1678   s = format (s, "\n%U%U",
1679               format_white_space, indent,
1680               format_ip_adjacency_packet_data,
1681               vnm, &im->lookup_main, t->adj_index,
1682               t->packet_data, sizeof (t->packet_data));
1683   return s;
1684 }
1685
1686 /* Common trace function for all ip4-forward next nodes. */
1687 void
1688 ip4_forward_next_trace (vlib_main_t * vm,
1689                         vlib_node_runtime_t * node,
1690                         vlib_frame_t * frame,
1691                         vlib_rx_or_tx_t which_adj_index)
1692 {
1693   u32 * from, n_left;
1694   ip4_main_t * im = &ip4_main;
1695
1696   n_left = frame->n_vectors;
1697   from = vlib_frame_vector_args (frame);
1698   
1699   while (n_left >= 4)
1700     {
1701       u32 bi0, bi1;
1702       vlib_buffer_t * b0, * b1;
1703       ip4_forward_next_trace_t * t0, * t1;
1704
1705       /* Prefetch next iteration. */
1706       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1707       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1708
1709       bi0 = from[0];
1710       bi1 = from[1];
1711
1712       b0 = vlib_get_buffer (vm, bi0);
1713       b1 = vlib_get_buffer (vm, bi1);
1714
1715       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1716         {
1717           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1718           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1719           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1720           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1721               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1722               vec_elt (im->fib_index_by_sw_if_index,
1723                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1724
1725           clib_memcpy (t0->packet_data,
1726                   vlib_buffer_get_current (b0),
1727                   sizeof (t0->packet_data));
1728         }
1729       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1730         {
1731           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1732           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1733           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1734           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1735               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1736               vec_elt (im->fib_index_by_sw_if_index,
1737                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1738           clib_memcpy (t1->packet_data,
1739                   vlib_buffer_get_current (b1),
1740                   sizeof (t1->packet_data));
1741         }
1742       from += 2;
1743       n_left -= 2;
1744     }
1745
1746   while (n_left >= 1)
1747     {
1748       u32 bi0;
1749       vlib_buffer_t * b0;
1750       ip4_forward_next_trace_t * t0;
1751
1752       bi0 = from[0];
1753
1754       b0 = vlib_get_buffer (vm, bi0);
1755
1756       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1757         {
1758           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1759           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1760           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1761           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1762               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1763               vec_elt (im->fib_index_by_sw_if_index,
1764                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1765           clib_memcpy (t0->packet_data,
1766                   vlib_buffer_get_current (b0),
1767                   sizeof (t0->packet_data));
1768         }
1769       from += 1;
1770       n_left -= 1;
1771     }
1772 }
1773
1774 static uword
1775 ip4_drop_or_punt (vlib_main_t * vm,
1776                   vlib_node_runtime_t * node,
1777                   vlib_frame_t * frame,
1778                   ip4_error_t error_code)
1779 {
1780   u32 * buffers = vlib_frame_vector_args (frame);
1781   uword n_packets = frame->n_vectors;
1782
1783   vlib_error_drop_buffers (vm, node,
1784                            buffers,
1785                            /* stride */ 1,
1786                            n_packets,
1787                            /* next */ 0,
1788                            ip4_input_node.index,
1789                            error_code);
1790
1791   if (node->flags & VLIB_NODE_FLAG_TRACE)
1792     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1793
1794   return n_packets;
1795 }
1796
1797 static uword
1798 ip4_drop (vlib_main_t * vm,
1799           vlib_node_runtime_t * node,
1800           vlib_frame_t * frame)
1801 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1802
1803 static uword
1804 ip4_punt (vlib_main_t * vm,
1805           vlib_node_runtime_t * node,
1806           vlib_frame_t * frame)
1807 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1808
1809 static uword
1810 ip4_miss (vlib_main_t * vm,
1811           vlib_node_runtime_t * node,
1812           vlib_frame_t * frame)
1813 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1814
/* Graph node "ip4-drop": runs ip4_drop over vectors of u32 buffer indices,
   formats traces with format_ip4_forward_next_trace and has a single next
   node, "error-drop". */
VLIB_REGISTER_NODE (ip4_drop_node,static) = {
  .function = ip4_drop,
  .name = "ip4-drop",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,

  .n_next_nodes = 1,
  .next_nodes = {
    [0] = "error-drop",
  },
};

/* NOTE(review): presumably generates per-architecture variants of ip4_drop
   for this node -- confirm against the macro definition. */
VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1829
/* Graph node "ip4-punt": runs ip4_punt over vectors of u32 buffer indices,
   formats traces with format_ip4_forward_next_trace and has a single next
   node, "error-punt". */
VLIB_REGISTER_NODE (ip4_punt_node,static) = {
  .function = ip4_punt,
  .name = "ip4-punt",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,

  .n_next_nodes = 1,
  .next_nodes = {
    [0] = "error-punt",
  },
};

/* NOTE(review): presumably generates per-architecture variants of ip4_punt
   for this node -- confirm against the macro definition. */
VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1844
/* Graph node "ip4-miss": runs ip4_miss over vectors of u32 buffer indices,
   formats traces with format_ip4_forward_next_trace and has a single next
   node, "error-drop". */
VLIB_REGISTER_NODE (ip4_miss_node,static) = {
  .function = ip4_miss,
  .name = "ip4-miss",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,

  .n_next_nodes = 1,
  .next_nodes = {
    [0] = "error-drop",
  },
};

/* NOTE(review): presumably generates per-architecture variants of ip4_miss
   for this node -- confirm against the macro definition. */
VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss)
1859
1860 /* Compute TCP/UDP/ICMP4 checksum in software. */
1861 u16
1862 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1863                               ip4_header_t * ip0)
1864 {
1865   ip_csum_t sum0;
1866   u32 ip_header_length, payload_length_host_byte_order;
1867   u32 n_this_buffer, n_bytes_left;
1868   u16 sum16;
1869   void * data_this_buffer;
1870   
1871   /* Initialize checksum with ip header. */
1872   ip_header_length = ip4_header_bytes (ip0);
1873   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1874   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1875
1876   if (BITS (uword) == 32)
1877     {
1878       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1879       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1880     }
1881   else
1882     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1883
1884   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1885   data_this_buffer = (void *) ip0 + ip_header_length;
1886   if (n_this_buffer + ip_header_length > p0->current_length)
1887     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1888   while (1)
1889     {
1890       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1891       n_bytes_left -= n_this_buffer;
1892       if (n_bytes_left == 0)
1893         break;
1894
1895       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1896       p0 = vlib_get_buffer (vm, p0->next_buffer);
1897       data_this_buffer = vlib_buffer_get_current (p0);
1898       n_this_buffer = p0->current_length;
1899     }
1900
1901   sum16 = ~ ip_csum_fold (sum0);
1902
1903   return sum16;
1904 }
1905
1906 static u32
1907 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1908 {
1909   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1910   udp_header_t * udp0;
1911   u16 sum16;
1912
1913   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1914           || ip0->protocol == IP_PROTOCOL_UDP);
1915
1916   udp0 = (void *) (ip0 + 1);
1917   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1918     {
1919       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1920                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1921       return p0->flags;
1922     }
1923
1924   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1925
1926   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1927                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1928
1929   return p0->flags;
1930 }
1931
1932 static uword
1933 ip4_local (vlib_main_t * vm,
1934            vlib_node_runtime_t * node,
1935            vlib_frame_t * frame)
1936 {
1937   ip4_main_t * im = &ip4_main;
1938   ip_lookup_main_t * lm = &im->lookup_main;
1939   ip_local_next_t next_index;
1940   u32 * from, * to_next, n_left_from, n_left_to_next;
1941   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1942
1943   from = vlib_frame_vector_args (frame);
1944   n_left_from = frame->n_vectors;
1945   next_index = node->cached_next_index;
1946   
1947   if (node->flags & VLIB_NODE_FLAG_TRACE)
1948     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1949
1950   while (n_left_from > 0)
1951     {
1952       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1953
1954       while (n_left_from >= 4 && n_left_to_next >= 2)
1955         {
1956           vlib_buffer_t * p0, * p1;
1957           ip4_header_t * ip0, * ip1;
1958           udp_header_t * udp0, * udp1;
1959           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1960           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1961           ip_adjacency_t * adj0, * adj1;
1962           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
1963           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
1964           i32 len_diff0, len_diff1;
1965           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1966           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1967           u8 enqueue_code;
1968       
1969           pi0 = to_next[0] = from[0];
1970           pi1 = to_next[1] = from[1];
1971           from += 2;
1972           n_left_from -= 2;
1973           to_next += 2;
1974           n_left_to_next -= 2;
1975       
1976           p0 = vlib_get_buffer (vm, pi0);
1977           p1 = vlib_get_buffer (vm, pi1);
1978
1979           ip0 = vlib_buffer_get_current (p0);
1980           ip1 = vlib_buffer_get_current (p1);
1981
1982           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1983                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1984           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1985                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1986
1987           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1988           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
1989
1990           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1991
1992           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1993           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1994
1995           /* Treat IP frag packets as "experimental" protocol for now
1996              until support of IP frag reassembly is implemented */
1997           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1998           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1999           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2000           is_udp1 = proto1 == IP_PROTOCOL_UDP;
2001           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2002           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
2003
2004           flags0 = p0->flags;
2005           flags1 = p1->flags;
2006
2007           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2008           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2009
2010           udp0 = ip4_next_header (ip0);
2011           udp1 = ip4_next_header (ip1);
2012
2013           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2014           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2015           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2016
2017           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2018           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
2019
2020           /* Verify UDP length. */
2021           ip_len0 = clib_net_to_host_u16 (ip0->length);
2022           ip_len1 = clib_net_to_host_u16 (ip1->length);
2023           udp_len0 = clib_net_to_host_u16 (udp0->length);
2024           udp_len1 = clib_net_to_host_u16 (udp1->length);
2025
2026           len_diff0 = ip_len0 - udp_len0;
2027           len_diff1 = ip_len1 - udp_len1;
2028
2029           len_diff0 = is_udp0 ? len_diff0 : 0;
2030           len_diff1 = is_udp1 ? len_diff1 : 0;
2031
2032           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
2033                                 & good_tcp_udp0 & good_tcp_udp1)))
2034             {
2035               if (is_tcp_udp0)
2036                 {
2037                   if (is_tcp_udp0
2038                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2039                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2040                   good_tcp_udp0 =
2041                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2042                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2043                 }
2044               if (is_tcp_udp1)
2045                 {
2046                   if (is_tcp_udp1
2047                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2048                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
2049                   good_tcp_udp1 =
2050                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2051                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2052                 }
2053             }
2054
2055           good_tcp_udp0 &= len_diff0 >= 0;
2056           good_tcp_udp1 &= len_diff1 >= 0;
2057
2058           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2059           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
2060
2061           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
2062
2063           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2064           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
2065
2066           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2067           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2068                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2069                     : error0);
2070           error1 = (is_tcp_udp1 && ! good_tcp_udp1
2071                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
2072                     : error1);
2073
2074           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2075           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
2076
2077           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2078           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2079
2080           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
2081           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2082
2083           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2084                                                            &ip0->src_address,
2085                                                            /* no_default_route */ 1));
2086           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
2087                                                            &ip1->src_address,
2088                                                            /* no_default_route */ 1));
2089
2090           adj0 = ip_get_adjacency (lm, adj_index0);
2091           adj1 = ip_get_adjacency (lm, adj_index1);
2092
2093           /* 
2094            * Must have a route to source otherwise we drop the packet.
2095            * ip4 broadcasts are accepted, e.g. to make dhcp client work
2096            */
2097           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2098                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2099                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2100                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2101                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2102                     ? IP4_ERROR_SRC_LOOKUP_MISS
2103                     : error0);
2104           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
2105                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2106                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
2107                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2108                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2109                     ? IP4_ERROR_SRC_LOOKUP_MISS
2110                     : error1);
2111
2112           next0 = lm->local_next_by_ip_protocol[proto0];
2113           next1 = lm->local_next_by_ip_protocol[proto1];
2114
2115           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2116           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
2117
2118           p0->error = error0 ? error_node->errors[error0] : 0;
2119           p1->error = error1 ? error_node->errors[error1] : 0;
2120
2121           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
2122
2123           if (PREDICT_FALSE (enqueue_code != 0))
2124             {
2125               switch (enqueue_code)
2126                 {
2127                 case 1:
2128                   /* A B A */
2129                   to_next[-2] = pi1;
2130                   to_next -= 1;
2131                   n_left_to_next += 1;
2132                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2133                   break;
2134
2135                 case 2:
2136                   /* A A B */
2137                   to_next -= 1;
2138                   n_left_to_next += 1;
2139                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2140                   break;
2141
2142                 case 3:
2143                   /* A B B or A B C */
2144                   to_next -= 2;
2145                   n_left_to_next += 2;
2146                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2147                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2148                   if (next0 == next1)
2149                     {
2150                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2151                       next_index = next1;
2152                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2153                     }
2154                   break;
2155                 }
2156             }
2157         }
2158
2159       while (n_left_from > 0 && n_left_to_next > 0)
2160         {
2161           vlib_buffer_t * p0;
2162           ip4_header_t * ip0;
2163           udp_header_t * udp0;
2164           ip4_fib_mtrie_t * mtrie0;
2165           ip4_fib_mtrie_leaf_t leaf0;
2166           ip_adjacency_t * adj0;
2167           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
2168           i32 len_diff0;
2169           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2170       
2171           pi0 = to_next[0] = from[0];
2172           from += 1;
2173           n_left_from -= 1;
2174           to_next += 1;
2175           n_left_to_next -= 1;
2176       
2177           p0 = vlib_get_buffer (vm, pi0);
2178
2179           ip0 = vlib_buffer_get_current (p0);
2180
2181           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2182                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2183
2184           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2185
2186           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2187
2188           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2189
2190           /* Treat IP frag packets as "experimental" protocol for now
2191              until support of IP frag reassembly is implemented */
2192           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
2193           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2194           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2195
2196           flags0 = p0->flags;
2197
2198           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2199
2200           udp0 = ip4_next_header (ip0);
2201
2202           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2203           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2204
2205           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2206
2207           /* Verify UDP length. */
2208           ip_len0 = clib_net_to_host_u16 (ip0->length);
2209           udp_len0 = clib_net_to_host_u16 (udp0->length);
2210
2211           len_diff0 = ip_len0 - udp_len0;
2212
2213           len_diff0 = is_udp0 ? len_diff0 : 0;
2214
2215           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
2216             {
2217               if (is_tcp_udp0)
2218                 {
2219                   if (is_tcp_udp0
2220                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2221                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2222                   good_tcp_udp0 =
2223                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2224                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2225                 }
2226             }
2227
2228           good_tcp_udp0 &= len_diff0 >= 0;
2229
2230           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2231
2232           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2233
2234           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2235
2236           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2237           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2238                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2239                     : error0);
2240
2241           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2242
2243           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2244           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2245
2246           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2247                                                            &ip0->src_address,
2248                                                            /* no_default_route */ 1));
2249
2250           adj0 = ip_get_adjacency (lm, adj_index0);
2251
2252           /* Must have a route to source otherwise we drop the packet. */
2253           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2254                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2255                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2256                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2257                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2258                     ? IP4_ERROR_SRC_LOOKUP_MISS
2259                     : error0);
2260
2261           next0 = lm->local_next_by_ip_protocol[proto0];
2262
2263           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2264
2265           p0->error = error0? error_node->errors[error0] : 0;
2266
2267           if (PREDICT_FALSE (next0 != next_index))
2268             {
2269               n_left_to_next += 1;
2270               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2271
2272               next_index = next0;
2273               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2274               to_next[0] = pi0;
2275               to_next += 1;
2276               n_left_to_next -= 1;
2277             }
2278         }
2279   
2280       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2281     }
2282
2283   return frame->n_vectors;
2284 }
2285
/* Graph node "ip4-local": runs ip4_local over vectors of u32 buffer indices
   and formats traces with format_ip4_forward_next_trace.  The static next
   nodes are listed below; ip4_register_protocol() adds further next nodes
   at run time via vlib_node_add_next(). */
VLIB_REGISTER_NODE (ip4_local_node,static) = {
  .function = ip4_local,
  .name = "ip4-local",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,

  .n_next_nodes = IP_LOCAL_N_NEXT,
  .next_nodes = {
    [IP_LOCAL_NEXT_DROP] = "error-drop",
    [IP_LOCAL_NEXT_PUNT] = "error-punt",
    [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
    [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
  },
};

/* NOTE(review): presumably generates per-architecture variants of ip4_local
   for this node -- confirm against the macro definition. */
VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
2303
2304 void ip4_register_protocol (u32 protocol, u32 node_index)
2305 {
2306   vlib_main_t * vm = vlib_get_main();
2307   ip4_main_t * im = &ip4_main;
2308   ip_lookup_main_t * lm = &im->lookup_main;
2309
2310   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2311   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2312 }
2313
2314 static clib_error_t *
2315 show_ip_local_command_fn (vlib_main_t * vm,
2316                           unformat_input_t * input,
2317                          vlib_cli_command_t * cmd)
2318 {
2319   ip4_main_t * im = &ip4_main;
2320   ip_lookup_main_t * lm = &im->lookup_main;
2321   int i;
2322
2323   vlib_cli_output (vm, "Protocols handled by ip4_local");
2324   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2325     {
2326       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2327         vlib_cli_output (vm, "%d", i);
2328     }
2329   return 0;
2330 }
2331
2332
2333
/* CLI command "show ip local": handled by show_ip_local_command_fn, which
   lists the IP protocol numbers whose ip4-local dispatch entry is not
   IP_LOCAL_NEXT_PUNT. */
VLIB_CLI_COMMAND (show_ip_local, static) = {
  .path = "show ip local",
  .function = show_ip_local_command_fn,
  .short_help = "Show ip local protocol table",
};
2339
2340 static uword
2341 ip4_arp (vlib_main_t * vm,
2342          vlib_node_runtime_t * node,
2343          vlib_frame_t * frame)
2344 {
2345   vnet_main_t * vnm = vnet_get_main();
2346   ip4_main_t * im = &ip4_main;
2347   ip_lookup_main_t * lm = &im->lookup_main;
2348   u32 * from, * to_next_drop;
2349   uword n_left_from, n_left_to_next_drop, next_index;
2350   static f64 time_last_seed_change = -1e100;
2351   static u32 hash_seeds[3];
2352   static uword hash_bitmap[256 / BITS (uword)]; 
2353   f64 time_now;
2354
2355   if (node->flags & VLIB_NODE_FLAG_TRACE)
2356     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2357
2358   time_now = vlib_time_now (vm);
2359   if (time_now - time_last_seed_change > 1e-3)
2360     {
2361       uword i;
2362       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2363                                              sizeof (hash_seeds));
2364       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2365         hash_seeds[i] = r[i];
2366
2367       /* Mark all hash keys as been no-seen before. */
2368       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2369         hash_bitmap[i] = 0;
2370
2371       time_last_seed_change = time_now;
2372     }
2373
2374   from = vlib_frame_vector_args (frame);
2375   n_left_from = frame->n_vectors;
2376   next_index = node->cached_next_index;
2377   if (next_index == IP4_ARP_NEXT_DROP)
2378     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2379
2380   while (n_left_from > 0)
2381     {
2382       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2383                            to_next_drop, n_left_to_next_drop);
2384
2385       while (n_left_from > 0 && n_left_to_next_drop > 0)
2386         {
2387           vlib_buffer_t * p0;
2388           ip4_header_t * ip0;
2389           ethernet_header_t * eh0;
2390           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2391           uword bm0;
2392           ip_adjacency_t * adj0;
2393
2394           pi0 = from[0];
2395
2396           p0 = vlib_get_buffer (vm, pi0);
2397
2398           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2399           adj0 = ip_get_adjacency (lm, adj_index0);
2400           ip0 = vlib_buffer_get_current (p0);
2401
2402           /* If packet destination is not local, send ARP to next hop */
2403           if (adj0->arp.next_hop.ip4.as_u32)
2404             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2405
2406           /* 
2407            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2408            * rewrite to this packet, we need to skip it here.
2409            * Note, to distinguish from src IP addr *.8.6.*, we
2410            * check for a bcast eth dest instead of IPv4 version.
2411            */
2412           eh0 = (ethernet_header_t*)ip0;
2413           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2414             {
2415               u32 vlan_num = 0;
2416               u16 * etype = &eh0->type;
2417               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2418                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2419                 {
2420                   vlan_num += 1;
2421                   etype += 2; //vlan tag also 16 bits, same as etype
2422                 }
2423               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2424                 {
2425                   vlib_buffer_advance (
2426                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2427                   ip0 = vlib_buffer_get_current (p0);
2428                 }
2429             }
2430
2431           a0 = hash_seeds[0];
2432           b0 = hash_seeds[1];
2433           c0 = hash_seeds[2];
2434
2435           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2436           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2437
2438           a0 ^= ip0->dst_address.data_u32;
2439           b0 ^= sw_if_index0;
2440
2441           hash_v3_finalize32 (a0, b0, c0);
2442
2443           c0 &= BITS (hash_bitmap) - 1;
2444           c0 = c0 / BITS (uword);
2445           m0 = (uword) 1 << (c0 % BITS (uword));
2446
2447           bm0 = hash_bitmap[c0];
2448           drop0 = (bm0 & m0) != 0;
2449
2450           /* Mark it as seen. */
2451           hash_bitmap[c0] = bm0 | m0;
2452
2453           from += 1;
2454           n_left_from -= 1;
2455           to_next_drop[0] = pi0;
2456           to_next_drop += 1;
2457           n_left_to_next_drop -= 1;
2458
2459           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2460
2461           if (drop0)
2462             continue;
2463
2464           /* 
2465            * Can happen if the control-plane is programming tables
2466            * with traffic flowing; at least that's today's lame excuse.
2467            */
2468           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2469             {
2470               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2471             }
2472           else
2473           /* Send ARP request. */
2474           {
2475             u32 bi0 = 0;
2476             vlib_buffer_t * b0;
2477             ethernet_arp_header_t * h0;
2478             vnet_hw_interface_t * hw_if0;
2479
2480             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2481
2482             /* Add rewrite/encap string for ARP packet. */
2483             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2484
2485             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2486
2487             /* Src ethernet address in ARP header. */
2488             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2489                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2490
2491             if (ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0)) {
2492                 //No source address available
2493                 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2494                 vlib_buffer_free(vm, &bi0, 1);
2495                 continue;
2496             }
2497
2498             /* Copy in destination address we are requesting. */
2499             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2500
2501             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2502             b0 = vlib_get_buffer (vm, bi0);
2503             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2504
2505             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2506
2507             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2508           }
2509         }
2510
2511       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2512     }
2513
2514   return frame->n_vectors;
2515 }
2516
2517 static char * ip4_arp_error_strings[] = {
2518   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2519   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2520   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2521   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2522   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2523   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2524 };
2525
2526 VLIB_REGISTER_NODE (ip4_arp_node) = {
2527   .function = ip4_arp,
2528   .name = "ip4-arp",
2529   .vector_size = sizeof (u32),
2530
2531   .format_trace = format_ip4_forward_next_trace,
2532
2533   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2534   .error_strings = ip4_arp_error_strings,
2535
2536   .n_next_nodes = IP4_ARP_N_NEXT,
2537   .next_nodes = {
2538     [IP4_ARP_NEXT_DROP] = "error-drop",
2539   },
2540 };
2541
2542 #define foreach_notrace_ip4_arp_error           \
2543 _(DROP)                                         \
2544 _(REQUEST_SENT)                                 \
2545 _(REPLICATE_DROP)                               \
2546 _(REPLICATE_FAIL)
2547
2548 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2549 {
2550   vlib_node_runtime_t *rt = 
2551     vlib_node_get_runtime (vm, ip4_arp_node.index);
2552
2553   /* don't trace ARP request packets */
2554 #define _(a)                                    \
2555     vnet_pcap_drop_trace_filter_add_del         \
2556         (rt->errors[IP4_ARP_ERROR_##a],         \
2557          1 /* is_add */);
2558     foreach_notrace_ip4_arp_error;
2559 #undef _
2560   return 0;
2561 }
2562
2563 VLIB_INIT_FUNCTION(arp_notrace_init);
2564
2565
2566 /* Send an ARP request to see if given destination is reachable on given interface. */
2567 clib_error_t *
2568 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2569 {
2570   vnet_main_t * vnm = vnet_get_main();
2571   ip4_main_t * im = &ip4_main;
2572   ethernet_arp_header_t * h;
2573   ip4_address_t * src;
2574   ip_interface_address_t * ia;
2575   ip_adjacency_t * adj;
2576   vnet_hw_interface_t * hi;
2577   vnet_sw_interface_t * si;
2578   vlib_buffer_t * b;
2579   u32 bi = 0;
2580
2581   si = vnet_get_sw_interface (vnm, sw_if_index);
2582
2583   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2584     {
2585       return clib_error_return (0, "%U: interface %U down",
2586                                 format_ip4_address, dst, 
2587                                 format_vnet_sw_if_index_name, vnm, 
2588                                 sw_if_index);
2589     }
2590
2591   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2592   if (! src)
2593     {
2594       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2595       return clib_error_return 
2596         (0, "no matching interface address for destination %U (interface %U)",
2597          format_ip4_address, dst,
2598          format_vnet_sw_if_index_name, vnm, sw_if_index);
2599     }
2600
2601   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2602
2603   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2604
2605   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2606
2607   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2608
2609   h->ip4_over_ethernet[0].ip4 = src[0];
2610   h->ip4_over_ethernet[1].ip4 = dst[0];
2611
2612   b = vlib_get_buffer (vm, bi);
2613   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2614
2615   /* Add encapsulation string for software interface (e.g. ethernet header). */
2616   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2617   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2618
2619   {
2620     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2621     u32 * to_next = vlib_frame_vector_args (f);
2622     to_next[0] = bi;
2623     f->n_vectors = 1;
2624     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2625   }
2626
2627   return /* no error */ 0;
2628 }
2629
/*
 * Next-node slots of the ip4-rewrite nodes; the values index the
 * .next_nodes table of the ip4-rewrite-transit registration.
 *
 * IP4_REWRITE_NEXT_DROP must remain 0: ip4_rewrite_inline() tests the
 * chosen next index for non-zero before applying the ARP override.
 */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ARP = 1,
  IP4_REWRITE_NEXT_ICMP_ERROR = 2,
} ip4_rewrite_next_t;
2635
2636 always_inline uword
2637 ip4_rewrite_inline (vlib_main_t * vm,
2638                     vlib_node_runtime_t * node,
2639                     vlib_frame_t * frame,
2640                     int rewrite_for_locally_received_packets)
2641 {
2642   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2643   u32 * from = vlib_frame_vector_args (frame);
2644   u32 n_left_from, n_left_to_next, * to_next, next_index;
2645   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2646   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2647
2648   n_left_from = frame->n_vectors;
2649   next_index = node->cached_next_index;
2650   u32 cpu_index = os_get_cpu_number();
2651   
2652   while (n_left_from > 0)
2653     {
2654       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2655
2656       while (n_left_from >= 4 && n_left_to_next >= 2)
2657         {
2658           ip_adjacency_t * adj0, * adj1;
2659           vlib_buffer_t * p0, * p1;
2660           ip4_header_t * ip0, * ip1;
2661           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2662           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2663           u32 next0_override, next1_override;
2664       
2665           if (rewrite_for_locally_received_packets)
2666               next0_override = next1_override = 0;
2667
2668           /* Prefetch next iteration. */
2669           {
2670             vlib_buffer_t * p2, * p3;
2671
2672             p2 = vlib_get_buffer (vm, from[2]);
2673             p3 = vlib_get_buffer (vm, from[3]);
2674
2675             vlib_prefetch_buffer_header (p2, STORE);
2676             vlib_prefetch_buffer_header (p3, STORE);
2677
2678             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2679             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2680           }
2681
2682           pi0 = to_next[0] = from[0];
2683           pi1 = to_next[1] = from[1];
2684
2685           from += 2;
2686           n_left_from -= 2;
2687           to_next += 2;
2688           n_left_to_next -= 2;
2689       
2690           p0 = vlib_get_buffer (vm, pi0);
2691           p1 = vlib_get_buffer (vm, pi1);
2692
2693           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2694           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2695
2696           /* We should never rewrite a pkt using the MISS adjacency */
2697           ASSERT(adj_index0 && adj_index1);
2698
2699           ip0 = vlib_buffer_get_current (p0);
2700           ip1 = vlib_buffer_get_current (p1);
2701
2702           error0 = error1 = IP4_ERROR_NONE;
2703           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2704
2705           /* Decrement TTL & update checksum.
2706              Works either endian, so no need for byte swap. */
2707           if (! rewrite_for_locally_received_packets)
2708             {
2709               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2710
2711               /* Input node should have reject packets with ttl 0. */
2712               ASSERT (ip0->ttl > 0);
2713               ASSERT (ip1->ttl > 0);
2714
2715               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2716               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2717
2718               checksum0 += checksum0 >= 0xffff;
2719               checksum1 += checksum1 >= 0xffff;
2720
2721               ip0->checksum = checksum0;
2722               ip1->checksum = checksum1;
2723
2724               ttl0 -= 1;
2725               ttl1 -= 1;
2726
2727               ip0->ttl = ttl0;
2728               ip1->ttl = ttl1;
2729
2730               /*
2731                * If the ttl drops below 1 when forwarding, generate
2732                * an ICMP response.
2733                */
2734               if (PREDICT_FALSE(ttl0 <= 0))
2735                 {
2736                   error0 = IP4_ERROR_TIME_EXPIRED;
2737                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2738                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2739                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2740                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2741                 }
2742               if (PREDICT_FALSE(ttl1 <= 0))
2743                 {
2744                   error1 = IP4_ERROR_TIME_EXPIRED;
2745                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2746                   icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2747                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2748                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2749                 }
2750
2751               /* Verify checksum. */
2752               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2753               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2754             }
2755
2756           /* Rewrite packet header and updates lengths. */
2757           adj0 = ip_get_adjacency (lm, adj_index0);
2758           adj1 = ip_get_adjacency (lm, adj_index1);
2759       
2760           if (rewrite_for_locally_received_packets)
2761             {
2762               /*
2763                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2764                * we end up here with a local adjacency in hand
2765                * The local adj rewrite data is 0xfefe on purpose.
2766                * Bad engineer, no donut for you.
2767                */
2768               if (PREDICT_FALSE(adj0->lookup_next_index 
2769                                 == IP_LOOKUP_NEXT_LOCAL))
2770                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2771               if (PREDICT_FALSE(adj0->lookup_next_index
2772                                 == IP_LOOKUP_NEXT_ARP))
2773                 next0_override = IP4_REWRITE_NEXT_ARP;
2774               if (PREDICT_FALSE(adj1->lookup_next_index 
2775                                 == IP_LOOKUP_NEXT_LOCAL))
2776                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2777               if (PREDICT_FALSE(adj1->lookup_next_index
2778                                 == IP_LOOKUP_NEXT_ARP))
2779                 next1_override = IP4_REWRITE_NEXT_ARP;
2780             }
2781
2782           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2783           rw_len0 = adj0[0].rewrite_header.data_bytes;
2784           rw_len1 = adj1[0].rewrite_header.data_bytes;
2785
2786           /* Check MTU of outgoing interface. */
2787           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2788                     ? IP4_ERROR_MTU_EXCEEDED
2789                     : error0);
2790           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2791                     ? IP4_ERROR_MTU_EXCEEDED
2792                     : error1);
2793
2794           next0 = (error0 == IP4_ERROR_NONE)
2795             ? adj0[0].rewrite_header.next_index : next0;
2796
2797           if (rewrite_for_locally_received_packets)
2798               next0 = next0 && next0_override ? next0_override : next0;
2799
2800           next1 = (error1 == IP4_ERROR_NONE)
2801             ? adj1[0].rewrite_header.next_index : next1;
2802
2803           if (rewrite_for_locally_received_packets)
2804               next1 = next1 && next1_override ? next1_override : next1;
2805
2806           /* 
2807            * We've already accounted for an ethernet_header_t elsewhere
2808            */
2809           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2810               vlib_increment_combined_counter 
2811                   (&lm->adjacency_counters,
2812                    cpu_index, adj_index0, 
2813                    /* packet increment */ 0,
2814                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2815
2816           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2817               vlib_increment_combined_counter 
2818                   (&lm->adjacency_counters,
2819                    cpu_index, adj_index1, 
2820                    /* packet increment */ 0,
2821                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2822
2823           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2824            * to see the IP headerr */
2825           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2826             {
2827               p0->current_data -= rw_len0;
2828               p0->current_length += rw_len0;
2829               p0->error = error_node->errors[error0];
2830               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2831                   adj0[0].rewrite_header.sw_if_index;
2832             }
2833           if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2834             {
2835               p1->current_data -= rw_len1;
2836               p1->current_length += rw_len1;
2837               p1->error = error_node->errors[error1];
2838               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2839                   adj1[0].rewrite_header.sw_if_index;
2840             }
2841
2842           /* Guess we are only writing on simple Ethernet header. */
2843           vnet_rewrite_two_headers (adj0[0], adj1[0],
2844                                     ip0, ip1,
2845                                     sizeof (ethernet_header_t));
2846       
2847           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2848                                            to_next, n_left_to_next,
2849                                            pi0, pi1, next0, next1);
2850         }
2851
2852       while (n_left_from > 0 && n_left_to_next > 0)
2853         {
2854           ip_adjacency_t * adj0;
2855           vlib_buffer_t * p0;
2856           ip4_header_t * ip0;
2857           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2858           u32 next0_override;
2859       
2860           if (rewrite_for_locally_received_packets)
2861               next0_override = 0;
2862
2863           pi0 = to_next[0] = from[0];
2864
2865           p0 = vlib_get_buffer (vm, pi0);
2866
2867           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2868
2869           /* We should never rewrite a pkt using the MISS adjacency */
2870           ASSERT(adj_index0);
2871
2872           adj0 = ip_get_adjacency (lm, adj_index0);
2873       
2874           ip0 = vlib_buffer_get_current (p0);
2875
2876           error0 = IP4_ERROR_NONE;
2877           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
2878
2879           /* Decrement TTL & update checksum. */
2880           if (! rewrite_for_locally_received_packets)
2881             {
2882               i32 ttl0 = ip0->ttl;
2883
2884               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2885
2886               checksum0 += checksum0 >= 0xffff;
2887
2888               ip0->checksum = checksum0;
2889
2890               ASSERT (ip0->ttl > 0);
2891
2892               ttl0 -= 1;
2893
2894               ip0->ttl = ttl0;
2895
2896               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2897
2898               if (PREDICT_FALSE(ttl0 <= 0))
2899                 {
2900                   /*
2901                    * If the ttl drops below 1 when forwarding, generate
2902                    * an ICMP response.
2903                    */
2904                   error0 = IP4_ERROR_TIME_EXPIRED;
2905                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2906                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2907                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2908                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2909                 }
2910             }
2911
2912           if (rewrite_for_locally_received_packets)
2913             {
2914               /*
2915                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2916                * we end up here with a local adjacency in hand
2917                * The local adj rewrite data is 0xfefe on purpose.
2918                * Bad engineer, no donut for you.
2919                */
2920               if (PREDICT_FALSE(adj0->lookup_next_index 
2921                                 == IP_LOOKUP_NEXT_LOCAL))
2922                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2923               /* 
2924                * We have to override the next_index in ARP adjacencies,
2925                * because they're set up for ip4-arp, not this node...
2926                */
2927               if (PREDICT_FALSE(adj0->lookup_next_index
2928                                 == IP_LOOKUP_NEXT_ARP))
2929                 next0_override = IP4_REWRITE_NEXT_ARP;
2930             }
2931
2932           /* Guess we are only writing on simple Ethernet header. */
2933           vnet_rewrite_one_header (adj0[0], ip0, 
2934                                    sizeof (ethernet_header_t));
2935           
2936           /* Update packet buffer attributes/set output interface. */
2937           rw_len0 = adj0[0].rewrite_header.data_bytes;
2938           
2939           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2940               vlib_increment_combined_counter 
2941                   (&lm->adjacency_counters,
2942                    cpu_index, adj_index0, 
2943                    /* packet increment */ 0,
2944                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2945           
2946           /* Check MTU of outgoing interface. */
2947           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2948                     > adj0[0].rewrite_header.max_l3_packet_bytes
2949                     ? IP4_ERROR_MTU_EXCEEDED
2950                     : error0);
2951
2952           p0->error = error_node->errors[error0];
2953
2954           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2955            * to see the IP headerr */
2956           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2957             {
2958               p0->current_data -= rw_len0;
2959               p0->current_length += rw_len0;
2960
2961               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2962                   adj0[0].rewrite_header.sw_if_index;
2963               next0 = adj0[0].rewrite_header.next_index;
2964             }
2965
2966           if (rewrite_for_locally_received_packets)
2967               next0 = next0 && next0_override ? next0_override : next0;
2968
2969           from += 1;
2970           n_left_from -= 1;
2971           to_next += 1;
2972           n_left_to_next -= 1;
2973       
2974           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2975                                            to_next, n_left_to_next,
2976                                            pi0, next0);
2977         }
2978   
2979       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2980     }
2981
2982   /* Need to do trace after rewrites to pick up new packet data. */
2983   if (node->flags & VLIB_NODE_FLAG_TRACE)
2984     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2985
2986   return frame->n_vectors;
2987 }
2988
2989
2990 /** @brief IPv4 transit rewrite node.
2991     @node ip4-rewrite-transit
2992
2993     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2994     header checksum, fetch the ip adjacency, check the outbound mtu,
2995     apply the adjacency rewrite, and send pkts to the adjacency
2996     rewrite header's rewrite_next_index.
2997
2998     @param vm vlib_main_t corresponding to the current thread
2999     @param node vlib_node_runtime_t
3000     @param frame vlib_frame_t whose contents should be dispatched
3001
3002     @par Graph mechanics: buffer metadata, next index usage
3003
3004     @em Uses:
3005     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
3006         - the rewrite adjacency index
3007     - <code>adj->lookup_next_index</code>
3008         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
3009           the packet will be dropped. 
3010     - <code>adj->rewrite_header</code>
3011         - Rewrite string length, rewrite string, next_index
3012
3013     @em Sets:
3014     - <code>b->current_data, b->current_length</code>
3015         - Updated net of applying the rewrite string
3016
3017     <em>Next Indices:</em>
3018     - <code> adj->rewrite_header.next_index </code>
3019       or @c error-drop 
3020 */
3021 static uword
3022 ip4_rewrite_transit (vlib_main_t * vm,
3023                      vlib_node_runtime_t * node,
3024                      vlib_frame_t * frame)
3025 {
3026   return ip4_rewrite_inline (vm, node, frame,
3027                              /* rewrite_for_locally_received_packets */ 0);
3028 }
3029
3030 /** @brief IPv4 local rewrite node.
3031     @node ip4-rewrite-local
3032
3033     This is the IPv4 local rewrite node. Fetch the ip adjacency, check
3034     the outbound interface mtu, apply the adjacency rewrite, and send
3035     pkts to the adjacency rewrite header's rewrite_next_index. Deal
3036     with hemorrhoids of the form "some clown sends an icmp4 w/ src =
3037     dst = interface addr."
3038
3039     @param vm vlib_main_t corresponding to the current thread
3040     @param node vlib_node_runtime_t
3041     @param frame vlib_frame_t whose contents should be dispatched
3042
3043     @par Graph mechanics: buffer metadata, next index usage
3044
3045     @em Uses:
3046     - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
3047         - the rewrite adjacency index
3048     - <code>adj->lookup_next_index</code>
3049         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
3050           the packet will be dropped. 
3051     - <code>adj->rewrite_header</code>
3052         - Rewrite string length, rewrite string, next_index
3053
3054     @em Sets:
3055     - <code>b->current_data, b->current_length</code>
3056         - Updated net of applying the rewrite string
3057
3058     <em>Next Indices:</em>
3059     - <code> adj->rewrite_header.next_index </code>
3060       or @c error-drop 
3061 */
3062
3063 static uword
3064 ip4_rewrite_local (vlib_main_t * vm,
3065                    vlib_node_runtime_t * node,
3066                    vlib_frame_t * frame)
3067 {
3068   return ip4_rewrite_inline (vm, node, frame,
3069                              /* rewrite_for_locally_received_packets */ 1);
3070 }
3071
3072 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
3073   .function = ip4_rewrite_transit,
3074   .name = "ip4-rewrite-transit",
3075   .vector_size = sizeof (u32),
3076
3077   .format_trace = format_ip4_rewrite_trace,
3078
3079   .n_next_nodes = 3,
3080   .next_nodes = {
3081     [IP4_REWRITE_NEXT_DROP] = "error-drop",
3082     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
3083     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3084   },
3085 };
3086
3087 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
3088
3089 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
3090   .function = ip4_rewrite_local,
3091   .name = "ip4-rewrite-local",
3092   .vector_size = sizeof (u32),
3093
3094   .sibling_of = "ip4-rewrite-transit",
3095
3096   .format_trace = format_ip4_rewrite_trace,
3097
3098   .n_next_nodes = 0,
3099 };
3100
3101 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
3102
3103 static clib_error_t *
3104 add_del_interface_table (vlib_main_t * vm,
3105                          unformat_input_t * input,
3106                          vlib_cli_command_t * cmd)
3107 {
3108   vnet_main_t * vnm = vnet_get_main();
3109   clib_error_t * error = 0;
3110   u32 sw_if_index, table_id;
3111
3112   sw_if_index = ~0;
3113
3114   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
3115     {
3116       error = clib_error_return (0, "unknown interface `%U'",
3117                                  format_unformat_error, input);
3118       goto done;
3119     }
3120
3121   if (unformat (input, "%d", &table_id))
3122     ;
3123   else
3124     {
3125       error = clib_error_return (0, "expected table id `%U'",
3126                                  format_unformat_error, input);
3127       goto done;
3128     }
3129
3130   {
3131     ip4_main_t * im = &ip4_main;
3132     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
3133
3134     if (fib) 
3135       {
3136         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
3137         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
3138     }
3139   }
3140
3141  done:
3142   return error;
3143 }
3144
3145 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
3146   .path = "set interface ip table",
3147   .function = add_del_interface_table,
3148   .short_help = "Add/delete FIB table id for interface",
3149 };
3150
3151
3152 static uword
3153 ip4_lookup_multicast (vlib_main_t * vm,
3154                       vlib_node_runtime_t * node,
3155                       vlib_frame_t * frame)
3156 {
3157   ip4_main_t * im = &ip4_main;
3158   ip_lookup_main_t * lm = &im->lookup_main;
3159   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
3160   u32 n_left_from, n_left_to_next, * from, * to_next;
3161   ip_lookup_next_t next;
3162   u32 cpu_index = os_get_cpu_number();
3163
3164   from = vlib_frame_vector_args (frame);
3165   n_left_from = frame->n_vectors;
3166   next = node->cached_next_index;
3167
3168   while (n_left_from > 0)
3169     {
3170       vlib_get_next_frame (vm, node, next,
3171                            to_next, n_left_to_next);
3172
3173       while (n_left_from >= 4 && n_left_to_next >= 2)
3174         {
3175           vlib_buffer_t * p0, * p1;
3176           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
3177           ip_lookup_next_t next0, next1;
3178           ip4_header_t * ip0, * ip1;
3179           ip_adjacency_t * adj0, * adj1;
3180           u32 fib_index0, fib_index1;
3181           u32 flow_hash_config0, flow_hash_config1;
3182
3183           /* Prefetch next iteration. */
3184           {
3185             vlib_buffer_t * p2, * p3;
3186
3187             p2 = vlib_get_buffer (vm, from[2]);
3188             p3 = vlib_get_buffer (vm, from[3]);
3189
3190             vlib_prefetch_buffer_header (p2, LOAD);
3191             vlib_prefetch_buffer_header (p3, LOAD);
3192
3193             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
3194             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
3195           }
3196
3197           pi0 = to_next[0] = from[0];
3198           pi1 = to_next[1] = from[1];
3199
3200           p0 = vlib_get_buffer (vm, pi0);
3201           p1 = vlib_get_buffer (vm, pi1);
3202
3203           ip0 = vlib_buffer_get_current (p0);
3204           ip1 = vlib_buffer_get_current (p1);
3205
3206           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3207           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
3208           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3209             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3210           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
3211             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
3212
3213           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3214                                               &ip0->dst_address, p0);
3215           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
3216                                               &ip1->dst_address, p1);
3217
3218           adj0 = ip_get_adjacency (lm, adj_index0);
3219           adj1 = ip_get_adjacency (lm, adj_index1);
3220
3221           next0 = adj0->lookup_next_index;
3222           next1 = adj1->lookup_next_index;
3223
3224           flow_hash_config0 = 
3225               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3226
3227           flow_hash_config1 = 
3228               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
3229
3230           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
3231               (ip0, flow_hash_config0);
3232                                                                   
3233           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
3234               (ip1, flow_hash_config1);
3235
3236           ASSERT (adj0->n_adj > 0);
3237           ASSERT (adj1->n_adj > 0);
3238           ASSERT (is_pow2 (adj0->n_adj));
3239           ASSERT (is_pow2 (adj1->n_adj));
3240           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3241           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
3242
3243           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3244           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
3245
3246           if (1) /* $$$$$$ HACK FIXME */
3247           vlib_increment_combined_counter 
3248               (cm, cpu_index, adj_index0, 1,
3249                vlib_buffer_length_in_chain (vm, p0));
3250           if (1) /* $$$$$$ HACK FIXME */
3251           vlib_increment_combined_counter 
3252               (cm, cpu_index, adj_index1, 1,
3253                vlib_buffer_length_in_chain (vm, p1));
3254
3255           from += 2;
3256           to_next += 2;
3257           n_left_to_next -= 2;
3258           n_left_from -= 2;
3259
3260           wrong_next = (next0 != next) + 2*(next1 != next);
3261           if (PREDICT_FALSE (wrong_next != 0))
3262             {
3263               switch (wrong_next)
3264                 {
3265                 case 1:
3266                   /* A B A */
3267                   to_next[-2] = pi1;
3268                   to_next -= 1;
3269                   n_left_to_next += 1;
3270                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3271                   break;
3272
3273                 case 2:
3274                   /* A A B */
3275                   to_next -= 1;
3276                   n_left_to_next += 1;
3277                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3278                   break;
3279
3280                 case 3:
3281                   /* A B C */
3282                   to_next -= 2;
3283                   n_left_to_next += 2;
3284                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3285                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3286                   if (next0 == next1)
3287                     {
3288                       /* A B B */
3289                       vlib_put_next_frame (vm, node, next, n_left_to_next);
3290                       next = next1;
3291                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
3292                     }
3293                 }
3294             }
3295         }
3296     
3297       while (n_left_from > 0 && n_left_to_next > 0)
3298         {
3299           vlib_buffer_t * p0;
3300           ip4_header_t * ip0;
3301           u32 pi0, adj_index0;
3302           ip_lookup_next_t next0;
3303           ip_adjacency_t * adj0;
3304           u32 fib_index0;
3305           u32 flow_hash_config0;
3306
3307           pi0 = from[0];
3308           to_next[0] = pi0;
3309
3310           p0 = vlib_get_buffer (vm, pi0);
3311
3312           ip0 = vlib_buffer_get_current (p0);
3313
3314           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
3315                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3316           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3317               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3318           
3319           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3320                                               &ip0->dst_address, p0);
3321
3322           adj0 = ip_get_adjacency (lm, adj_index0);
3323
3324           next0 = adj0->lookup_next_index;
3325
3326           flow_hash_config0 = 
3327               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3328
3329           vnet_buffer (p0)->ip.flow_hash = 
3330             ip4_compute_flow_hash (ip0, flow_hash_config0);
3331
3332           ASSERT (adj0->n_adj > 0);
3333           ASSERT (is_pow2 (adj0->n_adj));
3334           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3335
3336           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3337
3338           if (1) /* $$$$$$ HACK FIXME */
3339               vlib_increment_combined_counter 
3340                   (cm, cpu_index, adj_index0, 1,
3341                    vlib_buffer_length_in_chain (vm, p0));
3342
3343           from += 1;
3344           to_next += 1;
3345           n_left_to_next -= 1;
3346           n_left_from -= 1;
3347
3348           if (PREDICT_FALSE (next0 != next))
3349             {
3350               n_left_to_next += 1;
3351               vlib_put_next_frame (vm, node, next, n_left_to_next);
3352               next = next0;
3353               vlib_get_next_frame (vm, node, next,
3354                                    to_next, n_left_to_next);
3355               to_next[0] = pi0;
3356               to_next += 1;
3357               n_left_to_next -= 1;
3358             }
3359         }
3360
3361       vlib_put_next_frame (vm, node, next, n_left_to_next);
3362     }
3363
3364   if (node->flags & VLIB_NODE_FLAG_TRACE)
3365       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
3366
3367   return frame->n_vectors;
3368 }
3369
3370 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3371   .function = ip4_lookup_multicast,
3372   .name = "ip4-lookup-multicast",
3373   .vector_size = sizeof (u32),
3374   .sibling_of = "ip4-lookup",
3375   .format_trace = format_ip4_lookup_trace,
3376
3377   .n_next_nodes = 0,
3378 };
3379
3380 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
3381
3382 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3383   .function = ip4_drop,
3384   .name = "ip4-multicast",
3385   .vector_size = sizeof (u32),
3386
3387   .format_trace = format_ip4_forward_next_trace,
3388
3389   .n_next_nodes = 1,
3390   .next_nodes = {
3391     [0] = "error-drop",
3392   },
3393 };
3394
3395 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3396 {
3397   ip4_main_t * im = &ip4_main;
3398   ip4_fib_mtrie_t * mtrie0;
3399   ip4_fib_mtrie_leaf_t leaf0;
3400   u32 adj_index0;
3401     
3402   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3403
3404   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3405   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3406   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3407   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3408   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3409   
3410   /* Handle default route. */
3411   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3412   
3413   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3414   
3415   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3416                                                   a, 
3417                                                   /* no_default_route */ 0);
3418 }
3419  
3420 static clib_error_t *
3421 test_lookup_command_fn (vlib_main_t * vm,
3422                         unformat_input_t * input,
3423                         vlib_cli_command_t * cmd)
3424 {
3425   u32 table_id = 0;
3426   f64 count = 1;
3427   u32 n;
3428   int i;
3429   ip4_address_t ip4_base_address;
3430   u64 errors = 0;
3431
3432   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3433       if (unformat (input, "table %d", &table_id))
3434         ;
3435       else if (unformat (input, "count %f", &count))
3436         ;
3437
3438       else if (unformat (input, "%U",
3439                          unformat_ip4_address, &ip4_base_address))
3440         ;
3441       else
3442         return clib_error_return (0, "unknown input `%U'",
3443                                   format_unformat_error, input);
3444   }
3445
3446   n = count;
3447
3448   for (i = 0; i < n; i++)
3449     {
3450       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3451         errors++;
3452
3453       ip4_base_address.as_u32 = 
3454         clib_host_to_net_u32 (1 + 
3455                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3456     }
3457
3458   if (errors) 
3459     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3460   else
3461     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3462
3463   return 0;
3464 }
3465
3466 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3467     .path = "test lookup",
3468     .short_help = "test lookup",
3469     .function = test_lookup_command_fn,
3470 };
3471
3472 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3473 {
3474   ip4_main_t * im4 = &ip4_main;
3475   ip4_fib_t * fib;
3476   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3477
3478   if (p == 0)
3479     return VNET_API_ERROR_NO_SUCH_FIB;
3480
3481   fib = vec_elt_at_index (im4->fibs, p[0]);
3482
3483   fib->flow_hash_config = flow_hash_config;
3484   return 0;
3485 }
3486  
3487 static clib_error_t *
3488 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3489                              unformat_input_t * input,
3490                              vlib_cli_command_t * cmd)
3491 {
3492   int matched = 0;
3493   u32 table_id = 0;
3494   u32 flow_hash_config = 0;
3495   int rv;
3496
3497   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3498     if (unformat (input, "table %d", &table_id))
3499       matched = 1;
3500 #define _(a,v) \
3501     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3502     foreach_flow_hash_bit
3503 #undef _
3504     else break;
3505   }
3506   
3507   if (matched == 0)
3508     return clib_error_return (0, "unknown input `%U'",
3509                               format_unformat_error, input);
3510   
3511   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3512   switch (rv)
3513     {
3514     case 0:
3515       break;
3516       
3517     case VNET_API_ERROR_NO_SUCH_FIB:
3518       return clib_error_return (0, "no such FIB table %d", table_id);
3519       
3520     default:
3521       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3522       break;
3523     }
3524   
3525   return 0;
3526 }
3527  
3528 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3529   .path = "set ip flow-hash",
3530   .short_help = 
3531   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3532   .function = set_ip_flow_hash_command_fn,
3533 };
3534  
3535 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3536                                  u32 table_index)
3537 {
3538   vnet_main_t * vnm = vnet_get_main();
3539   vnet_interface_main_t * im = &vnm->interface_main;
3540   ip4_main_t * ipm = &ip4_main;
3541   ip_lookup_main_t * lm = &ipm->lookup_main;
3542   vnet_classify_main_t * cm = &vnet_classify_main;
3543
3544   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3545     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3546
3547   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3548     return VNET_API_ERROR_NO_SUCH_ENTRY;
3549
3550   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3551   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3552
3553   return 0;
3554 }
3555
3556 static clib_error_t *
3557 set_ip_classify_command_fn (vlib_main_t * vm,
3558                             unformat_input_t * input,
3559                             vlib_cli_command_t * cmd)
3560 {
3561   u32 table_index = ~0;
3562   int table_index_set = 0;
3563   u32 sw_if_index = ~0;
3564   int rv;
3565   
3566   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3567     if (unformat (input, "table-index %d", &table_index))
3568       table_index_set = 1;
3569     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3570                        vnet_get_main(), &sw_if_index))
3571       ;
3572     else
3573       break;
3574   }
3575       
3576   if (table_index_set == 0)
3577     return clib_error_return (0, "classify table-index must be specified");
3578
3579   if (sw_if_index == ~0)
3580     return clib_error_return (0, "interface / subif must be specified");
3581
3582   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3583
3584   switch (rv)
3585     {
3586     case 0:
3587       break;
3588
3589     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3590       return clib_error_return (0, "No such interface");
3591
3592     case VNET_API_ERROR_NO_SUCH_ENTRY:
3593       return clib_error_return (0, "No such classifier table");
3594     }
3595   return 0;
3596 }
3597
3598 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3599     .path = "set ip classify",
3600     .short_help = 
3601     "set ip classify intfc <int> table-index <index>",
3602     .function = set_ip_classify_command_fn,
3603 };
3604