dd9fce4592b107fe865ee861d5ef8ed277c33136
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 /** for ethernet_header_t */
43 #include <vnet/ethernet/ethernet.h>
44 /** for ethernet_arp_header_t */
45 #include <vnet/ethernet/arp_packet.h>   
46 #include <vnet/ppp/ppp.h>
47 /** for srp_hw_interface_class */
48 #include <vnet/srp/srp.h>
49 /** for API error numbers */
50 #include <vnet/api_errno.h>     
51
52 /** @file
53     vnet ip4 forwarding
54 */
55
56 /* This is really, really simple but stupid fib. */
57 u32
58 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
59                            ip4_address_t * dst,
60                            u32 disable_default_route)
61 {
62   ip_lookup_main_t * lm = &im->lookup_main;
63   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
64   uword * p, * hash, key;
65   i32 i, i_min, dst_address, ai;
66
67   i_min = disable_default_route ? 1 : 0;
68   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
69   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
70     {
71       hash = fib->adj_index_by_dst_address[i];
72       if (! hash)
73         continue;
74
75       key = dst_address & im->fib_masks[i];
76       if ((p = hash_get (hash, key)) != 0)
77         {
78           ai = p[0];
79           goto done;
80         }
81     }
82
83   /* Nothing matches in table. */
84   ai = lm->miss_adj_index;
85
86  done:
87   return ai;
88 }
89
90 /** @brief Create FIB from table ID and init all hashing.
91     @param im - @ref ip4_main_t
92     @param table_id - table ID
93     @return fib - @ref ip4_fib_t
94 */
95 static ip4_fib_t *
96 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
97 {
98   ip4_fib_t * fib;
99   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
100   vec_add2 (im->fibs, fib, 1);
101   fib->table_id = table_id;
102   fib->index = fib - im->fibs;
103   /* IP_FLOW_HASH_DEFAULT is net value of 5 tuple flags without "reverse" bit */
104   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
105   fib->fwd_classify_table_index = ~0;
106   fib->rev_classify_table_index = ~0;
107   ip4_mtrie_init (&fib->mtrie);
108   return fib;
109 }
110
111 /** @brief Find existing or Create new FIB based on index
112     @param im @ref ip4_main_t
113     @param table_index_or_id - overloaded parameter referring
114            to the table or a table's index in the FIB vector
115     @param flags - used to check if table_index_or_id was a table or
116            an index (detected by @ref IP4_ROUTE_FLAG_FIB_INDEX)
117     @return either the existing or a new ip4_fib_t entry
118 */
119 ip4_fib_t *
120 find_ip4_fib_by_table_index_or_id (ip4_main_t * im,
121                                    u32 table_index_or_id, u32 flags)
122 {
123   uword * p, fib_index;
124
125   fib_index = table_index_or_id;
126   /* If this isn't a FIB_INDEX ... */
127   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
128     {
129       /* If passed ~0 then request the next table available */
130       if (table_index_or_id == ~0) {
131         table_index_or_id = 0;
132         while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
133           table_index_or_id++;
134         }
135         /* Create the next table and return the ip4_fib_t associated with it */
136         return create_fib_with_table_id (im, table_index_or_id);
137       }
138       /* A specific table_id was requested.. */
139       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
140       /* ... and if it doesn't exist create it else grab its index */
141       if (! p)
142         return create_fib_with_table_id (im, table_index_or_id);
143       fib_index = p[0];
144     }
145   /* Return the ip4_fib_t associated with this index */
146   return vec_elt_at_index (im->fibs, fib_index);
147 }
148
149 static void
150 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
151                                        ip4_fib_t * fib,
152                                        u32 address_length)
153 {
154   hash_t * h;
155   uword max_index;
156
157   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
158   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
159
160   fib->adj_index_by_dst_address[address_length] =
161     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
162
163   hash_set_flags (fib->adj_index_by_dst_address[address_length],
164                   HASH_FLAG_NO_AUTO_SHRINK);
165
166   h = hash_header (fib->adj_index_by_dst_address[address_length]);
167   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
168
169   /* Initialize new/old hash value vectors. */
170   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
171   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
172 }
173
174 static void
175 ip4_fib_set_adj_index (ip4_main_t * im,
176                        ip4_fib_t * fib,
177                        u32 flags,
178                        u32 dst_address_u32,
179                        u32 dst_address_length,
180                        u32 adj_index)
181 {
182   ip_lookup_main_t * lm = &im->lookup_main;
183   uword * hash;
184
185   if (vec_bytes(fib->old_hash_values))
186     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
187   if (vec_bytes(fib->new_hash_values))
188     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
189   fib->new_hash_values[0] = adj_index;
190
191   /* Make sure adj index is valid. */
192   if (CLIB_DEBUG > 0)
193     (void) ip_get_adjacency (lm, adj_index);
194
195   hash = fib->adj_index_by_dst_address[dst_address_length];
196
197   hash = _hash_set3 (hash, dst_address_u32,
198                      fib->new_hash_values,
199                      fib->old_hash_values);
200
201   fib->adj_index_by_dst_address[dst_address_length] = hash;
202
203   if (vec_len (im->add_del_route_callbacks) > 0)
204     {
205       ip4_add_del_route_callback_t * cb;
206       ip4_address_t d;
207       uword * p;
208
209       d.data_u32 = dst_address_u32;
210       vec_foreach (cb, im->add_del_route_callbacks)
211         if ((flags & cb->required_flags) == cb->required_flags)
212           cb->function (im, cb->function_opaque,
213                         fib, flags,
214                         &d, dst_address_length,
215                         fib->old_hash_values,
216                         fib->new_hash_values);
217
218       p = hash_get (hash, dst_address_u32);
219       /* hash_get should never return NULL here */
220       if (p)
221           clib_memcpy (p, fib->new_hash_values, 
222                        vec_bytes (fib->new_hash_values));
223       else
224           ASSERT(0);
225     }
226 }
227
228 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
229 {
230   ip_lookup_main_t * lm = &im->lookup_main;
231   ip4_fib_t * fib;
232   u32 dst_address, dst_address_length, adj_index, old_adj_index;
233   uword * hash, is_del;
234   ip4_add_del_route_callback_t * cb;
235
236   /* Either create new adjacency or use given one depending on arguments. */
237   if (a->n_add_adj > 0)
238     {
239       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
240       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
241     }
242   else
243     adj_index = a->adj_index;
244
245   dst_address = a->dst_address.data_u32;
246   dst_address_length = a->dst_address_length;
247   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
248
249   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
250   dst_address &= im->fib_masks[dst_address_length];
251
252   if (! fib->adj_index_by_dst_address[dst_address_length])
253     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
254
255   hash = fib->adj_index_by_dst_address[dst_address_length];
256
257   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
258
259   if (is_del)
260     {
261       fib->old_hash_values[0] = ~0;
262       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
263       fib->adj_index_by_dst_address[dst_address_length] = hash;
264
265       if (vec_len (im->add_del_route_callbacks) > 0
266           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
267         {
268           fib->new_hash_values[0] = ~0;
269           vec_foreach (cb, im->add_del_route_callbacks)
270             if ((a->flags & cb->required_flags) == cb->required_flags)
271               cb->function (im, cb->function_opaque,
272                             fib, a->flags,
273                             &a->dst_address, dst_address_length,
274                             fib->old_hash_values,
275                             fib->new_hash_values);
276         }
277     }
278   else
279     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
280                            adj_index);
281
282   old_adj_index = fib->old_hash_values[0];
283
284   /* Avoid spurious reference count increments */
285   if (old_adj_index == adj_index
286       && adj_index != ~0
287       && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
288     {
289       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
290       if (adj->share_count > 0)
291         adj->share_count --;
292     }
293
294   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
295                                is_del ? old_adj_index : adj_index,
296                                is_del);
297
298   /* Delete old adjacency index if present and changed. */
299   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
300       && old_adj_index != ~0
301       && old_adj_index != adj_index)
302     ip_del_adjacency (lm, old_adj_index);
303 }
304
305
306 u32
307 ip4_route_get_next_hop_adj (ip4_main_t * im,
308                             u32 fib_index,
309                             ip4_address_t *next_hop,
310                             u32 next_hop_sw_if_index,
311                             u32 explicit_fib_index)
312 {
313   ip_lookup_main_t * lm = &im->lookup_main;
314   vnet_main_t * vnm = vnet_get_main();
315   uword * nh_hash, * nh_result;
316   int is_interface_next_hop;
317   u32 nh_adj_index;
318   ip4_fib_t * fib;
319
320   fib = vec_elt_at_index (im->fibs, fib_index);
321
322   is_interface_next_hop = next_hop->data_u32 == 0;
323   if (is_interface_next_hop)
324     {
325       nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
326       if (nh_result)
327           nh_adj_index = *nh_result;
328       else
329         {
330            ip_adjacency_t * adj;
331            adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
332                                    &nh_adj_index);
333            ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
334            ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
335            hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
336         }
337     }
338   else if (next_hop_sw_if_index == ~0)
339     {
340       /* next-hop is recursive. we always need a indirect adj
341        * for recursive paths. Any LPM we perform now will give
342        * us a valid adj, but without tracking the next-hop we
343        * have no way to keep it valid.
344        */
345       ip_adjacency_t add_adj;
346       memset (&add_adj, 0, sizeof(add_adj));
347       add_adj.n_adj = 1;
348       add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
349       add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
350       add_adj.explicit_fib_index = explicit_fib_index;
351       ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
352     }
353   else
354     {
355       nh_hash = fib->adj_index_by_dst_address[32];
356       nh_result = hash_get (nh_hash, next_hop->data_u32);
357
358       /* Next hop must be known. */
359       if (! nh_result)
360         {
361           ip_adjacency_t * adj;
362
363           /* no /32 exists, get the longest prefix match */
364           nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
365                                                     next_hop, 0);
366           adj = ip_get_adjacency (lm, nh_adj_index);
367           /* if ARP interface adjacency is present, we need to
368              install ARP adjaceny for specific next hop */
369           if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
370               adj->arp.next_hop.ip4.as_u32 == 0)
371             {
372               nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
373             }
374         }
375       else
376         {
377           nh_adj_index = *nh_result;
378         }
379     }
380
381   return (nh_adj_index);
382 }
383
384 void
385 ip4_add_del_route_next_hop (ip4_main_t * im,
386                             u32 flags,
387                             ip4_address_t * dst_address,
388                             u32 dst_address_length,
389                             ip4_address_t * next_hop,
390                             u32 next_hop_sw_if_index,
391                             u32 next_hop_weight, u32 adj_index, 
392                             u32 explicit_fib_index)
393 {
394   vnet_main_t * vnm = vnet_get_main();
395   ip_lookup_main_t * lm = &im->lookup_main;
396   u32 fib_index;
397   ip4_fib_t * fib;
398   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
399   u32 dst_adj_index, nh_adj_index;
400   uword * dst_hash, * dst_result;
401   ip_adjacency_t * dst_adj;
402   ip_multipath_adjacency_t * old_mp, * new_mp;
403   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
404   clib_error_t * error = 0;
405
406   if (explicit_fib_index == (u32)~0)
407       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
408   else
409       fib_index = explicit_fib_index;
410
411   fib = vec_elt_at_index (im->fibs, fib_index);
412
413   /* Lookup next hop to be added or deleted. */
414   if (adj_index == (u32)~0)
415     {
416         nh_adj_index = ip4_route_get_next_hop_adj(im, fib_index,
417                                                   next_hop,
418                                                   next_hop_sw_if_index,
419                                                   explicit_fib_index);
420     }
421   else
422     {
423       nh_adj_index = adj_index;
424     }
425   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
426   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
427
428   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
429   dst_result = hash_get (dst_hash, dst_address_u32);
430   if (dst_result)
431     {
432       dst_adj_index = dst_result[0];
433       dst_adj = ip_get_adjacency (lm, dst_adj_index);
434     }
435   else
436     {
437       /* For deletes destination must be known. */
438       if (is_del)
439         {
440           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
441           error = clib_error_return (0, "unknown destination %U/%d",
442                                      format_ip4_address, dst_address,
443                                      dst_address_length);
444           goto done;
445         }
446
447       dst_adj_index = ~0;
448       dst_adj = 0;
449     }
450
451   /* Ignore adds of X/32 with next hop of X. */
452   if (! is_del
453       && dst_address_length == 32
454       && dst_address->data_u32 == next_hop->data_u32 
455       && adj_index != (u32)~0)
456     {
457       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
458       error = clib_error_return (0, "prefix matches next hop %U/%d",
459                                  format_ip4_address, dst_address,
460                                  dst_address_length);
461       goto done;
462     }
463
464   /* Destination is not known and default weight is set so add route
465      to existing non-multipath adjacency */
466   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
467     {
468       /* create / delete additional mapping of existing adjacency */
469       ip4_add_del_route_args_t a;
470
471       a.table_index_or_table_id = fib_index;
472       a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
473                  | IP4_ROUTE_FLAG_FIB_INDEX
474                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
475                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
476                              | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
477       a.dst_address = dst_address[0];
478       a.dst_address_length = dst_address_length;
479       a.adj_index = nh_adj_index;
480       a.add_adj = 0;
481       a.n_add_adj = 0;
482
483       ip4_add_del_route (im, &a);
484       goto done;
485     }
486
487   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
488
489   if (! ip_multipath_adjacency_add_del_next_hop
490       (lm, is_del,
491        old_mp_adj_index,
492        nh_adj_index,
493        next_hop_weight,
494        &new_mp_adj_index))
495     {
496       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
497       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
498                                  format_ip4_address, next_hop);
499       goto done;
500     }
501   
502   old_mp = new_mp = 0;
503   if (old_mp_adj_index != ~0)
504     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
505   if (new_mp_adj_index != ~0)
506     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
507
508   if (old_mp != new_mp)
509     {
510       ip4_add_del_route_args_t a;
511       ip_adjacency_t * adj;
512
513       a.table_index_or_table_id = fib_index;
514       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
515                  | IP4_ROUTE_FLAG_FIB_INDEX
516                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
517                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
518       a.dst_address = dst_address[0];
519       a.dst_address_length = dst_address_length;
520       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
521       a.add_adj = 0;
522       a.n_add_adj = 0;
523
524       ip4_add_del_route (im, &a);
525
526       adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index);
527       if (adj->n_adj == 1)
528         adj->share_count += is_del ? -1 : 1;
529     }
530
531  done:
532   if (error)
533     clib_error_report (error);
534 }
535
536 void *
537 ip4_get_route (ip4_main_t * im,
538                u32 table_index_or_table_id,
539                u32 flags,
540                u8 * address,
541                u32 address_length)
542 {
543   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
544   u32 dst_address = * (u32 *) address;
545   uword * hash, * p;
546
547   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
548   dst_address &= im->fib_masks[address_length];
549
550   hash = fib->adj_index_by_dst_address[address_length];
551   p = hash_get (hash, dst_address);
552   return (void *) p;
553 }
554
555 void
556 ip4_foreach_matching_route (ip4_main_t * im,
557                             u32 table_index_or_table_id,
558                             u32 flags,
559                             ip4_address_t * address,
560                             u32 address_length,
561                             ip4_address_t ** results,
562                             u8 ** result_lengths)
563 {
564   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
565   u32 dst_address = address->data_u32;
566   u32 this_length = address_length;
567   
568   if (*results)
569     _vec_len (*results) = 0;
570   if (*result_lengths)
571     _vec_len (*result_lengths) = 0;
572
573   while (this_length <= 32 && vec_len (results) == 0)
574     {
575       uword k, v;
576       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
577         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
578           {
579             ip4_address_t a;
580             a.data_u32 = k;
581             vec_add1 (*results, a);
582             vec_add1 (*result_lengths, this_length);
583           }
584       }));
585
586       this_length++;
587     }
588 }
589
590 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
591                                   u32 table_index_or_table_id,
592                                   u32 flags)
593 {
594   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
595   ip_lookup_main_t * lm = &im->lookup_main;
596   u32 i, l;
597   ip4_address_t a;
598   ip4_add_del_route_callback_t * cb;
599   static ip4_address_t * to_delete;
600
601   if (lm->n_adjacency_remaps == 0)
602     return;
603
604   for (l = 0; l <= 32; l++)
605     {
606       hash_pair_t * p;
607       uword * hash = fib->adj_index_by_dst_address[l];
608
609       if (hash_elts (hash) == 0)
610         continue;
611
612       if (to_delete)
613         _vec_len (to_delete) = 0;
614
615       hash_foreach_pair (p, hash, ({
616         u32 adj_index = p->value[0];
617         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
618
619         if (m)
620           {
621             /* Record destination address from hash key. */
622             a.data_u32 = p->key;
623
624             /* New adjacency points to nothing: so delete prefix. */
625             if (m == ~0)
626               vec_add1 (to_delete, a);
627             else
628               {
629                 /* Remap to new adjacency. */
630                 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
631
632                 /* Set new adjacency value. */
633                 fib->new_hash_values[0] = p->value[0] = m - 1;
634
635                 vec_foreach (cb, im->add_del_route_callbacks)
636                   if ((flags & cb->required_flags) == cb->required_flags)
637                     cb->function (im, cb->function_opaque,
638                                   fib, flags | IP4_ROUTE_FLAG_ADD,
639                                   &a, l,
640                                   fib->old_hash_values,
641                                   fib->new_hash_values);
642               }
643           }
644       }));
645
646       fib->new_hash_values[0] = ~0;
647       for (i = 0; i < vec_len (to_delete); i++)
648         {
649           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
650           vec_foreach (cb, im->add_del_route_callbacks)
651             if ((flags & cb->required_flags) == cb->required_flags)
652               cb->function (im, cb->function_opaque,
653                             fib, flags | IP4_ROUTE_FLAG_DEL,
654                             &a, l,
655                             fib->old_hash_values,
656                             fib->new_hash_values);
657         }
658     }
659
660   /* Also remap adjacencies in mtrie. */
661   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
662
663   /* Reset mapping table. */
664   vec_zero (lm->adjacency_remap_table);
665
666   /* All remaps have been performed. */
667   lm->n_adjacency_remaps = 0;
668 }
669
670 void ip4_delete_matching_routes (ip4_main_t * im,
671                                  u32 table_index_or_table_id,
672                                  u32 flags,
673                                  ip4_address_t * address,
674                                  u32 address_length)
675 {
676   static ip4_address_t * matching_addresses;
677   static u8 * matching_address_lengths;
678   u32 l, i;
679   ip4_add_del_route_args_t a;
680
681   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
682   a.table_index_or_table_id = table_index_or_table_id;
683   a.adj_index = ~0;
684   a.add_adj = 0;
685   a.n_add_adj = 0;
686
687   for (l = address_length + 1; l <= 32; l++)
688     {
689       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
690                                   address,
691                                   l,
692                                   &matching_addresses,
693                                   &matching_address_lengths);
694       for (i = 0; i < vec_len (matching_addresses); i++)
695         {
696           a.dst_address = matching_addresses[i];
697           a.dst_address_length = matching_address_lengths[i];
698           ip4_add_del_route (im, &a);
699         }
700     }
701
702   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
703 }
704
705 void
706 ip4_forward_next_trace (vlib_main_t * vm,
707                         vlib_node_runtime_t * node,
708                         vlib_frame_t * frame,
709                         vlib_rx_or_tx_t which_adj_index);
710
711 always_inline uword
712 ip4_lookup_inline (vlib_main_t * vm,
713                    vlib_node_runtime_t * node,
714                    vlib_frame_t * frame,
715                    int lookup_for_responses_to_locally_received_packets,
716                    int is_indirect)
717 {
718   ip4_main_t * im = &ip4_main;
719   ip_lookup_main_t * lm = &im->lookup_main;
720   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
721   u32 n_left_from, n_left_to_next, * from, * to_next;
722   ip_lookup_next_t next;
723   u32 cpu_index = os_get_cpu_number();
724
725   from = vlib_frame_vector_args (frame);
726   n_left_from = frame->n_vectors;
727   next = node->cached_next_index;
728
729   while (n_left_from > 0)
730     {
731       vlib_get_next_frame (vm, node, next,
732                            to_next, n_left_to_next);
733
734       while (n_left_from >= 4 && n_left_to_next >= 2)
735         {
736           vlib_buffer_t * p0, * p1;
737           ip4_header_t * ip0, * ip1;
738           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
739           ip_lookup_next_t next0, next1;
740           ip_adjacency_t * adj0, * adj1;
741           ip4_fib_mtrie_t * mtrie0, * mtrie1;
742           ip4_fib_mtrie_leaf_t leaf0, leaf1;
743           ip4_address_t * dst_addr0, *dst_addr1;
744           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
745           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
746           u32 flow_hash_config0, flow_hash_config1;
747           u32 hash_c0, hash_c1;
748           u32 wrong_next;
749
750           /* Prefetch next iteration. */
751           {
752             vlib_buffer_t * p2, * p3;
753
754             p2 = vlib_get_buffer (vm, from[2]);
755             p3 = vlib_get_buffer (vm, from[3]);
756
757             vlib_prefetch_buffer_header (p2, LOAD);
758             vlib_prefetch_buffer_header (p3, LOAD);
759
760             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
761             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
762           }
763
764           pi0 = to_next[0] = from[0];
765           pi1 = to_next[1] = from[1];
766
767           p0 = vlib_get_buffer (vm, pi0);
768           p1 = vlib_get_buffer (vm, pi1);
769
770           ip0 = vlib_buffer_get_current (p0);
771           ip1 = vlib_buffer_get_current (p1);
772
773           if (is_indirect)
774             {
775               ip_adjacency_t * iadj0, * iadj1;
776               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
777               iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
778               dst_addr0 = &iadj0->indirect.next_hop.ip4;
779               dst_addr1 = &iadj1->indirect.next_hop.ip4;
780             }
781           else
782             {
783               dst_addr0 = &ip0->dst_address;
784               dst_addr1 = &ip1->dst_address;
785             }
786
787           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
788           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
789           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
790             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
791           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
792             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
793
794
795           if (! lookup_for_responses_to_locally_received_packets)
796             {
797               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
798               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
799
800               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
801
802               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
803               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
804             }
805
806           tcp0 = (void *) (ip0 + 1);
807           tcp1 = (void *) (ip1 + 1);
808
809           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
810                          || ip0->protocol == IP_PROTOCOL_UDP);
811           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
812                          || ip1->protocol == IP_PROTOCOL_UDP);
813
814           if (! lookup_for_responses_to_locally_received_packets)
815             {
816               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
817               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
818             }
819
820           if (! lookup_for_responses_to_locally_received_packets)
821             {
822               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
823               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
824             }
825
826           if (! lookup_for_responses_to_locally_received_packets)
827             {
828               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
829               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
830             }
831
832           if (lookup_for_responses_to_locally_received_packets)
833             {
834               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
835               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
836             }
837           else
838             {
839               /* Handle default route. */
840               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
841               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
842
843               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
844               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
845             }
846
847           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
848                                                            dst_addr0,
849                                                            /* no_default_route */ 0));
850           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
851                                                            dst_addr1,
852                                                            /* no_default_route */ 0));
853           adj0 = ip_get_adjacency (lm, adj_index0);
854           adj1 = ip_get_adjacency (lm, adj_index1);
855
856           next0 = adj0->lookup_next_index;
857           next1 = adj1->lookup_next_index;
858
859           /* Use flow hash to compute multipath adjacency. */
860           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
861           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
862           if (PREDICT_FALSE (adj0->n_adj > 1))
863             {
864               flow_hash_config0 = 
865                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
866               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
867                 ip4_compute_flow_hash (ip0, flow_hash_config0);
868             }
869           if (PREDICT_FALSE(adj1->n_adj > 1))
870             {
871               flow_hash_config1 = 
872                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
873               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
874                 ip4_compute_flow_hash (ip1, flow_hash_config1);
875             }
876
877           ASSERT (adj0->n_adj > 0);
878           ASSERT (adj1->n_adj > 0);
879           ASSERT (is_pow2 (adj0->n_adj));
880           ASSERT (is_pow2 (adj1->n_adj));
881           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
882           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
883
884           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
885           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
886
887           vlib_increment_combined_counter 
888               (cm, cpu_index, adj_index0, 1,
889                vlib_buffer_length_in_chain (vm, p0) 
890                + sizeof(ethernet_header_t));
891           vlib_increment_combined_counter 
892               (cm, cpu_index, adj_index1, 1,
893                vlib_buffer_length_in_chain (vm, p1)
894                + sizeof(ethernet_header_t));
895
896           from += 2;
897           to_next += 2;
898           n_left_to_next -= 2;
899           n_left_from -= 2;
900
901           wrong_next = (next0 != next) + 2*(next1 != next);
902           if (PREDICT_FALSE (wrong_next != 0))
903             {
904               switch (wrong_next)
905                 {
906                 case 1:
907                   /* A B A */
908                   to_next[-2] = pi1;
909                   to_next -= 1;
910                   n_left_to_next += 1;
911                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
912                   break;
913
914                 case 2:
915                   /* A A B */
916                   to_next -= 1;
917                   n_left_to_next += 1;
918                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
919                   break;
920
921                 case 3:
922                   /* A B C */
923                   to_next -= 2;
924                   n_left_to_next += 2;
925                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
926                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
927                   if (next0 == next1)
928                     {
929                       /* A B B */
930                       vlib_put_next_frame (vm, node, next, n_left_to_next);
931                       next = next1;
932                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
933                     }
934                 }
935             }
936         }
937     
938       while (n_left_from > 0 && n_left_to_next > 0)
939         {
940           vlib_buffer_t * p0;
941           ip4_header_t * ip0;
942           __attribute__((unused)) tcp_header_t * tcp0;
943           ip_lookup_next_t next0;
944           ip_adjacency_t * adj0;
945           ip4_fib_mtrie_t * mtrie0;
946           ip4_fib_mtrie_leaf_t leaf0;
947           ip4_address_t * dst_addr0;
948           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
949           u32 flow_hash_config0, hash_c0;
950
951           pi0 = from[0];
952           to_next[0] = pi0;
953
954           p0 = vlib_get_buffer (vm, pi0);
955
956           ip0 = vlib_buffer_get_current (p0);
957
958           if (is_indirect)
959             {
960               ip_adjacency_t * iadj0;
961               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
962               dst_addr0 = &iadj0->indirect.next_hop.ip4;
963             }
964           else
965             {
966               dst_addr0 = &ip0->dst_address;
967             }
968
969           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
970           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
971             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
972
973           if (! lookup_for_responses_to_locally_received_packets)
974             {
975               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
976
977               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
978
979               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
980             }
981
982           tcp0 = (void *) (ip0 + 1);
983
984           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
985                          || ip0->protocol == IP_PROTOCOL_UDP);
986
987           if (! lookup_for_responses_to_locally_received_packets)
988             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
989
990           if (! lookup_for_responses_to_locally_received_packets)
991             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
992
993           if (! lookup_for_responses_to_locally_received_packets)
994             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
995
996           if (lookup_for_responses_to_locally_received_packets)
997             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
998           else
999             {
1000               /* Handle default route. */
1001               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1002               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1003             }
1004
1005           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1006                                                            dst_addr0,
1007                                                            /* no_default_route */ 0));
1008
1009           adj0 = ip_get_adjacency (lm, adj_index0);
1010
1011           next0 = adj0->lookup_next_index;
1012
1013           /* Use flow hash to compute multipath adjacency. */
1014           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
1015           if (PREDICT_FALSE(adj0->n_adj > 1))
1016             {
1017               flow_hash_config0 = 
1018                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
1019
1020               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
1021                 ip4_compute_flow_hash (ip0, flow_hash_config0);
1022             }
1023
1024           ASSERT (adj0->n_adj > 0);
1025           ASSERT (is_pow2 (adj0->n_adj));
1026           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
1027
1028           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1029
1030           vlib_increment_combined_counter 
1031               (cm, cpu_index, adj_index0, 1,
1032                vlib_buffer_length_in_chain (vm, p0)
1033                + sizeof(ethernet_header_t));
1034
1035           from += 1;
1036           to_next += 1;
1037           n_left_to_next -= 1;
1038           n_left_from -= 1;
1039
1040           if (PREDICT_FALSE (next0 != next))
1041             {
1042               n_left_to_next += 1;
1043               vlib_put_next_frame (vm, node, next, n_left_to_next);
1044               next = next0;
1045               vlib_get_next_frame (vm, node, next,
1046                                    to_next, n_left_to_next);
1047               to_next[0] = pi0;
1048               to_next += 1;
1049               n_left_to_next -= 1;
1050             }
1051         }
1052
1053       vlib_put_next_frame (vm, node, next, n_left_to_next);
1054     }
1055
1056   if (node->flags & VLIB_NODE_FLAG_TRACE)
1057     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
1058
1059   return frame->n_vectors;
1060 }
1061
1062 /** @brief IPv4 lookup node.
1063     @node ip4-lookup
1064
1065     This is the main IPv4 lookup dispatch node.
1066
1067     @param vm vlib_main_t corresponding to the current thread
1068     @param node vlib_node_runtime_t
1069     @param frame vlib_frame_t whose contents should be dispatched
1070
1071     @par Graph mechanics: buffer metadata, next index usage
1072
1073     @em Uses:
1074     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
1075         - Indicates the @c sw_if_index value of the interface that the
1076           packet was received on.
1077     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
1078         - When the value is @c ~0 then the node performs a longest prefix
1079           match (LPM) for the packet destination address in the FIB attached
1080           to the receive interface.
1081         - Otherwise perform LPM for the packet destination address in the
1082           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
1083           value (0, 1, ...) and not a VRF id.
1084
1085     @em Sets:
1086     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
1087         - The lookup result adjacency index.
1088
1089     <em>Next Index:</em>
1090     - Dispatches the packet to the node index found in
1091       ip_adjacency_t @c adj->lookup_next_index
1092       (where @c adj is the lookup result adjacency).
1093 */
1094 static uword
1095 ip4_lookup (vlib_main_t * vm,
1096             vlib_node_runtime_t * node,
1097             vlib_frame_t * frame)
1098 {
1099   /* Arguments: lookup_for_responses_to_locally_received_packets, is_indirect. */
1100   enum { for_local_responses = 0, indirect = 0 };
1101
1102   return ip4_lookup_inline (vm, node, frame, for_local_responses, indirect);
1103 }
1104
/*
 * Fill in adj as the interface-route adjacency for sw_if_index.
 *
 * When the supporting hardware interface is of the Ethernet or SRP class
 * the adjacency gets IP_LOOKUP_NEXT_ARP, its rewrite is built for the
 * ip4-arp node with the ARP packet type, if_address_index is recorded and
 * the ARP next hop is cleared.  For every other hardware class the
 * adjacency gets IP_LOOKUP_NEXT_REWRITE and its rewrite is built for the
 * ip4-rewrite node with the IP4 packet type.
 */
1105 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
1106                                         ip_adjacency_t * adj,
1107                                         u32 sw_if_index,
1108                                         u32 if_address_index)
1109 {
1110   vnet_hw_interface_t * sup_hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1111   int needs_arp = (sup_hw->hw_class_index == ethernet_hw_interface_class.index
1112                    || sup_hw->hw_class_index == srp_hw_interface_class.index);
1113
1114   /* Start from the settings for interfaces that need no ARP. */
1115   ip_lookup_next_t lookup_next = IP_LOOKUP_NEXT_REWRITE;
1116   vnet_l3_packet_type_t l3_type = VNET_L3_PACKET_TYPE_IP4;
1117   u32 next_node_index = ip4_rewrite_node.index;
1118
1119   if (needs_arp)
1120     {
1121       /*
1122        * Awkward case: ip4-arp hands packets to the indicated interface
1123        * output node through rewrite_header.next_index, but packets can
1124        * also reach ip4_rewrite_local, which looks at the same field.
1125        * Net result: a hack in ip4_rewrite_local...
1126        */
1127       lookup_next = IP_LOOKUP_NEXT_ARP;
1128       l3_type = VNET_L3_PACKET_TYPE_ARP;
1129       next_node_index = ip4_arp_node.index;
1130
1131       adj->if_address_index = if_address_index;
1132
1133       /* Clear the ARP next-hop address. */
1134       adj->arp.next_hop.ip4.as_u32 = 0;
1135       ip46_address_reset (&adj->arp.next_hop);
1136     }
1137
1138   adj->lookup_next_index = lookup_next;
1139
1140   /* Build the rewrite for this interface using the choices made above. */
1141   vnet_rewrite_for_sw_interface
1142     (vnm,
1143      l3_type,
1144      sw_if_index,
1145      next_node_index,
1146      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1147      &adj->rewrite_header,
1148      sizeof (adj->rewrite_data));
1149 }
1150
/*
 * Install the FIB entries implied by interface address `a` of sw_if_index
 * into the FIB selected by fib_index:
 *   - when the prefix is shorter than /32, an interface route for the
 *     prefix; its adjacency index is also stored in
 *     a->neighbor_probe_adj_index;
 *   - always, a /32 route for the address itself whose adjacency uses
 *     IP_LOOKUP_NEXT_LOCAL, or IP_LOOKUP_NEXT_CLASSIFY when a classify
 *     table index other than ~0 is recorded for the interface.
 * Both routes are added with IP4_ROUTE_FLAG_NO_REDISTRIBUTE.
 */
1151 static void
1152 ip4_add_interface_routes (u32 sw_if_index,
1153                           ip4_main_t * im, u32 fib_index,
1154                           ip_interface_address_t * a)
1155 {
1156   vnet_main_t * vnm = vnet_get_main();
1157   ip_lookup_main_t * lm = &im->lookup_main;
1158   ip_adjacency_t * adj;
1159   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1160   ip4_add_del_route_args_t x; /* reused for both route additions below */
1161   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1162   u32 classify_table_index;
1163
1164   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1165   x.table_index_or_table_id = fib_index;
1166   x.flags = (IP4_ROUTE_FLAG_ADD
1167              | IP4_ROUTE_FLAG_FIB_INDEX
1168              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1169   x.dst_address = address[0];
1170   x.dst_address_length = a->address_length;
1171   x.n_add_adj = 0;
1172   x.add_adj = 0;
1173
       /* Stays ~0 when the address is a /32 and no interface route is added. */
1174   a->neighbor_probe_adj_index = ~0;
1175   if (a->address_length < 32)
1176     {
           /* &x.adj_index is handed to ip_add_adjacency as an out parameter;
              the value is used right below as the new adjacency's index. */
1177       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1178                               &x.adj_index);
1179       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1180       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1181       ip4_add_del_route (im, &x);
1182       a->neighbor_probe_adj_index = x.adj_index;
1183     }
1184   
1185   /* Add e.g. 1.1.1.1/32 as local to this host. */
       /* A second, separate adjacency: adj and x.adj_index are overwritten. */
1186   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1187                           &x.adj_index);
1188   
       /* ~0 means "no classify table"; it is also used when sw_if_index lies
          beyond the end of the per-interface classify table vector. */
1189   classify_table_index = ~0;
1190   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1191     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1192   if (classify_table_index != (u32) ~0)
1193     {
1194       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1195       adj->classify.table_index = classify_table_index;
1196     }
1197   else
1198     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1199   
       /* Pool index of `a`, obtained by pointer difference from the pool base. */
1200   adj->if_address_index = a - lm->if_address_pool;
1201   adj->rewrite_header.sw_if_index = sw_if_index;
1202   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1203   /* 
1204    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1205    * fail an RPF-ish check, but still go thru the rewrite code...
1206    */
1207   adj->rewrite_header.data_bytes = 0;
1208
1209   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
       /* Same argument block as above, now as a host (/32) route. */
1210   x.dst_address_length = 32;
1211   ip4_add_del_route (im, &x);
1212 }
1213
/*
 * Remove the routes that belong to an interface address from the FIB
 * selected by fib_index: the interface route for address/address_length
 * (only attempted when address_length < 32), the /32 route for the address
 * itself, and finally whatever ip4_delete_matching_routes removes for
 * address/address_length.  Deletions are issued with
 * IP4_ROUTE_FLAG_NO_REDISTRIBUTE.
 */
1214 static void
1215 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index,
1216                           ip4_address_t * address, u32 address_length)
1217 {
1218   ip4_add_del_route_args_t del_args;
1219   /* Arguments shared by both deletions; no adjacency is supplied. */
1220   del_args.flags = (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
1221                     | IP4_ROUTE_FLAG_FIB_INDEX
1222                     | IP4_ROUTE_FLAG_DEL);
1223   del_args.table_index_or_table_id = fib_index;
1224   del_args.dst_address = *address;
1225   del_args.dst_address_length = address_length;
1226   del_args.add_adj = 0;
1227   del_args.n_add_adj = 0;
1228   del_args.adj_index = ~0;
1229
1230   /* Delete e.g. 1.0.0.0/8, the interface route. */
1231   if (address_length < 32)
1232     ip4_add_del_route (im, &del_args);
1233
1234   /* Delete e.g. 1.1.1.1/32, the route for the address itself. */
1235   del_args.dst_address_length = 32;
1236   ip4_add_del_route (im, &del_args);
1237
1238   /* Then hand address/address_length to ip4_delete_matching_routes. */
1239   ip4_delete_matching_routes (im, fib_index, IP4_ROUTE_FLAG_FIB_INDEX,
1240                               address, address_length);
1241 }
1242
/*
 * An IPv4 address paired with the software interface it is associated with.
 * NOTE(review): nothing in the visible part of this file uses this type.
 */
1243 typedef struct {
1244     u32 sw_if_index;        /* software interface index */
1245     ip4_address_t address;  /* the IPv4 address */
1246     u32 length;             /* presumably the prefix length in bits - TODO confirm */
1247 } ip4_interface_address_t;
1248
/*
 * Forward declaration; the definition follows directly below.  The
 * parameters named new_address / new_length here are called
 * address / address_length in the definition.
 */
1249 static clib_error_t *
1250 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1251                                         u32 sw_if_index,
1252                                         ip4_address_t * new_address,
1253                                         u32 new_length,
1254                                         u32 redistribute,
1255                                         u32 insert_routes,
1256                                         u32 is_del);
1257
/*
 * Add or delete an IPv4 address on a software interface.
 *
 * @param vm              not used by this function.
 * @param sw_if_index     interface the address belongs to.
 * @param address         the address to add or delete.
 * @param address_length  prefix length of the address.
 * @param redistribute    not used by this function.
 * @param insert_routes   when non-zero, and the interface is admin up, the
 *                        interface routes for the address are added/deleted
 *                        as well.
 * @param is_del          non-zero to delete the address, zero to add it.
 * @return 0 on success; otherwise the conflict error created here (add
 *         only) or the error returned by ip_interface_address_add_del.
 *
 * The address is bound to the FIB currently recorded for sw_if_index in
 * im->fib_index_by_sw_if_index.
 */
1258 static clib_error_t *
1259 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1260                                         u32 sw_if_index,
1261                                         ip4_address_t * address,
1262                                         u32 address_length,
1263                                         u32 redistribute,
1264                                         u32 insert_routes,
1265                                         u32 is_del)
1266 {
1267   vnet_main_t * vnm = vnet_get_main();
1268   ip4_main_t * im = &ip4_main;
1269   ip_lookup_main_t * lm = &im->lookup_main;
1270   clib_error_t * error = 0;
1271   u32 if_address_index, elts_before;
1272   ip4_address_fib_t ip4_af, * addr_fib = 0;
1273
1274   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1275   ip4_addr_fib_init (&ip4_af, address,
1276                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1277
1278   /* When adding an address check that it does not conflict with an existing address. */
1279   if (! is_del)
1280     {
1281       ip_interface_address_t * ia;
1282       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1283                                     0 /* honor unnumbered */,
1284       ({
1285         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1286
             /* Conflict: either prefix covers the other one's address.
                Nothing has been allocated yet, so returning here is safe. */
1287         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1288             || ip4_destination_matches_route (im, x, address, address_length))
1289           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1290                                     format_ip4_address_and_length, address, address_length,
1291                                     format_ip4_address_and_length, x, ia->address_length,
1292                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1293       }));
1294     }
1295
       /* Build the one-element address/FIB vector only after the conflict
          check: the early return above bypasses vec_free at `done`, so
          allocating any earlier would leak the vector on a conflict. */
1296   vec_add1 (addr_fib, ip4_af);
1297   elts_before = pool_elts (lm->if_address_pool);
1298
1299   error = ip_interface_address_add_del
1300     (lm,
1301      sw_if_index,
1302      addr_fib,
1303      address_length,
1304      is_del,
1305      &if_address_index);
1306   if (error)
1307     goto done;
1308   
1309   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1310     {
1311       if (is_del)
1312         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1313                                   address_length);
1314       
1315       else
1316           ip4_add_interface_routes (sw_if_index,
1317                                     im, ip4_af.fib_index,
1318                                     pool_elt_at_index 
1319                                     (lm->if_address_pool, if_address_index));
1320     }
1321
1322   /* Notify listeners only when the address pool actually grew or shrank;
          an unchanged element count means a duplicate address was added. */
1323   if (elts_before != pool_elts (lm->if_address_pool))
1324     {
1325       ip4_add_del_interface_address_callback_t * cb;
1326       vec_foreach (cb, im->add_del_interface_address_callbacks)
1327         cb->function (im, cb->function_opaque, sw_if_index,
1328                       address, address_length,
1329                       if_address_index,
1330                       is_del);
1331     }
1332
1333  done:
1334   vec_free (addr_fib);
1335   return error;
1336 }
1337
/*
 * Public entry point for adding (is_del == 0) or deleting (is_del != 0) an
 * interface address.  Forwards to the internal worker with both
 * redistribute and insert_routes enabled and returns its result.
 */
1338 clib_error_t *
1339 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1340                                ip4_address_t * address, u32 address_length,
1341                                u32 is_del)
1342 {
1343   enum { redistribute = 1, insert_routes = 1 };
1344
1345   return ip4_add_del_interface_address_internal (vm, sw_if_index,
1346                                                  address, address_length,
1347                                                  redistribute, insert_routes, is_del);
1348 }
1349
/*
 * Admin up/down handler for a software interface.  Walks the interface's
 * addresses and, depending on VNET_SW_INTERFACE_FLAG_ADMIN_UP in flags,
 * adds (up) or deletes (down) the interface routes of each address in the
 * FIB recorded for the interface.  Always returns 0.
 */
1350 static clib_error_t *
1351 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1352                                 u32 sw_if_index,
1353                                 u32 flags)
1354 {
1355   ip4_main_t * im = &ip4_main;
1356   ip_lookup_main_t * lm = &im->lookup_main;
1357   ip_interface_address_t * if_addr;
1358   u32 fib_index;
1359   u32 going_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? 1 : 0;
1360
1361   /* Fill in lookup tables with default table (0); new entries of
1362      if_address_pool_index_by_sw_if_index are initialised to ~0. */
1363   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1364   vec_validate_init_empty (lm->if_address_pool_index_by_sw_if_index,
1365                            sw_if_index, ~0);
1366
1367   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1368
1369   foreach_ip_interface_address (lm, if_addr, sw_if_index,
1370                                 0 /* honor unnumbered */,
1371   ({
1372     ip4_address_t * addr = ip_interface_address_get_address (lm, if_addr);
1373
1374     if (! going_up)
1375       ip4_del_interface_routes (im, fib_index,
1376                                 addr, if_addr->address_length);
1377     else
1378       ip4_add_interface_routes (sw_if_index, im, fib_index, if_addr);
1379   }));
1380
1381   return 0;
1382 }
1383
1384 /* Hook the handler above into the admin up/down function list. */
1385 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1386
1387 /* Built-in ip4 unicast rx feature path definition.
        Each entry names a graph node, the feature(s) it must run before, and
        the ip4_main field that receives the feature's index.  Read together,
        the runs_before constraints below order the unicast rx features as:
        ip4-inacl, ip4-source-check-via-rx, ip4-source-check-via-any,
        ip4-policer-classify, ipsec-input-ip4, vpath-input-ip4, ip4-lookup. */
1388 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
1389   .node_name = "ip4-inacl", 
1390   .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-rx", 0},
1391   .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
1392 };
1393
1394 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
1395   .node_name = "ip4-source-check-via-rx",
1396   .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-any", 0},
1397   .feature_index = 
1398   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
1399 };
1400
1401 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
1402   .node_name = "ip4-source-check-via-any",
1403   .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
1404   .feature_index = 
1405   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
1406 };
1407
     /* Only constrained to run before ip4-policer-classify; no ordering
        relative to the source-check features is declared here. */
1408 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
1409   .node_name = "ip4-source-and-port-range-check-rx",
1410   .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
1411   .feature_index =
1412   &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
1413 };
1414
1415 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
1416   .node_name = "ip4-policer-classify",
1417   .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip4", 0},
1418   .feature_index =
1419   &ip4_main.ip4_unicast_rx_feature_policer_classify,
1420 };
1421
1422 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
1423   .node_name = "ipsec-input-ip4",
1424   .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip4", 0},
1425   .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
1426 };
1427
1428 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
1429   .node_name = "vpath-input-ip4",
1430   .runs_before = ORDER_CONSTRAINTS {"ip4-lookup", 0},
1431   .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
1432 };
1433
1434 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
1435   .node_name = "ip4-lookup",
1436   .runs_before = 0, /* not before any other features */
1437   .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
1438 };
1439
1440 /* Built-in ip4 multicast rx feature path definition:
        vpath-input-ip4 runs before ip4-lookup-multicast. */
1441 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
1442   .node_name = "vpath-input-ip4",
1443   .runs_before = ORDER_CONSTRAINTS {"ip4-lookup-multicast", 0},
1444   .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
1445 };
1446
1447 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
1448   .node_name = "ip4-lookup-multicast",
1449   .runs_before = 0, /* not before any other features */
1450   .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
1451 };
1452
     /* Start-node name lists handed to ip_feature_init_cast by
        ip4_feature_init: the rx list for feature classes below
        VNET_IP_TX_FEAT, the tx list for the others. */
1453 static char * rx_feature_start_nodes[] = 
1454   { "ip4-input", "ip4-input-no-checksum"};
1455
1456 static char * tx_feature_start_nodes[] = 
1457 { "ip4-rewrite-transit"};
1458
1459 /* Source and port-range check ip4 tx feature path definition */
1460 VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
1461   .node_name = "ip4-source-and-port-range-check-tx",
1462   .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
1463   .feature_index =
1464   &ip4_main.ip4_unicast_tx_feature_source_and_port_range_check,
1465
1466 };
1467
1468 /* Built-in ip4 tx feature path definition */
1469 VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
1470   .node_name = "interface-output",
1471   .runs_before = 0, /* not before any other features */
1472   .feature_index = &ip4_main.ip4_tx_feature_interface_output,
1473 };
1474
1475
/*
 * Initialise the feature configuration of every IP feature class
 * (0 .. VNET_N_IP_FEAT - 1) by calling ip_feature_init_cast with the start
 * nodes that apply to the class.  Stops at, and returns, the first error;
 * returns 0 when every class initialised cleanly.
 */
1476 static clib_error_t *
1477 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
1478 {
1479   ip_lookup_main_t * lm = &im->lookup_main;
1480   vnet_cast_t cast;
1481
1482   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
1483     {
1484       ip_config_main_t * cm = &lm->feature_config_mains[cast];
1485       vnet_config_main_t * vcm = &cm->config_main;
1486       clib_error_t * error;
1487       char ** start_nodes;
1488       int n_start_nodes;
1489
1490       /* Feature classes below VNET_IP_TX_FEAT use the rx start nodes;
1491          the remaining ones use the tx start nodes. */
1492       if (cast >= VNET_IP_TX_FEAT)
1493         {
1494           start_nodes = tx_feature_start_nodes;
1495           n_start_nodes = ARRAY_LEN (tx_feature_start_nodes);
1496         }
1497       else
1498         {
1499           start_nodes = rx_feature_start_nodes;
1500           n_start_nodes = ARRAY_LEN (rx_feature_start_nodes);
1501         }
1502
1503       error = ip_feature_init_cast (vm, cm, vcm,
1504                                     start_nodes, n_start_nodes,
1505                                     cast,
1506                                     1 /* is_ip4 */);
1507       if (error)
1508         return error;
1509     }
1510
1511   return 0;
1512 }
1513
/*
 * Software interface add/delete handler.  For every IP feature class the
 * interface's config index is looked up (created as ~0 if the table is too
 * short), the class's final feature is added (is_add) or removed (!is_add)
 * through the vnet config machinery, and the resulting config index is
 * stored back.  Always returns 0.
 */
1514 static clib_error_t *
1515 ip4_sw_interface_add_del (vnet_main_t * vnm,
1516                           u32 sw_if_index,
1517                           u32 is_add)
1518 {
1519   ip4_main_t * im = &ip4_main;
1520   ip_lookup_main_t * lm = &im->lookup_main;
1521   vlib_main_t * vm = vnm->vlib_main;
1522   u32 cast;
1523
1524   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
1525     {
1526       ip_config_main_t * cm = &lm->feature_config_mains[cast];
1527       vnet_config_main_t * vcm = &cm->config_main;
1528       u32 config_index, terminal_feature;
1529
1530       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1531       config_index = cm->config_index_by_sw_if_index[sw_if_index];
1532
1533       /* Pick the feature registered with no runs_before constraint. */
1534       switch (cast)
1535         {
1536         case VNET_IP_RX_UNICAST_FEAT:
1537           terminal_feature = im->ip4_unicast_rx_feature_lookup;
1538           break;
1539         case VNET_IP_RX_MULTICAST_FEAT:
1540           terminal_feature = im->ip4_multicast_rx_feature_lookup;
1541           break;
1542         default:
1543           terminal_feature = im->ip4_tx_feature_interface_output;
1544           break;
1545         }
1546       if (! is_add)
1547         config_index = vnet_config_del_feature (vm, vcm, config_index, terminal_feature,
1548                                                 /* config data */ 0,
1549                                                 /* # bytes of config data */ 0);
1550       else
1551         config_index = vnet_config_add_feature (vm, vcm, config_index, terminal_feature,
1552                                                 /* config data */ 0,
1553                                                 /* # bytes of config data */ 0);
1554
1555       /* note: do not update the tx feature count here. */
1556       cm->config_index_by_sw_if_index[sw_if_index] = config_index;
1557     }
1558
1559   return /* no error */ 0;
1560 }
1561
1562 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1563
     /* Forward declaration: the formatter is defined further down in this
        file but is referenced by the node registrations below. */
1564 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
1565
     /* Graph node "ip4-lookup": runs ip4_lookup, traces with
        format_ip4_lookup_trace, and takes its next nodes from the
        IP4_LOOKUP_N_NEXT / IP4_LOOKUP_NEXT_NODES definitions. */
1566 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1567   .function = ip4_lookup,
1568   .name = "ip4-lookup",
1569   .vector_size = sizeof (u32), /* frame elements are u32 buffer indices */
1570
1571   .format_trace = format_ip4_lookup_trace,
1572
1573   .n_next_nodes = IP4_LOOKUP_N_NEXT,
1574   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1575 };
1576
     /* NOTE(review): presumably generates per-CPU-architecture variants of
        the node function - confirm against the vlib macro definition. */
1577 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
1578
/*
 * Node function of ip4-indirect: the shared lookup with is_indirect set, so
 * the address looked up is the indirect next hop of the adjacency found in
 * the buffer's ip.adj_index[VLIB_TX] instead of the packet's destination.
 */
1579 static uword
1580 ip4_indirect (vlib_main_t * vm,
1581                vlib_node_runtime_t * node,
1582                vlib_frame_t * frame)
1583 {
1584   /* Arguments: lookup_for_responses_to_locally_received_packets, is_indirect. */
1585   enum { for_local_responses = 0, indirect = 1 };
1586   return ip4_lookup_inline (vm, node, frame, for_local_responses, indirect);
1587 }
1588
     /* Graph node "ip4-indirect": runs ip4_indirect and reuses the ip4-lookup
        trace formatter.  It declares no next nodes of its own and is
        registered as a sibling of "ip4-lookup" - presumably so that it shares
        that node's next-node arcs; confirm against the vlib node docs. */
1589 VLIB_REGISTER_NODE (ip4_indirect_node) = {
1590   .function = ip4_indirect,
1591   .name = "ip4-indirect",
1592   .vector_size = sizeof (u32),
1593   .sibling_of = "ip4-lookup",
1594   .format_trace = format_ip4_lookup_trace,
1595
1596   .n_next_nodes = 0,
1597 };
1598
1599 VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect);
1600
1601
1602 /* Global IP4 main: the single ip4_main_t instance that the functions in
        this file reach through &ip4_main. */
1603 ip4_main_t ip4_main;
1604
/*
 * Init function for IPv4 lookup.  In order: fills the prefix-length mask
 * table, creates the FIB for table id 0, initialises the generic lookup
 * main, sets the packet-generator edit function for the ip4-lookup node,
 * builds the ARP request packet template and finally initialises the
 * feature paths.
 *
 * @param vm  vlib main, passed on to the template and feature initialisers.
 * @return the result of ip4_feature_init (0 when it reports no error).
 */
1605 clib_error_t *
1606 ip4_lookup_init (vlib_main_t * vm)
1607 {
1608   ip4_main_t * im = &ip4_main;
1609   clib_error_t * error;
1610   uword i;
1611
       /* fib_masks[i] is the mask for prefix length i, stored in network
          byte order.  Presumably pow2_mask (i) has the low i bits set, which
          the shift moves to the top of the 32-bit word - TODO confirm. */
1612   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1613     {
1614       u32 m;
1615
1616       if (i < 32)
1617         m = pow2_mask (i) << (32 - i);
1618       else 
1619         m = ~0; /* all ones for i >= 32 */
1620       im->fib_masks[i] = clib_host_to_net_u32 (m);
1621     }
1622
1623   /* Create FIB with index 0 and table id of 0. */
1624   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1625
1626   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1627
       /* Let the packet generator parse IPv4 headers for the ip4-lookup node. */
1628   {
1629     pg_node_t * pn;
1630     pn = pg_get_node (ip4_lookup_node.index);
1631     pn->unformat_edit = unformat_pg_ip4_header;
1632   }
1633
       /* Build the ARP request template: everything zero except the fixed
          header fields set through the _16/_8 helper macros below. */
1634   {
1635     ethernet_arp_header_t h;
1636
1637     memset (&h, 0, sizeof (h));
1638
1639     /* Set target ethernet address to all zeros.
            (Already zero after the memset of the whole header above.) */
1640     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1641
         /* _16 stores a 16-bit value in network byte order, _8 stores a byte. */
1642 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1643 #define _8(f,v) h.f = v;
1644     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1645     _16 (l3_type, ETHERNET_TYPE_IP4);
1646     _8 (n_l2_address_bytes, 6);
1647     _8 (n_l3_address_bytes, 4);
1648     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1649 #undef _16
1650 #undef _8
1651
1652     vlib_packet_template_init (vm,
1653                                &im->ip4_arp_request_packet_template,
1654                                /* data */ &h,
1655                                sizeof (h),
1656                                /* alloc chunk size */ 8,
1657                                "ip4 arp");
1658   }
1659
1660   error = ip4_feature_init (vm, im);
1661
1662   return error;
1663 }
1664
1665 VLIB_INIT_FUNCTION (ip4_lookup_init);
1666
1667 typedef struct {
1668   /* Adjacency taken. */
1669   u32 adj_index;
1670   u32 flow_hash;
1671   u32 fib_index;
1672
1673   /* Packet data, possibly *after* rewrite. */
1674   u8 packet_data[64 - 1*sizeof(u32)];
1675 } ip4_forward_next_trace_t;
1676
1677 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1678 {
1679   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1680   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1681   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1682   uword indent = format_get_indent (s);
1683   s = format (s, "%U%U",
1684                 format_white_space, indent,
1685                 format_ip4_header, t->packet_data);
1686   return s;
1687 }
1688
1689 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1690 {
1691   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1692   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1693   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1694   vnet_main_t * vnm = vnet_get_main();
1695   ip4_main_t * im = &ip4_main;
1696   uword indent = format_get_indent (s);
1697
1698   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1699               t->fib_index, t->adj_index, format_ip_adjacency,
1700               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1701   s = format (s, "\n%U%U",
1702               format_white_space, indent,
1703               format_ip4_header, t->packet_data);
1704   return s;
1705 }
1706
1707 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1708 {
1709   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1710   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1711   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1712   vnet_main_t * vnm = vnet_get_main();
1713   ip4_main_t * im = &ip4_main;
1714   uword indent = format_get_indent (s);
1715
1716   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
1717               t->fib_index, t->adj_index, format_ip_adjacency,
1718               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1719   s = format (s, "\n%U%U",
1720               format_white_space, indent,
1721               format_ip_adjacency_packet_data,
1722               vnm, &im->lookup_main, t->adj_index,
1723               t->packet_data, sizeof (t->packet_data));
1724   return s;
1725 }
1726
1727 /* Common trace function for all ip4-forward next nodes. */
1728 void
1729 ip4_forward_next_trace (vlib_main_t * vm,
1730                         vlib_node_runtime_t * node,
1731                         vlib_frame_t * frame,
1732                         vlib_rx_or_tx_t which_adj_index)
1733 {
1734   u32 * from, n_left;
1735   ip4_main_t * im = &ip4_main;
1736
1737   n_left = frame->n_vectors;
1738   from = vlib_frame_vector_args (frame);
1739   
1740   while (n_left >= 4)
1741     {
1742       u32 bi0, bi1;
1743       vlib_buffer_t * b0, * b1;
1744       ip4_forward_next_trace_t * t0, * t1;
1745
1746       /* Prefetch next iteration. */
1747       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1748       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1749
1750       bi0 = from[0];
1751       bi1 = from[1];
1752
1753       b0 = vlib_get_buffer (vm, bi0);
1754       b1 = vlib_get_buffer (vm, bi1);
1755
1756       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1757         {
1758           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1759           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1760           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1761           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1762               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1763               vec_elt (im->fib_index_by_sw_if_index,
1764                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1765
1766           clib_memcpy (t0->packet_data,
1767                   vlib_buffer_get_current (b0),
1768                   sizeof (t0->packet_data));
1769         }
1770       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1771         {
1772           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1773           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1774           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1775           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1776               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1777               vec_elt (im->fib_index_by_sw_if_index,
1778                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1779           clib_memcpy (t1->packet_data,
1780                   vlib_buffer_get_current (b1),
1781                   sizeof (t1->packet_data));
1782         }
1783       from += 2;
1784       n_left -= 2;
1785     }
1786
1787   while (n_left >= 1)
1788     {
1789       u32 bi0;
1790       vlib_buffer_t * b0;
1791       ip4_forward_next_trace_t * t0;
1792
1793       bi0 = from[0];
1794
1795       b0 = vlib_get_buffer (vm, bi0);
1796
1797       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1798         {
1799           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1800           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1801           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1802           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1803               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1804               vec_elt (im->fib_index_by_sw_if_index,
1805                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1806           clib_memcpy (t0->packet_data,
1807                   vlib_buffer_get_current (b0),
1808                   sizeof (t0->packet_data));
1809         }
1810       from += 1;
1811       n_left -= 1;
1812     }
1813 }
1814
1815 static uword
1816 ip4_drop_or_punt (vlib_main_t * vm,
1817                   vlib_node_runtime_t * node,
1818                   vlib_frame_t * frame,
1819                   ip4_error_t error_code)
1820 {
1821   u32 * buffers = vlib_frame_vector_args (frame);
1822   uword n_packets = frame->n_vectors;
1823
1824   vlib_error_drop_buffers (vm, node,
1825                            buffers,
1826                            /* stride */ 1,
1827                            n_packets,
1828                            /* next */ 0,
1829                            ip4_input_node.index,
1830                            error_code);
1831
1832   if (node->flags & VLIB_NODE_FLAG_TRACE)
1833     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1834
1835   return n_packets;
1836 }
1837
1838 static uword
1839 ip4_drop (vlib_main_t * vm,
1840           vlib_node_runtime_t * node,
1841           vlib_frame_t * frame)
1842 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1843
1844 static uword
1845 ip4_punt (vlib_main_t * vm,
1846           vlib_node_runtime_t * node,
1847           vlib_frame_t * frame)
1848 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1849
1850 static uword
1851 ip4_miss (vlib_main_t * vm,
1852           vlib_node_runtime_t * node,
1853           vlib_frame_t * frame)
1854 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1855
1856 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1857   .function = ip4_drop,
1858   .name = "ip4-drop",
1859   .vector_size = sizeof (u32),
1860
1861   .format_trace = format_ip4_forward_next_trace,
1862
1863   .n_next_nodes = 1,
1864   .next_nodes = {
1865     [0] = "error-drop",
1866   },
1867 };
1868
1869 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
1870
1871 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1872   .function = ip4_punt,
1873   .name = "ip4-punt",
1874   .vector_size = sizeof (u32),
1875
1876   .format_trace = format_ip4_forward_next_trace,
1877
1878   .n_next_nodes = 1,
1879   .next_nodes = {
1880     [0] = "error-punt",
1881   },
1882 };
1883
1884 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
1885
1886 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1887   .function = ip4_miss,
1888   .name = "ip4-miss",
1889   .vector_size = sizeof (u32),
1890
1891   .format_trace = format_ip4_forward_next_trace,
1892
1893   .n_next_nodes = 1,
1894   .next_nodes = {
1895     [0] = "error-drop",
1896   },
1897 };
1898
1899 VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss);
1900
1901 /* Compute TCP/UDP/ICMP4 checksum in software. */
1902 u16
1903 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1904                               ip4_header_t * ip0)
1905 {
1906   ip_csum_t sum0;
1907   u32 ip_header_length, payload_length_host_byte_order;
1908   u32 n_this_buffer, n_bytes_left;
1909   u16 sum16;
1910   void * data_this_buffer;
1911   
1912   /* Initialize checksum with ip header. */
1913   ip_header_length = ip4_header_bytes (ip0);
1914   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1915   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1916
1917   if (BITS (uword) == 32)
1918     {
1919       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1920       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1921     }
1922   else
1923     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1924
1925   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1926   data_this_buffer = (void *) ip0 + ip_header_length;
1927   if (n_this_buffer + ip_header_length > p0->current_length)
1928     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1929   while (1)
1930     {
1931       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1932       n_bytes_left -= n_this_buffer;
1933       if (n_bytes_left == 0)
1934         break;
1935
1936       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1937       p0 = vlib_get_buffer (vm, p0->next_buffer);
1938       data_this_buffer = vlib_buffer_get_current (p0);
1939       n_this_buffer = p0->current_length;
1940     }
1941
1942   sum16 = ~ ip_csum_fold (sum0);
1943
1944   return sum16;
1945 }
1946
1947 static u32
1948 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1949 {
1950   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1951   udp_header_t * udp0;
1952   u16 sum16;
1953
1954   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1955           || ip0->protocol == IP_PROTOCOL_UDP);
1956
1957   udp0 = (void *) (ip0 + 1);
1958   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1959     {
1960       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1961                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1962       return p0->flags;
1963     }
1964
1965   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1966
1967   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1968                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1969
1970   return p0->flags;
1971 }
1972
1973 static uword
1974 ip4_local (vlib_main_t * vm,
1975            vlib_node_runtime_t * node,
1976            vlib_frame_t * frame)
1977 {
1978   ip4_main_t * im = &ip4_main;
1979   ip_lookup_main_t * lm = &im->lookup_main;
1980   ip_local_next_t next_index;
1981   u32 * from, * to_next, n_left_from, n_left_to_next;
1982   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1983
1984   from = vlib_frame_vector_args (frame);
1985   n_left_from = frame->n_vectors;
1986   next_index = node->cached_next_index;
1987   
1988   if (node->flags & VLIB_NODE_FLAG_TRACE)
1989     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1990
1991   while (n_left_from > 0)
1992     {
1993       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1994
1995       while (n_left_from >= 4 && n_left_to_next >= 2)
1996         {
1997           vlib_buffer_t * p0, * p1;
1998           ip4_header_t * ip0, * ip1;
1999           udp_header_t * udp0, * udp1;
2000           ip4_fib_mtrie_t * mtrie0, * mtrie1;
2001           ip4_fib_mtrie_leaf_t leaf0, leaf1;
2002           ip_adjacency_t * adj0, * adj1;
2003           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
2004           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
2005           i32 len_diff0, len_diff1;
2006           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2007           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
2008           u8 enqueue_code;
2009       
2010           pi0 = to_next[0] = from[0];
2011           pi1 = to_next[1] = from[1];
2012           from += 2;
2013           n_left_from -= 2;
2014           to_next += 2;
2015           n_left_to_next -= 2;
2016       
2017           p0 = vlib_get_buffer (vm, pi0);
2018           p1 = vlib_get_buffer (vm, pi1);
2019
2020           ip0 = vlib_buffer_get_current (p0);
2021           ip1 = vlib_buffer_get_current (p1);
2022
2023           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2024                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2025           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
2026                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
2027
2028           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2029           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
2030
2031           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
2032
2033           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2034           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
2035
2036           /* Treat IP frag packets as "experimental" protocol for now
2037              until support of IP frag reassembly is implemented */
2038           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
2039           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
2040           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2041           is_udp1 = proto1 == IP_PROTOCOL_UDP;
2042           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2043           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
2044
2045           flags0 = p0->flags;
2046           flags1 = p1->flags;
2047
2048           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2049           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2050
2051           udp0 = ip4_next_header (ip0);
2052           udp1 = ip4_next_header (ip1);
2053
2054           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2055           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2056           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2057
2058           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2059           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
2060
2061           /* Verify UDP length. */
2062           ip_len0 = clib_net_to_host_u16 (ip0->length);
2063           ip_len1 = clib_net_to_host_u16 (ip1->length);
2064           udp_len0 = clib_net_to_host_u16 (udp0->length);
2065           udp_len1 = clib_net_to_host_u16 (udp1->length);
2066
2067           len_diff0 = ip_len0 - udp_len0;
2068           len_diff1 = ip_len1 - udp_len1;
2069
2070           len_diff0 = is_udp0 ? len_diff0 : 0;
2071           len_diff1 = is_udp1 ? len_diff1 : 0;
2072
2073           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
2074                                 & good_tcp_udp0 & good_tcp_udp1)))
2075             {
2076               if (is_tcp_udp0)
2077                 {
2078                   if (is_tcp_udp0
2079                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2080                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2081                   good_tcp_udp0 =
2082                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2083                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2084                 }
2085               if (is_tcp_udp1)
2086                 {
2087                   if (is_tcp_udp1
2088                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2089                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
2090                   good_tcp_udp1 =
2091                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2092                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2093                 }
2094             }
2095
2096           good_tcp_udp0 &= len_diff0 >= 0;
2097           good_tcp_udp1 &= len_diff1 >= 0;
2098
2099           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2100           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
2101
2102           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
2103
2104           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2105           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
2106
2107           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2108           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2109                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2110                     : error0);
2111           error1 = (is_tcp_udp1 && ! good_tcp_udp1
2112                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
2113                     : error1);
2114
2115           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2116           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
2117
2118           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2119           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2120
2121           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
2122           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2123
2124           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2125                                                            &ip0->src_address,
2126                                                            /* no_default_route */ 1));
2127           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
2128                                                            &ip1->src_address,
2129                                                            /* no_default_route */ 1));
2130
2131           adj0 = ip_get_adjacency (lm, adj_index0);
2132           adj1 = ip_get_adjacency (lm, adj_index1);
2133
2134           /* 
2135            * Must have a route to source otherwise we drop the packet.
2136            * ip4 broadcasts are accepted, e.g. to make dhcp client work
2137            */
2138           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2139                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2140                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2141                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2142                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2143                     ? IP4_ERROR_SRC_LOOKUP_MISS
2144                     : error0);
2145           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
2146                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2147                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
2148                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2149                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2150                     ? IP4_ERROR_SRC_LOOKUP_MISS
2151                     : error1);
2152
2153           next0 = lm->local_next_by_ip_protocol[proto0];
2154           next1 = lm->local_next_by_ip_protocol[proto1];
2155
2156           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2157           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
2158
2159           p0->error = error0 ? error_node->errors[error0] : 0;
2160           p1->error = error1 ? error_node->errors[error1] : 0;
2161
2162           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
2163
2164           if (PREDICT_FALSE (enqueue_code != 0))
2165             {
2166               switch (enqueue_code)
2167                 {
2168                 case 1:
2169                   /* A B A */
2170                   to_next[-2] = pi1;
2171                   to_next -= 1;
2172                   n_left_to_next += 1;
2173                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2174                   break;
2175
2176                 case 2:
2177                   /* A A B */
2178                   to_next -= 1;
2179                   n_left_to_next += 1;
2180                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2181                   break;
2182
2183                 case 3:
2184                   /* A B B or A B C */
2185                   to_next -= 2;
2186                   n_left_to_next += 2;
2187                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2188                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2189                   if (next0 == next1)
2190                     {
2191                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2192                       next_index = next1;
2193                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2194                     }
2195                   break;
2196                 }
2197             }
2198         }
2199
2200       while (n_left_from > 0 && n_left_to_next > 0)
2201         {
2202           vlib_buffer_t * p0;
2203           ip4_header_t * ip0;
2204           udp_header_t * udp0;
2205           ip4_fib_mtrie_t * mtrie0;
2206           ip4_fib_mtrie_leaf_t leaf0;
2207           ip_adjacency_t * adj0;
2208           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
2209           i32 len_diff0;
2210           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2211       
2212           pi0 = to_next[0] = from[0];
2213           from += 1;
2214           n_left_from -= 1;
2215           to_next += 1;
2216           n_left_to_next -= 1;
2217       
2218           p0 = vlib_get_buffer (vm, pi0);
2219
2220           ip0 = vlib_buffer_get_current (p0);
2221
2222           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2223                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2224
2225           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2226
2227           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2228
2229           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2230
2231           /* Treat IP frag packets as "experimental" protocol for now
2232              until support of IP frag reassembly is implemented */
2233           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
2234           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2235           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2236
2237           flags0 = p0->flags;
2238
2239           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2240
2241           udp0 = ip4_next_header (ip0);
2242
2243           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2244           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2245
2246           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2247
2248           /* Verify UDP length. */
2249           ip_len0 = clib_net_to_host_u16 (ip0->length);
2250           udp_len0 = clib_net_to_host_u16 (udp0->length);
2251
2252           len_diff0 = ip_len0 - udp_len0;
2253
2254           len_diff0 = is_udp0 ? len_diff0 : 0;
2255
2256           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
2257             {
2258               if (is_tcp_udp0)
2259                 {
2260                   if (is_tcp_udp0
2261                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2262                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2263                   good_tcp_udp0 =
2264                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2265                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2266                 }
2267             }
2268
2269           good_tcp_udp0 &= len_diff0 >= 0;
2270
2271           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2272
2273           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2274
2275           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2276
2277           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2278           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2279                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2280                     : error0);
2281
2282           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2283
2284           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2285           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2286
2287           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2288                                                            &ip0->src_address,
2289                                                            /* no_default_route */ 1));
2290
2291           adj0 = ip_get_adjacency (lm, adj_index0);
2292
2293           /* Must have a route to source otherwise we drop the packet. */
2294           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2295                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2296                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2297                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2298                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2299                     ? IP4_ERROR_SRC_LOOKUP_MISS
2300                     : error0);
2301
2302           next0 = lm->local_next_by_ip_protocol[proto0];
2303
2304           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2305
2306           p0->error = error0? error_node->errors[error0] : 0;
2307
2308           if (PREDICT_FALSE (next0 != next_index))
2309             {
2310               n_left_to_next += 1;
2311               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2312
2313               next_index = next0;
2314               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2315               to_next[0] = pi0;
2316               to_next += 1;
2317               n_left_to_next -= 1;
2318             }
2319         }
2320   
2321       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2322     }
2323
2324   return frame->n_vectors;
2325 }
2326
2327 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2328   .function = ip4_local,
2329   .name = "ip4-local",
2330   .vector_size = sizeof (u32),
2331
2332   .format_trace = format_ip4_forward_next_trace,
2333
2334   .n_next_nodes = IP_LOCAL_N_NEXT,
2335   .next_nodes = {
2336     [IP_LOCAL_NEXT_DROP] = "error-drop",
2337     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2338     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2339     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2340   },
2341 };
2342
2343 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
2344
2345 void ip4_register_protocol (u32 protocol, u32 node_index)
2346 {
2347   vlib_main_t * vm = vlib_get_main();
2348   ip4_main_t * im = &ip4_main;
2349   ip_lookup_main_t * lm = &im->lookup_main;
2350
2351   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2352   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2353 }
2354
2355 static clib_error_t *
2356 show_ip_local_command_fn (vlib_main_t * vm,
2357                           unformat_input_t * input,
2358                          vlib_cli_command_t * cmd)
2359 {
2360   ip4_main_t * im = &ip4_main;
2361   ip_lookup_main_t * lm = &im->lookup_main;
2362   int i;
2363
2364   vlib_cli_output (vm, "Protocols handled by ip4_local");
2365   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2366     {
2367       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2368         vlib_cli_output (vm, "%d", i);
2369     }
2370   return 0;
2371 }
2372
2373
2374
2375 VLIB_CLI_COMMAND (show_ip_local, static) = {
2376   .path = "show ip local",
2377   .function = show_ip_local_command_fn,
2378   .short_help = "Show ip local protocol table",
2379 };
2380
2381 static uword
2382 ip4_arp (vlib_main_t * vm,
2383          vlib_node_runtime_t * node,
2384          vlib_frame_t * frame)
2385 {
2386   vnet_main_t * vnm = vnet_get_main();
2387   ip4_main_t * im = &ip4_main;
2388   ip_lookup_main_t * lm = &im->lookup_main;
2389   u32 * from, * to_next_drop;
2390   uword n_left_from, n_left_to_next_drop, next_index;
2391   static f64 time_last_seed_change = -1e100;
2392   static u32 hash_seeds[3];
2393   static uword hash_bitmap[256 / BITS (uword)]; 
2394   f64 time_now;
2395
2396   if (node->flags & VLIB_NODE_FLAG_TRACE)
2397     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2398
2399   time_now = vlib_time_now (vm);
2400   if (time_now - time_last_seed_change > 1e-3)
2401     {
2402       uword i;
2403       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2404                                              sizeof (hash_seeds));
2405       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2406         hash_seeds[i] = r[i];
2407
2408       /* Mark all hash keys as been no-seen before. */
2409       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2410         hash_bitmap[i] = 0;
2411
2412       time_last_seed_change = time_now;
2413     }
2414
2415   from = vlib_frame_vector_args (frame);
2416   n_left_from = frame->n_vectors;
2417   next_index = node->cached_next_index;
2418   if (next_index == IP4_ARP_NEXT_DROP)
2419     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2420
2421   while (n_left_from > 0)
2422     {
2423       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2424                            to_next_drop, n_left_to_next_drop);
2425
2426       while (n_left_from > 0 && n_left_to_next_drop > 0)
2427         {
2428           vlib_buffer_t * p0;
2429           ip4_header_t * ip0;
2430           ethernet_header_t * eh0;
2431           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2432           uword bm0;
2433           ip_adjacency_t * adj0;
2434
2435           pi0 = from[0];
2436
2437           p0 = vlib_get_buffer (vm, pi0);
2438
2439           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2440           adj0 = ip_get_adjacency (lm, adj_index0);
2441           ip0 = vlib_buffer_get_current (p0);
2442
2443           /* If packet destination is not local, send ARP to next hop */
2444           if (adj0->arp.next_hop.ip4.as_u32)
2445             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2446
2447           /* 
2448            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2449            * rewrite to this packet, we need to skip it here.
2450            * Note, to distinguish from src IP addr *.8.6.*, we
2451            * check for a bcast eth dest instead of IPv4 version.
2452            */
2453           eh0 = (ethernet_header_t*)ip0;
2454           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2455             {
2456               u32 vlan_num = 0;
2457               u16 * etype = &eh0->type;
2458               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2459                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2460                 {
2461                   vlan_num += 1;
2462                   etype += 2; //vlan tag also 16 bits, same as etype
2463                 }
2464               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2465                 {
2466                   vlib_buffer_advance (
2467                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2468                   ip0 = vlib_buffer_get_current (p0);
2469                 }
2470             }
2471
2472           a0 = hash_seeds[0];
2473           b0 = hash_seeds[1];
2474           c0 = hash_seeds[2];
2475
2476           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2477           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2478
2479           a0 ^= ip0->dst_address.data_u32;
2480           b0 ^= sw_if_index0;
2481
2482           hash_v3_finalize32 (a0, b0, c0);
2483
2484           c0 &= BITS (hash_bitmap) - 1;
2485           c0 = c0 / BITS (uword);
2486           m0 = (uword) 1 << (c0 % BITS (uword));
2487
2488           bm0 = hash_bitmap[c0];
2489           drop0 = (bm0 & m0) != 0;
2490
2491           /* Mark it as seen. */
2492           hash_bitmap[c0] = bm0 | m0;
2493
2494           from += 1;
2495           n_left_from -= 1;
2496           to_next_drop[0] = pi0;
2497           to_next_drop += 1;
2498           n_left_to_next_drop -= 1;
2499
2500           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2501
2502           if (drop0)
2503             continue;
2504
2505           /* 
2506            * Can happen if the control-plane is programming tables
2507            * with traffic flowing; at least that's today's lame excuse.
2508            */
2509           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2510             {
2511               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2512             }
2513           else
2514           /* Send ARP request. */
2515           {
2516             u32 bi0 = 0;
2517             vlib_buffer_t * b0;
2518             ethernet_arp_header_t * h0;
2519             vnet_hw_interface_t * hw_if0;
2520
2521             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2522
2523             /* Add rewrite/encap string for ARP packet. */
2524             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2525
2526             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2527
2528             /* Src ethernet address in ARP header. */
2529             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2530                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2531
2532             if (ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0)) {
2533                 //No source address available
2534                 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2535                 vlib_buffer_free(vm, &bi0, 1);
2536                 continue;
2537             }
2538
2539             /* Copy in destination address we are requesting. */
2540             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2541
2542             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2543             b0 = vlib_get_buffer (vm, bi0);
2544             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2545
2546             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2547
2548             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2549           }
2550         }
2551
2552       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2553     }
2554
2555   return frame->n_vectors;
2556 }
2557
2558 static char * ip4_arp_error_strings[] = {
2559   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2560   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2561   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2562   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2563   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2564   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2565 };
2566
2567 VLIB_REGISTER_NODE (ip4_arp_node) = {
2568   .function = ip4_arp,
2569   .name = "ip4-arp",
2570   .vector_size = sizeof (u32),
2571
2572   .format_trace = format_ip4_forward_next_trace,
2573
2574   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2575   .error_strings = ip4_arp_error_strings,
2576
2577   .n_next_nodes = IP4_ARP_N_NEXT,
2578   .next_nodes = {
2579     [IP4_ARP_NEXT_DROP] = "error-drop",
2580   },
2581 };
2582
2583 #define foreach_notrace_ip4_arp_error           \
2584 _(DROP)                                         \
2585 _(REQUEST_SENT)                                 \
2586 _(REPLICATE_DROP)                               \
2587 _(REPLICATE_FAIL)
2588
2589 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2590 {
2591   vlib_node_runtime_t *rt = 
2592     vlib_node_get_runtime (vm, ip4_arp_node.index);
2593
2594   /* don't trace ARP request packets */
2595 #define _(a)                                    \
2596     vnet_pcap_drop_trace_filter_add_del         \
2597         (rt->errors[IP4_ARP_ERROR_##a],         \
2598          1 /* is_add */);
2599     foreach_notrace_ip4_arp_error;
2600 #undef _
2601   return 0;
2602 }
2603
2604 VLIB_INIT_FUNCTION(arp_notrace_init);
2605
2606
2607 /* Send an ARP request to see if given destination is reachable on given interface. */
2608 clib_error_t *
2609 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2610 {
2611   vnet_main_t * vnm = vnet_get_main();
2612   ip4_main_t * im = &ip4_main;
2613   ethernet_arp_header_t * h;
2614   ip4_address_t * src;
2615   ip_interface_address_t * ia;
2616   ip_adjacency_t * adj;
2617   vnet_hw_interface_t * hi;
2618   vnet_sw_interface_t * si;
2619   vlib_buffer_t * b;
2620   u32 bi = 0;
2621
2622   si = vnet_get_sw_interface (vnm, sw_if_index);
2623
2624   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2625     {
2626       return clib_error_return (0, "%U: interface %U down",
2627                                 format_ip4_address, dst, 
2628                                 format_vnet_sw_if_index_name, vnm, 
2629                                 sw_if_index);
2630     }
2631
2632   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2633   if (! src)
2634     {
2635       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2636       return clib_error_return 
2637         (0, "no matching interface address for destination %U (interface %U)",
2638          format_ip4_address, dst,
2639          format_vnet_sw_if_index_name, vnm, sw_if_index);
2640     }
2641
2642   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2643
2644   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2645
2646   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2647
2648   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2649
2650   h->ip4_over_ethernet[0].ip4 = src[0];
2651   h->ip4_over_ethernet[1].ip4 = dst[0];
2652
2653   b = vlib_get_buffer (vm, bi);
2654   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2655
2656   /* Add encapsulation string for software interface (e.g. ethernet header). */
2657   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2658   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2659
2660   {
2661     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2662     u32 * to_next = vlib_frame_vector_args (f);
2663     to_next[0] = bi;
2664     f->n_vectors = 1;
2665     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2666   }
2667
2668   return /* no error */ 0;
2669 }
2670
/*
 * Statically declared next-node indices of the ip4-rewrite nodes; used as
 * indices into the .next_nodes table of the ip4-rewrite-transit
 * registration.
 */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,        /* drop on error */
  IP4_REWRITE_NEXT_ARP = 1,         /* hand over to ip4-arp */
  IP4_REWRITE_NEXT_ICMP_ERROR = 2,  /* generate an ICMP error */
} ip4_rewrite_next_t;
2676
2677 always_inline uword
2678 ip4_rewrite_inline (vlib_main_t * vm,
2679                     vlib_node_runtime_t * node,
2680                     vlib_frame_t * frame,
2681                     int rewrite_for_locally_received_packets)
2682 {
2683   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2684   u32 * from = vlib_frame_vector_args (frame);
2685   u32 n_left_from, n_left_to_next, * to_next, next_index;
2686   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2687   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2688   ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
2689
2690   n_left_from = frame->n_vectors;
2691   next_index = node->cached_next_index;
2692   u32 cpu_index = os_get_cpu_number();
2693   
2694   while (n_left_from > 0)
2695     {
2696       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2697
2698       while (n_left_from >= 4 && n_left_to_next >= 2)
2699         {
2700           ip_adjacency_t * adj0, * adj1;
2701           vlib_buffer_t * p0, * p1;
2702           ip4_header_t * ip0, * ip1;
2703           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2704           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2705           u32 next0_override, next1_override;
2706           u32 tx_sw_if_index0, tx_sw_if_index1;
2707       
2708           if (rewrite_for_locally_received_packets)
2709               next0_override = next1_override = 0;
2710
2711           /* Prefetch next iteration. */
2712           {
2713             vlib_buffer_t * p2, * p3;
2714
2715             p2 = vlib_get_buffer (vm, from[2]);
2716             p3 = vlib_get_buffer (vm, from[3]);
2717
2718             vlib_prefetch_buffer_header (p2, STORE);
2719             vlib_prefetch_buffer_header (p3, STORE);
2720
2721             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2722             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2723           }
2724
2725           pi0 = to_next[0] = from[0];
2726           pi1 = to_next[1] = from[1];
2727
2728           from += 2;
2729           n_left_from -= 2;
2730           to_next += 2;
2731           n_left_to_next -= 2;
2732       
2733           p0 = vlib_get_buffer (vm, pi0);
2734           p1 = vlib_get_buffer (vm, pi1);
2735
2736           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2737           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2738
2739           /* We should never rewrite a pkt using the MISS adjacency */
2740           ASSERT(adj_index0 && adj_index1);
2741
2742           ip0 = vlib_buffer_get_current (p0);
2743           ip1 = vlib_buffer_get_current (p1);
2744
2745           error0 = error1 = IP4_ERROR_NONE;
2746           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2747
2748           /* Decrement TTL & update checksum.
2749              Works either endian, so no need for byte swap. */
2750           if (! rewrite_for_locally_received_packets)
2751             {
2752               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2753
2754               /* Input node should have reject packets with ttl 0. */
2755               ASSERT (ip0->ttl > 0);
2756               ASSERT (ip1->ttl > 0);
2757
2758               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2759               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2760
2761               checksum0 += checksum0 >= 0xffff;
2762               checksum1 += checksum1 >= 0xffff;
2763
2764               ip0->checksum = checksum0;
2765               ip1->checksum = checksum1;
2766
2767               ttl0 -= 1;
2768               ttl1 -= 1;
2769
2770               ip0->ttl = ttl0;
2771               ip1->ttl = ttl1;
2772
2773               /*
2774                * If the ttl drops below 1 when forwarding, generate
2775                * an ICMP response.
2776                */
2777               if (PREDICT_FALSE(ttl0 <= 0))
2778                 {
2779                   error0 = IP4_ERROR_TIME_EXPIRED;
2780                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2781                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2782                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2783                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2784                 }
2785               if (PREDICT_FALSE(ttl1 <= 0))
2786                 {
2787                   error1 = IP4_ERROR_TIME_EXPIRED;
2788                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2789                   icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2790                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2791                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2792                 }
2793
2794               /* Verify checksum. */
2795               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2796               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2797             }
2798
2799           /* Rewrite packet header and updates lengths. */
2800           adj0 = ip_get_adjacency (lm, adj_index0);
2801           adj1 = ip_get_adjacency (lm, adj_index1);
2802       
2803           if (rewrite_for_locally_received_packets)
2804             {
2805               /*
2806                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2807                * we end up here with a local adjacency in hand
2808                * The local adj rewrite data is 0xfefe on purpose.
2809                * Bad engineer, no donut for you.
2810                */
2811               if (PREDICT_FALSE(adj0->lookup_next_index 
2812                                 == IP_LOOKUP_NEXT_LOCAL))
2813                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2814               if (PREDICT_FALSE(adj0->lookup_next_index
2815                                 == IP_LOOKUP_NEXT_ARP))
2816                 next0_override = IP4_REWRITE_NEXT_ARP;
2817               if (PREDICT_FALSE(adj1->lookup_next_index 
2818                                 == IP_LOOKUP_NEXT_LOCAL))
2819                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2820               if (PREDICT_FALSE(adj1->lookup_next_index
2821                                 == IP_LOOKUP_NEXT_ARP))
2822                 next1_override = IP4_REWRITE_NEXT_ARP;
2823             }
2824
2825           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2826           rw_len0 = adj0[0].rewrite_header.data_bytes;
2827           rw_len1 = adj1[0].rewrite_header.data_bytes;
2828
2829           /* Check MTU of outgoing interface. */
2830           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2831                     ? IP4_ERROR_MTU_EXCEEDED
2832                     : error0);
2833           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2834                     ? IP4_ERROR_MTU_EXCEEDED
2835                     : error1);
2836
2837           next0 = (error0 == IP4_ERROR_NONE)
2838             ? adj0[0].rewrite_header.next_index : next0;
2839
2840           if (rewrite_for_locally_received_packets)
2841               next0 = next0 && next0_override ? next0_override : next0;
2842
2843           next1 = (error1 == IP4_ERROR_NONE)
2844             ? adj1[0].rewrite_header.next_index : next1;
2845
2846           if (rewrite_for_locally_received_packets)
2847               next1 = next1 && next1_override ? next1_override : next1;
2848
2849           /* 
2850            * We've already accounted for an ethernet_header_t elsewhere
2851            */
2852           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2853               vlib_increment_combined_counter 
2854                   (&lm->adjacency_counters,
2855                    cpu_index, adj_index0, 
2856                    /* packet increment */ 0,
2857                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2858
2859           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2860               vlib_increment_combined_counter 
2861                   (&lm->adjacency_counters,
2862                    cpu_index, adj_index1, 
2863                    /* packet increment */ 0,
2864                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2865
2866           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2867            * to see the IP headerr */
2868           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2869             {
2870               p0->current_data -= rw_len0;
2871               p0->current_length += rw_len0;
2872               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2873               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2874                   tx_sw_if_index0;
2875
2876               if (PREDICT_FALSE 
2877                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
2878                                     tx_sw_if_index0)))
2879                 {
2880                   p0->current_config_index = 
2881                     vec_elt (cm->config_index_by_sw_if_index, 
2882                              tx_sw_if_index0);
2883                   vnet_get_config_data (&cm->config_main,
2884                                         &p0->current_config_index,
2885                                         &next0,
2886                                         /* # bytes of config data */ 0);
2887                 }
2888             }
2889           if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2890             {
2891               p1->current_data -= rw_len1;
2892               p1->current_length += rw_len1;
2893
2894               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2895               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2896                   tx_sw_if_index1;
2897
2898               if (PREDICT_FALSE 
2899                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
2900                                     tx_sw_if_index1)))
2901                 {
2902                   p1->current_config_index = 
2903                     vec_elt (cm->config_index_by_sw_if_index, 
2904                              tx_sw_if_index1);
2905                   vnet_get_config_data (&cm->config_main,
2906                                         &p1->current_config_index,
2907                                         &next1,
2908                                         /* # bytes of config data */ 0);
2909                 }
2910             }
2911
2912           /* Guess we are only writing on simple Ethernet header. */
2913           vnet_rewrite_two_headers (adj0[0], adj1[0],
2914                                     ip0, ip1,
2915                                     sizeof (ethernet_header_t));
2916       
2917           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2918                                            to_next, n_left_to_next,
2919                                            pi0, pi1, next0, next1);
2920         }
2921
2922       while (n_left_from > 0 && n_left_to_next > 0)
2923         {
2924           ip_adjacency_t * adj0;
2925           vlib_buffer_t * p0;
2926           ip4_header_t * ip0;
2927           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2928           u32 next0_override;
2929           u32 tx_sw_if_index0;
2930       
2931           if (rewrite_for_locally_received_packets)
2932               next0_override = 0;
2933
2934           pi0 = to_next[0] = from[0];
2935
2936           p0 = vlib_get_buffer (vm, pi0);
2937
2938           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2939
2940           /* We should never rewrite a pkt using the MISS adjacency */
2941           ASSERT(adj_index0);
2942
2943           adj0 = ip_get_adjacency (lm, adj_index0);
2944       
2945           ip0 = vlib_buffer_get_current (p0);
2946
2947           error0 = IP4_ERROR_NONE;
2948           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
2949
2950           /* Decrement TTL & update checksum. */
2951           if (! rewrite_for_locally_received_packets)
2952             {
2953               i32 ttl0 = ip0->ttl;
2954
2955               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2956
2957               checksum0 += checksum0 >= 0xffff;
2958
2959               ip0->checksum = checksum0;
2960
2961               ASSERT (ip0->ttl > 0);
2962
2963               ttl0 -= 1;
2964
2965               ip0->ttl = ttl0;
2966
2967               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2968
2969               if (PREDICT_FALSE(ttl0 <= 0))
2970                 {
2971                   /*
2972                    * If the ttl drops below 1 when forwarding, generate
2973                    * an ICMP response.
2974                    */
2975                   error0 = IP4_ERROR_TIME_EXPIRED;
2976                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2977                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2978                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2979                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2980                 }
2981             }
2982
2983           if (rewrite_for_locally_received_packets)
2984             {
2985               /*
2986                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2987                * we end up here with a local adjacency in hand
2988                * The local adj rewrite data is 0xfefe on purpose.
2989                * Bad engineer, no donut for you.
2990                */
2991               if (PREDICT_FALSE(adj0->lookup_next_index 
2992                                 == IP_LOOKUP_NEXT_LOCAL))
2993                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2994               /* 
2995                * We have to override the next_index in ARP adjacencies,
2996                * because they're set up for ip4-arp, not this node...
2997                */
2998               if (PREDICT_FALSE(adj0->lookup_next_index
2999                                 == IP_LOOKUP_NEXT_ARP))
3000                 next0_override = IP4_REWRITE_NEXT_ARP;
3001             }
3002
3003           /* Guess we are only writing on simple Ethernet header. */
3004           vnet_rewrite_one_header (adj0[0], ip0, 
3005                                    sizeof (ethernet_header_t));
3006           
3007           /* Update packet buffer attributes/set output interface. */
3008           rw_len0 = adj0[0].rewrite_header.data_bytes;
3009           
3010           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
3011               vlib_increment_combined_counter 
3012                   (&lm->adjacency_counters,
3013                    cpu_index, adj_index0, 
3014                    /* packet increment */ 0,
3015                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
3016           
3017           /* Check MTU of outgoing interface. */
3018           error0 = (vlib_buffer_length_in_chain (vm, p0) 
3019                     > adj0[0].rewrite_header.max_l3_packet_bytes
3020                     ? IP4_ERROR_MTU_EXCEEDED
3021                     : error0);
3022
3023           p0->error = error_node->errors[error0];
3024
3025           /* Don't adjust the buffer for ttl issue; icmp-error node wants
3026            * to see the IP headerr */
3027           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
3028             {
3029               p0->current_data -= rw_len0;
3030               p0->current_length += rw_len0;
3031               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
3032
3033               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
3034               next0 = adj0[0].rewrite_header.next_index;
3035
3036               if (PREDICT_FALSE 
3037                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
3038                                     tx_sw_if_index0)))
3039                   {
3040                     p0->current_config_index = 
3041                       vec_elt (cm->config_index_by_sw_if_index, 
3042                                tx_sw_if_index0);
3043                     vnet_get_config_data (&cm->config_main,
3044                                           &p0->current_config_index,
3045                                           &next0,
3046                                           /* # bytes of config data */ 0);
3047                   }
3048             }
3049
3050           if (rewrite_for_locally_received_packets)
3051               next0 = next0 && next0_override ? next0_override : next0;
3052
3053           from += 1;
3054           n_left_from -= 1;
3055           to_next += 1;
3056           n_left_to_next -= 1;
3057       
3058           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3059                                            to_next, n_left_to_next,
3060                                            pi0, next0);
3061         }
3062   
3063       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3064     }
3065
3066   /* Need to do trace after rewrites to pick up new packet data. */
3067   if (node->flags & VLIB_NODE_FLAG_TRACE)
3068     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
3069
3070   return frame->n_vectors;
3071 }
3072
3073
3074 /** @brief IPv4 transit rewrite node.
3075     @node ip4-rewrite-transit
3076
3077     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
3078     header checksum, fetch the ip adjacency, check the outbound mtu,
3079     apply the adjacency rewrite, and send pkts to the adjacency
3080     rewrite header's rewrite_next_index.
3081
3082     @param vm vlib_main_t corresponding to the current thread
3083     @param node vlib_node_runtime_t
3084     @param frame vlib_frame_t whose contents should be dispatched
3085
3086     @par Graph mechanics: buffer metadata, next index usage
3087
3088     @em Uses:
3089     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
3090         - the rewrite adjacency index
3091     - <code>adj->lookup_next_index</code>
3092         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
3093           the packet will be dropped. 
3094     - <code>adj->rewrite_header</code>
3095         - Rewrite string length, rewrite string, next_index
3096
3097     @em Sets:
3098     - <code>b->current_data, b->current_length</code>
3099         - Updated net of applying the rewrite string
3100
3101     <em>Next Indices:</em>
3102     - <code> adj->rewrite_header.next_index </code>
3103       or @c error-drop 
3104 */
3105 static uword
3106 ip4_rewrite_transit (vlib_main_t * vm,
3107                      vlib_node_runtime_t * node,
3108                      vlib_frame_t * frame)
3109 {
3110   return ip4_rewrite_inline (vm, node, frame,
3111                              /* rewrite_for_locally_received_packets */ 0);
3112 }
3113
3114 /** @brief IPv4 local rewrite node.
3115     @node ip4-rewrite-local
3116
3117     This is the IPv4 local rewrite node. Fetch the ip adjacency, check
3118     the outbound interface mtu, apply the adjacency rewrite, and send
3119     pkts to the adjacency rewrite header's rewrite_next_index. Deal
3120     with hemorrhoids of the form "some clown sends an icmp4 w/ src =
3121     dst = interface addr."
3122
3123     @param vm vlib_main_t corresponding to the current thread
3124     @param node vlib_node_runtime_t
3125     @param frame vlib_frame_t whose contents should be dispatched
3126
3127     @par Graph mechanics: buffer metadata, next index usage
3128
3129     @em Uses:
3130     - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
3131         - the rewrite adjacency index
3132     - <code>adj->lookup_next_index</code>
3133         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
3134           the packet will be dropped. 
3135     - <code>adj->rewrite_header</code>
3136         - Rewrite string length, rewrite string, next_index
3137
3138     @em Sets:
3139     - <code>b->current_data, b->current_length</code>
3140         - Updated net of applying the rewrite string
3141
3142     <em>Next Indices:</em>
3143     - <code> adj->rewrite_header.next_index </code>
3144       or @c error-drop 
3145 */
3146
3147 static uword
3148 ip4_rewrite_local (vlib_main_t * vm,
3149                    vlib_node_runtime_t * node,
3150                    vlib_frame_t * frame)
3151 {
3152   return ip4_rewrite_inline (vm, node, frame,
3153                              /* rewrite_for_locally_received_packets */ 1);
3154 }
3155
3156 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
3157   .function = ip4_rewrite_transit,
3158   .name = "ip4-rewrite-transit",
3159   .vector_size = sizeof (u32),
3160
3161   .format_trace = format_ip4_rewrite_trace,
3162
3163   .n_next_nodes = 3,
3164   .next_nodes = {
3165     [IP4_REWRITE_NEXT_DROP] = "error-drop",
3166     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
3167     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3168   },
3169 };
3170
3171 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit);
3172
3173 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
3174   .function = ip4_rewrite_local,
3175   .name = "ip4-rewrite-local",
3176   .vector_size = sizeof (u32),
3177
3178   .sibling_of = "ip4-rewrite-transit",
3179
3180   .format_trace = format_ip4_rewrite_trace,
3181
3182   .n_next_nodes = 0,
3183 };
3184
3185 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local);
3186
3187 static clib_error_t *
3188 add_del_interface_table (vlib_main_t * vm,
3189                          unformat_input_t * input,
3190                          vlib_cli_command_t * cmd)
3191 {
3192   vnet_main_t * vnm = vnet_get_main();
3193   clib_error_t * error = 0;
3194   u32 sw_if_index, table_id;
3195
3196   sw_if_index = ~0;
3197
3198   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
3199     {
3200       error = clib_error_return (0, "unknown interface `%U'",
3201                                  format_unformat_error, input);
3202       goto done;
3203     }
3204
3205   if (unformat (input, "%d", &table_id))
3206     ;
3207   else
3208     {
3209       error = clib_error_return (0, "expected table id `%U'",
3210                                  format_unformat_error, input);
3211       goto done;
3212     }
3213
3214   {
3215     ip4_main_t * im = &ip4_main;
3216     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
3217
3218     if (fib) 
3219       {
3220         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
3221         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
3222     }
3223   }
3224
3225  done:
3226   return error;
3227 }
3228
3229 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
3230   .path = "set interface ip table",
3231   .function = add_del_interface_table,
3232   .short_help = "Add/delete FIB table id for interface",
3233 };
3234
3235
/*
 * ip4_lookup_multicast: frame dispatch function used by the
 * "ip4-lookup-multicast" node registration.
 *
 * For every buffer index in the frame it:
 *   - selects a FIB index: the entry of im->fib_index_by_sw_if_index for the
 *     buffer's RX sw_if_index, unless sw_if_index[VLIB_TX] is not ~0, in
 *     which case that value is used as the FIB index instead;
 *   - looks up the IPv4 destination address with ip4_fib_lookup_buffer;
 *   - computes a flow hash using the selected FIB's flow_hash_config and
 *     adds (flow_hash & (n_adj - 1)) to the adjacency index;
 *   - stores the final adjacency index in ip.adj_index[VLIB_TX] and bumps
 *     the combined adjacency counter by one packet / the chain length;
 *   - enqueues the buffer to the adjacency's lookup_next_index.
 *
 * Returns frame->n_vectors.
 */
3236 static uword
3237 ip4_lookup_multicast (vlib_main_t * vm,
3238                       vlib_node_runtime_t * node,
3239                       vlib_frame_t * frame)
3240 {
3241   ip4_main_t * im = &ip4_main;
3242   ip_lookup_main_t * lm = &im->lookup_main;
3243   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
3244   u32 n_left_from, n_left_to_next, * from, * to_next;
3245   ip_lookup_next_t next;
3246   u32 cpu_index = os_get_cpu_number();
3247
3248   from = vlib_frame_vector_args (frame);
3249   n_left_from = frame->n_vectors;
       /* Start with the next index cached on the node runtime. */
3250   next = node->cached_next_index;
3251
       /* Outer loop: get a frame for the current next index and fill it until
          either the input or the space in that frame is exhausted. */
3252   while (n_left_from > 0)
3253     {
3254       vlib_get_next_frame (vm, node, next,
3255                            to_next, n_left_to_next);
3256
           /* Dual loop: two buffers per iteration. At least four inputs must
              remain because from[2] and from[3] are prefetched below. */
3257       while (n_left_from >= 4 && n_left_to_next >= 2)
3258         {
3259           vlib_buffer_t * p0, * p1;
3260           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
3261           ip_lookup_next_t next0, next1;
3262           ip4_header_t * ip0, * ip1;
3263           ip_adjacency_t * adj0, * adj1;
3264           u32 fib_index0, fib_index1;
3265           u32 flow_hash_config0, flow_hash_config1;
3266
3267           /* Prefetch next iteration. */
3268           {
3269             vlib_buffer_t * p2, * p3;
3270
3271             p2 = vlib_get_buffer (vm, from[2]);
3272             p3 = vlib_get_buffer (vm, from[3]);
3273
3274             vlib_prefetch_buffer_header (p2, LOAD);
3275             vlib_prefetch_buffer_header (p3, LOAD);
3276
3277             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
3278             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
3279           }
3280
               /* Speculatively enqueue both buffers to the current next frame;
                  this is corrected further down if a next index differs. */
3281           pi0 = to_next[0] = from[0];
3282           pi1 = to_next[1] = from[1];
3283
3284           p0 = vlib_get_buffer (vm, pi0);
3285           p1 = vlib_get_buffer (vm, pi1);
3286
3287           ip0 = vlib_buffer_get_current (p0);
3288           ip1 = vlib_buffer_get_current (p1);
3289
               /* FIB index defaults to the one recorded for the RX interface;
                  a sw_if_index[VLIB_TX] other than ~0 overrides it. */
3290           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3291           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
3292           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3293             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3294           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
3295             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
3296
3297           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3298                                               &ip0->dst_address, p0);
3299           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
3300                                               &ip1->dst_address, p1);
3301
3302           adj0 = ip_get_adjacency (lm, adj_index0);
3303           adj1 = ip_get_adjacency (lm, adj_index1);
3304
               /* The next index is read from the looked-up adjacency, before
                  the flow-hash offset is applied to the adjacency index. */
3305           next0 = adj0->lookup_next_index;
3306           next1 = adj1->lookup_next_index;
3307
3308           flow_hash_config0 = 
3309               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3310
3311           flow_hash_config1 = 
3312               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
3313
3314           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
3315               (ip0, flow_hash_config0);
3316                                                                   
3317           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
3318               (ip1, flow_hash_config1);
3319
               /* n_adj is asserted to be a non-zero power of two, so masking
                  the flow hash with (n_adj - 1) gives an offset below n_adj. */
3320           ASSERT (adj0->n_adj > 0);
3321           ASSERT (adj1->n_adj > 0);
3322           ASSERT (is_pow2 (adj0->n_adj));
3323           ASSERT (is_pow2 (adj1->n_adj));
3324           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3325           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
3326
3327           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3328           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
3329
               /* Count one packet and its chain length against the final
                  adjacency index; the "if (1)" guards are always taken. */
3330           if (1) /* $$$$$$ HACK FIXME */
3331           vlib_increment_combined_counter 
3332               (cm, cpu_index, adj_index0, 1,
3333                vlib_buffer_length_in_chain (vm, p0));
3334           if (1) /* $$$$$$ HACK FIXME */
3335           vlib_increment_combined_counter 
3336               (cm, cpu_index, adj_index1, 1,
3337                vlib_buffer_length_in_chain (vm, p1));
3338
3339           from += 2;
3340           to_next += 2;
3341           n_left_to_next -= 2;
3342           n_left_from -= 2;
3343
               /* Bit 0 set: p0 does not belong in the current frame.
                  Bit 1 set: p1 does not belong in the current frame. */
3344           wrong_next = (next0 != next) + 2*(next1 != next);
3345           if (PREDICT_FALSE (wrong_next != 0))
3346             {
3347               switch (wrong_next)
3348                 {
3349                 case 1:
3350                   /* A B A */
                       /* Only p0 is misplaced: move pi1 into p0's slot, give
                          one slot back and send pi0 to next0 on its own. */
3351                   to_next[-2] = pi1;
3352                   to_next -= 1;
3353                   n_left_to_next += 1;
3354                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3355                   break;
3356
3357                 case 2:
3358                   /* A A B */
                       /* Only p1 is misplaced: give its slot back and send
                          pi1 to next1 on its own. */
3359                   to_next -= 1;
3360                   n_left_to_next += 1;
3361                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3362                   break;
3363
3364                 case 3:
3365                   /* A B C */
                       /* Both are misplaced: give both slots back and send
                          each buffer to its own next index. */
3366                   to_next -= 2;
3367                   n_left_to_next += 2;
3368                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3369                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3370                   if (next0 == next1)
3371                     {
3372                       /* A B B */
                           /* Both buffers agree with each other: close the
                              current frame and continue with next1. */
3373                       vlib_put_next_frame (vm, node, next, n_left_to_next);
3374                       next = next1;
3375                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
3376                     }
3377                 }
3378             }
3379         }
3380     
           /* Single loop: same processing, one buffer per iteration. */
3381       while (n_left_from > 0 && n_left_to_next > 0)
3382         {
3383           vlib_buffer_t * p0;
3384           ip4_header_t * ip0;
3385           u32 pi0, adj_index0;
3386           ip_lookup_next_t next0;
3387           ip_adjacency_t * adj0;
3388           u32 fib_index0;
3389           u32 flow_hash_config0;
3390
               /* Speculatively enqueue to the current next frame. */
3391           pi0 = from[0];
3392           to_next[0] = pi0;
3393
3394           p0 = vlib_get_buffer (vm, pi0);
3395
3396           ip0 = vlib_buffer_get_current (p0);
3397
               /* RX-interface FIB index unless sw_if_index[VLIB_TX] is set. */
3398           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
3399                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3400           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3401               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3402           
3403           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3404                                               &ip0->dst_address, p0);
3405
3406           adj0 = ip_get_adjacency (lm, adj_index0);
3407
3408           next0 = adj0->lookup_next_index;
3409
3410           flow_hash_config0 = 
3411               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3412
3413           vnet_buffer (p0)->ip.flow_hash = 
3414             ip4_compute_flow_hash (ip0, flow_hash_config0);
3415
               /* Offset the adjacency index by the masked flow hash. */
3416           ASSERT (adj0->n_adj > 0);
3417           ASSERT (is_pow2 (adj0->n_adj));
3418           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3419
3420           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3421
3422           if (1) /* $$$$$$ HACK FIXME */
3423               vlib_increment_combined_counter 
3424                   (cm, cpu_index, adj_index0, 1,
3425                    vlib_buffer_length_in_chain (vm, p0));
3426
3427           from += 1;
3428           to_next += 1;
3429           n_left_to_next -= 1;
3430           n_left_from -= 1;
3431
               /* Wrong guess: take the slot back, close the current frame,
                  switch to next0 and enqueue pi0 in the new frame. */
3432           if (PREDICT_FALSE (next0 != next))
3433             {
3434               n_left_to_next += 1;
3435               vlib_put_next_frame (vm, node, next, n_left_to_next);
3436               next = next0;
3437               vlib_get_next_frame (vm, node, next,
3438                                    to_next, n_left_to_next);
3439               to_next[0] = pi0;
3440               to_next += 1;
3441               n_left_to_next -= 1;
3442             }
3443         }
3444
3445       vlib_put_next_frame (vm, node, next, n_left_to_next);
3446     }
3447
       /* When the node's trace flag is set, pass the whole frame to
          ip4_forward_next_trace. */
3448   if (node->flags & VLIB_NODE_FLAG_TRACE)
3449       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
3450
3451   return frame->n_vectors;
3452 }
3453
3454 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3455   .function = ip4_lookup_multicast,
3456   .name = "ip4-lookup-multicast",
3457   .vector_size = sizeof (u32),
3458   .sibling_of = "ip4-lookup",
3459   .format_trace = format_ip4_lookup_trace,
3460
3461   .n_next_nodes = 0,
3462 };
3463
3464 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast);
3465
3466 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3467   .function = ip4_drop,
3468   .name = "ip4-multicast",
3469   .vector_size = sizeof (u32),
3470
3471   .format_trace = format_ip4_forward_next_trace,
3472
3473   .n_next_nodes = 1,
3474   .next_nodes = {
3475     [0] = "error-drop",
3476   },
3477 };
3478
3479 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3480 {
3481   ip4_main_t * im = &ip4_main;
3482   ip4_fib_mtrie_t * mtrie0;
3483   ip4_fib_mtrie_leaf_t leaf0;
3484   u32 adj_index0;
3485     
3486   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3487
3488   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3489   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3490   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3491   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3492   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3493   
3494   /* Handle default route. */
3495   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3496   
3497   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3498   
3499   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3500                                                   a, 
3501                                                   /* no_default_route */ 0);
3502 }
3503  
3504 static clib_error_t *
3505 test_lookup_command_fn (vlib_main_t * vm,
3506                         unformat_input_t * input,
3507                         vlib_cli_command_t * cmd)
3508 {
3509   u32 table_id = 0;
3510   f64 count = 1;
3511   u32 n;
3512   int i;
3513   ip4_address_t ip4_base_address;
3514   u64 errors = 0;
3515
3516   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3517       if (unformat (input, "table %d", &table_id))
3518         ;
3519       else if (unformat (input, "count %f", &count))
3520         ;
3521
3522       else if (unformat (input, "%U",
3523                          unformat_ip4_address, &ip4_base_address))
3524         ;
3525       else
3526         return clib_error_return (0, "unknown input `%U'",
3527                                   format_unformat_error, input);
3528   }
3529
3530   n = count;
3531
3532   for (i = 0; i < n; i++)
3533     {
3534       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3535         errors++;
3536
3537       ip4_base_address.as_u32 = 
3538         clib_host_to_net_u32 (1 + 
3539                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3540     }
3541
3542   if (errors) 
3543     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3544   else
3545     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3546
3547   return 0;
3548 }
3549
3550 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3551     .path = "test lookup",
3552     .short_help = "test lookup",
3553     .function = test_lookup_command_fn,
3554 };
3555
3556 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3557 {
3558   ip4_main_t * im4 = &ip4_main;
3559   ip4_fib_t * fib;
3560   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3561
3562   if (p == 0)
3563     return VNET_API_ERROR_NO_SUCH_FIB;
3564
3565   fib = vec_elt_at_index (im4->fibs, p[0]);
3566
3567   fib->flow_hash_config = flow_hash_config;
3568   return 0;
3569 }
3570  
3571 static clib_error_t *
3572 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3573                              unformat_input_t * input,
3574                              vlib_cli_command_t * cmd)
3575 {
3576   int matched = 0;
3577   u32 table_id = 0;
3578   u32 flow_hash_config = 0;
3579   int rv;
3580
3581   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3582     if (unformat (input, "table %d", &table_id))
3583       matched = 1;
3584 #define _(a,v) \
3585     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3586     foreach_flow_hash_bit
3587 #undef _
3588     else break;
3589   }
3590   
3591   if (matched == 0)
3592     return clib_error_return (0, "unknown input `%U'",
3593                               format_unformat_error, input);
3594   
3595   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3596   switch (rv)
3597     {
3598     case 0:
3599       break;
3600       
3601     case VNET_API_ERROR_NO_SUCH_FIB:
3602       return clib_error_return (0, "no such FIB table %d", table_id);
3603       
3604     default:
3605       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3606       break;
3607     }
3608   
3609   return 0;
3610 }
3611  
3612 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3613   .path = "set ip flow-hash",
3614   .short_help = 
3615   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3616   .function = set_ip_flow_hash_command_fn,
3617 };
3618  
3619 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3620                                  u32 table_index)
3621 {
3622   vnet_main_t * vnm = vnet_get_main();
3623   vnet_interface_main_t * im = &vnm->interface_main;
3624   ip4_main_t * ipm = &ip4_main;
3625   ip_lookup_main_t * lm = &ipm->lookup_main;
3626   vnet_classify_main_t * cm = &vnet_classify_main;
3627
3628   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3629     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3630
3631   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3632     return VNET_API_ERROR_NO_SUCH_ENTRY;
3633
3634   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3635   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3636
3637   return 0;
3638 }
3639
3640 static clib_error_t *
3641 set_ip_classify_command_fn (vlib_main_t * vm,
3642                             unformat_input_t * input,
3643                             vlib_cli_command_t * cmd)
3644 {
3645   u32 table_index = ~0;
3646   int table_index_set = 0;
3647   u32 sw_if_index = ~0;
3648   int rv;
3649   
3650   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3651     if (unformat (input, "table-index %d", &table_index))
3652       table_index_set = 1;
3653     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3654                        vnet_get_main(), &sw_if_index))
3655       ;
3656     else
3657       break;
3658   }
3659       
3660   if (table_index_set == 0)
3661     return clib_error_return (0, "classify table-index must be specified");
3662
3663   if (sw_if_index == ~0)
3664     return clib_error_return (0, "interface / subif must be specified");
3665
3666   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3667
3668   switch (rv)
3669     {
3670     case 0:
3671       break;
3672
3673     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3674       return clib_error_return (0, "No such interface");
3675
3676     case VNET_API_ERROR_NO_SUCH_ENTRY:
3677       return clib_error_return (0, "No such classifier table");
3678     }
3679   return 0;
3680 }
3681
3682 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3683     .path = "set ip classify",
3684     .short_help = 
3685     "set ip classify intfc <int> table-index <index>",
3686     .function = set_ip_classify_command_fn,
3687 };
3688
3689
3690 #define TEST_CODE 1
3691 #if TEST_CODE > 0
3692
3693 static clib_error_t *
3694 set_interface_output_feature_command_fn (vlib_main_t * vm,
3695                                          unformat_input_t * input,
3696                                          vlib_cli_command_t * cmd)
3697 {
3698   vnet_main_t * vnm = vnet_get_main();
3699   u32 sw_if_index = ~0;
3700   int is_add = 1;
3701   ip4_main_t * im = &ip4_main;
3702   ip_lookup_main_t * lm = &im->lookup_main;
3703
3704   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) 
3705     {
3706       if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
3707         ;
3708       else if (unformat (input, "del"))
3709         is_add = 0;
3710       else
3711         break;
3712     }
3713
3714   if (sw_if_index == ~0)
3715     return clib_error_return (0, "unknown interface `%U'",
3716                               format_unformat_error, input);
3717
3718   lm->tx_sw_if_has_ip_output_features =
3719     clib_bitmap_set (lm->tx_sw_if_has_ip_output_features, sw_if_index, is_add);
3720
3721   return 0;
3722 }
3723
3724 VLIB_CLI_COMMAND (set_interface_output_feature, static) = {
3725   .path = "set interface output feature",
3726   .function = set_interface_output_feature_command_fn,
3727   .short_help = "set interface output feature <intfc>",
3728 };
3729 #endif /* TEST_CODE */