8a49854966ca10901a980263f86fc073d27db209
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 /** for ethernet_header_t */
43 #include <vnet/ethernet/ethernet.h>
44 /** for ethernet_arp_header_t */
45 #include <vnet/ethernet/arp_packet.h>   
46 #include <vnet/ppp/ppp.h>
47 /** for srp_hw_interface_class */
48 #include <vnet/srp/srp.h>
49 /** for API error numbers */
50 #include <vnet/api_errno.h>     
51
52 /** @file
53     vnet ip4 forwarding
54 */
55
56 /* This is really, really simple but stupid fib. */
57 u32
58 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
59                            ip4_address_t * dst,
60                            u32 disable_default_route)
61 {
62   ip_lookup_main_t * lm = &im->lookup_main;
63   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
64   uword * p, * hash, key;
65   i32 i, i_min, dst_address, ai;
66
67   i_min = disable_default_route ? 1 : 0;
68   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
69   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
70     {
71       hash = fib->adj_index_by_dst_address[i];
72       if (! hash)
73         continue;
74
75       key = dst_address & im->fib_masks[i];
76       if ((p = hash_get (hash, key)) != 0)
77         {
78           ai = p[0];
79           goto done;
80         }
81     }
82
83   /* Nothing matches in table. */
84   ai = lm->miss_adj_index;
85
86  done:
87   return ai;
88 }
89
90 /** @brief Create FIB from table ID and init all hashing.
91     @param im - @ref ip4_main_t
92     @param table_id - table ID
93     @return fib - @ref ip4_fib_t
94 */
95 static ip4_fib_t *
96 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
97 {
98   ip4_fib_t * fib;
99   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
100   vec_add2 (im->fibs, fib, 1);
101   fib->table_id = table_id;
102   fib->index = fib - im->fibs;
103   /* IP_FLOW_HASH_DEFAULT is net value of 5 tuple flags without "reverse" bit */
104   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
105   fib->fwd_classify_table_index = ~0;
106   fib->rev_classify_table_index = ~0;
107   ip4_mtrie_init (&fib->mtrie);
108   return fib;
109 }
110
111 /** @brief Find existing or Create new FIB based on index
112     @param im @ref ip4_main_t
113     @param table_index_or_id - overloaded parameter referring
114            to the table or a table's index in the FIB vector
115     @param flags - used to check if table_index_or_id was a table or
116            an index (detected by @ref IP4_ROUTE_FLAG_FIB_INDEX)
117     @return either the existing or a new ip4_fib_t entry
118 */
119 ip4_fib_t *
120 find_ip4_fib_by_table_index_or_id (ip4_main_t * im,
121                                    u32 table_index_or_id, u32 flags)
122 {
123   uword * p, fib_index;
124
125   fib_index = table_index_or_id;
126   /* If this isn't a FIB_INDEX ... */
127   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
128     {
129       /* If passed ~0 then request the next table available */
130       if (table_index_or_id == ~0) {
131         table_index_or_id = 0;
132         while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
133           table_index_or_id++;
134         }
135         /* Create the next table and return the ip4_fib_t associated with it */
136         return create_fib_with_table_id (im, table_index_or_id);
137       }
138       /* A specific table_id was requested.. */
139       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
140       /* ... and if it doesn't exist create it else grab its index */
141       if (! p)
142         return create_fib_with_table_id (im, table_index_or_id);
143       fib_index = p[0];
144     }
145   /* Return the ip4_fib_t associated with this index */
146   return vec_elt_at_index (im->fibs, fib_index);
147 }
148
149 static void
150 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
151                                        ip4_fib_t * fib,
152                                        u32 address_length)
153 {
154   hash_t * h;
155   uword max_index;
156
157   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
158   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
159
160   fib->adj_index_by_dst_address[address_length] =
161     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
162
163   hash_set_flags (fib->adj_index_by_dst_address[address_length],
164                   HASH_FLAG_NO_AUTO_SHRINK);
165
166   h = hash_header (fib->adj_index_by_dst_address[address_length]);
167   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
168
169   /* Initialize new/old hash value vectors. */
170   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
171   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
172 }
173
174 static void
175 ip4_fib_set_adj_index (ip4_main_t * im,
176                        ip4_fib_t * fib,
177                        u32 flags,
178                        u32 dst_address_u32,
179                        u32 dst_address_length,
180                        u32 adj_index)
181 {
182   ip_lookup_main_t * lm = &im->lookup_main;
183   uword * hash;
184
185   if (vec_bytes(fib->old_hash_values))
186     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
187   if (vec_bytes(fib->new_hash_values))
188     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
189   fib->new_hash_values[0] = adj_index;
190
191   /* Make sure adj index is valid. */
192   if (CLIB_DEBUG > 0)
193     (void) ip_get_adjacency (lm, adj_index);
194
195   hash = fib->adj_index_by_dst_address[dst_address_length];
196
197   hash = _hash_set3 (hash, dst_address_u32,
198                      fib->new_hash_values,
199                      fib->old_hash_values);
200
201   fib->adj_index_by_dst_address[dst_address_length] = hash;
202
203   if (vec_len (im->add_del_route_callbacks) > 0)
204     {
205       ip4_add_del_route_callback_t * cb;
206       ip4_address_t d;
207       uword * p;
208
209       d.data_u32 = dst_address_u32;
210       vec_foreach (cb, im->add_del_route_callbacks)
211         if ((flags & cb->required_flags) == cb->required_flags)
212           cb->function (im, cb->function_opaque,
213                         fib, flags,
214                         &d, dst_address_length,
215                         fib->old_hash_values,
216                         fib->new_hash_values);
217
218       p = hash_get (hash, dst_address_u32);
219       /* hash_get should never return NULL here */
220       if (p)
221           clib_memcpy (p, fib->new_hash_values, 
222                        vec_bytes (fib->new_hash_values));
223       else
224           ASSERT(0);
225     }
226 }
227
228 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
229 {
230   ip_lookup_main_t * lm = &im->lookup_main;
231   ip4_fib_t * fib;
232   u32 dst_address, dst_address_length, adj_index, old_adj_index;
233   uword * hash, is_del;
234   ip4_add_del_route_callback_t * cb;
235
236   /* Either create new adjacency or use given one depending on arguments. */
237   if (a->n_add_adj > 0)
238     {
239       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
240       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
241     }
242   else
243     adj_index = a->adj_index;
244
245   dst_address = a->dst_address.data_u32;
246   dst_address_length = a->dst_address_length;
247   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
248
249   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
250   dst_address &= im->fib_masks[dst_address_length];
251
252   if (! fib->adj_index_by_dst_address[dst_address_length])
253     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
254
255   hash = fib->adj_index_by_dst_address[dst_address_length];
256
257   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
258
259   if (is_del)
260     {
261       fib->old_hash_values[0] = ~0;
262       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
263       fib->adj_index_by_dst_address[dst_address_length] = hash;
264
265       if (vec_len (im->add_del_route_callbacks) > 0
266           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
267         {
268           fib->new_hash_values[0] = ~0;
269           vec_foreach (cb, im->add_del_route_callbacks)
270             if ((a->flags & cb->required_flags) == cb->required_flags)
271               cb->function (im, cb->function_opaque,
272                             fib, a->flags,
273                             &a->dst_address, dst_address_length,
274                             fib->old_hash_values,
275                             fib->new_hash_values);
276         }
277     }
278   else
279     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
280                            adj_index);
281
282   old_adj_index = fib->old_hash_values[0];
283
284   /* Avoid spurious reference count increments */
285   if (old_adj_index == adj_index
286       && adj_index != ~0
287       && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
288     {
289       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
290       if (adj->share_count > 0)
291         adj->share_count --;
292     }
293
294   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
295                                is_del ? old_adj_index : adj_index,
296                                is_del);
297
298   /* Delete old adjacency index if present and changed. */
299   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
300       && old_adj_index != ~0
301       && old_adj_index != adj_index)
302     ip_del_adjacency (lm, old_adj_index);
303 }
304
305
306 u32
307 ip4_route_get_next_hop_adj (ip4_main_t * im,
308                             u32 fib_index,
309                             ip4_address_t *next_hop,
310                             u32 next_hop_sw_if_index,
311                             u32 explicit_fib_index)
312 {
313   ip_lookup_main_t * lm = &im->lookup_main;
314   vnet_main_t * vnm = vnet_get_main();
315   uword * nh_hash, * nh_result;
316   int is_interface_next_hop;
317   u32 nh_adj_index;
318   ip4_fib_t * fib;
319
320   fib = vec_elt_at_index (im->fibs, fib_index);
321
322   is_interface_next_hop = next_hop->data_u32 == 0;
323   if (is_interface_next_hop)
324     {
325       nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
326       if (nh_result)
327           nh_adj_index = *nh_result;
328       else
329         {
330            ip_adjacency_t * adj;
331            adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
332                                    &nh_adj_index);
333            ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
334            ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
335            hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
336         }
337     }
338   else if (next_hop_sw_if_index == ~0)
339     {
340       /* next-hop is recursive. we always need a indirect adj
341        * for recursive paths. Any LPM we perform now will give
342        * us a valid adj, but without tracking the next-hop we
343        * have no way to keep it valid.
344        */
345       ip_adjacency_t add_adj;
346       memset (&add_adj, 0, sizeof(add_adj));
347       add_adj.n_adj = 1;
348       add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
349       add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
350       add_adj.explicit_fib_index = explicit_fib_index;
351       ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
352     }
353   else
354     {
355       nh_hash = fib->adj_index_by_dst_address[32];
356       nh_result = hash_get (nh_hash, next_hop->data_u32);
357
358       /* Next hop must be known. */
359       if (! nh_result)
360         {
361           ip_adjacency_t * adj;
362
363           /* no /32 exists, get the longest prefix match */
364           nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
365                                                     next_hop, 0);
366           adj = ip_get_adjacency (lm, nh_adj_index);
367           /* if ARP interface adjacency is present, we need to
368              install ARP adjaceny for specific next hop */
369           if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
370               adj->arp.next_hop.ip4.as_u32 == 0)
371             {
372               nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
373             }
374         }
375       else
376         {
377           nh_adj_index = *nh_result;
378         }
379     }
380
381   return (nh_adj_index);
382 }
383
384 void
385 ip4_add_del_route_next_hop (ip4_main_t * im,
386                             u32 flags,
387                             ip4_address_t * dst_address,
388                             u32 dst_address_length,
389                             ip4_address_t * next_hop,
390                             u32 next_hop_sw_if_index,
391                             u32 next_hop_weight, u32 adj_index, 
392                             u32 explicit_fib_index)
393 {
394   vnet_main_t * vnm = vnet_get_main();
395   ip_lookup_main_t * lm = &im->lookup_main;
396   u32 fib_index;
397   ip4_fib_t * fib;
398   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
399   u32 dst_adj_index, nh_adj_index;
400   uword * dst_hash, * dst_result;
401   ip_adjacency_t * dst_adj;
402   ip_multipath_adjacency_t * old_mp, * new_mp;
403   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
404   clib_error_t * error = 0;
405
406   if (explicit_fib_index == (u32)~0)
407       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
408   else
409       fib_index = explicit_fib_index;
410
411   fib = vec_elt_at_index (im->fibs, fib_index);
412
413   /* Lookup next hop to be added or deleted. */
414   if (adj_index == (u32)~0)
415     {
416         nh_adj_index = ip4_route_get_next_hop_adj(im, fib_index,
417                                                   next_hop,
418                                                   next_hop_sw_if_index,
419                                                   explicit_fib_index);
420     }
421   else
422     {
423       nh_adj_index = adj_index;
424     }
425   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
426   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
427
428   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
429   dst_result = hash_get (dst_hash, dst_address_u32);
430   if (dst_result)
431     {
432       dst_adj_index = dst_result[0];
433       dst_adj = ip_get_adjacency (lm, dst_adj_index);
434     }
435   else
436     {
437       /* For deletes destination must be known. */
438       if (is_del)
439         {
440           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
441           error = clib_error_return (0, "unknown destination %U/%d",
442                                      format_ip4_address, dst_address,
443                                      dst_address_length);
444           goto done;
445         }
446
447       dst_adj_index = ~0;
448       dst_adj = 0;
449     }
450
451   /* Ignore adds of X/32 with next hop of X. */
452   if (! is_del
453       && dst_address_length == 32
454       && dst_address->data_u32 == next_hop->data_u32 
455       && adj_index != (u32)~0)
456     {
457       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
458       error = clib_error_return (0, "prefix matches next hop %U/%d",
459                                  format_ip4_address, dst_address,
460                                  dst_address_length);
461       goto done;
462     }
463
464   /* Destination is not known and default weight is set so add route
465      to existing non-multipath adjacency */
466   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
467     {
468       /* create / delete additional mapping of existing adjacency */
469       ip4_add_del_route_args_t a;
470
471       a.table_index_or_table_id = fib_index;
472       a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
473                  | IP4_ROUTE_FLAG_FIB_INDEX
474                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
475                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
476                              | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
477       a.dst_address = dst_address[0];
478       a.dst_address_length = dst_address_length;
479       a.adj_index = nh_adj_index;
480       a.add_adj = 0;
481       a.n_add_adj = 0;
482
483       ip4_add_del_route (im, &a);
484       goto done;
485     }
486
487   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
488
489   if (! ip_multipath_adjacency_add_del_next_hop
490       (lm, is_del,
491        old_mp_adj_index,
492        nh_adj_index,
493        next_hop_weight,
494        &new_mp_adj_index))
495     {
496       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
497       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
498                                  format_ip4_address, next_hop);
499       goto done;
500     }
501   
502   old_mp = new_mp = 0;
503   if (old_mp_adj_index != ~0)
504     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
505   if (new_mp_adj_index != ~0)
506     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
507
508   if (old_mp != new_mp)
509     {
510       ip4_add_del_route_args_t a;
511       ip_adjacency_t * adj;
512
513       a.table_index_or_table_id = fib_index;
514       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
515                  | IP4_ROUTE_FLAG_FIB_INDEX
516                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
517                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
518       a.dst_address = dst_address[0];
519       a.dst_address_length = dst_address_length;
520       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
521       a.add_adj = 0;
522       a.n_add_adj = 0;
523
524       ip4_add_del_route (im, &a);
525
526       adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index);
527       if (adj->n_adj == 1)
528         adj->share_count += is_del ? -1 : 1;
529     }
530
531  done:
532   if (error)
533     clib_error_report (error);
534 }
535
536 void *
537 ip4_get_route (ip4_main_t * im,
538                u32 table_index_or_table_id,
539                u32 flags,
540                u8 * address,
541                u32 address_length)
542 {
543   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
544   u32 dst_address = * (u32 *) address;
545   uword * hash, * p;
546
547   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
548   dst_address &= im->fib_masks[address_length];
549
550   hash = fib->adj_index_by_dst_address[address_length];
551   p = hash_get (hash, dst_address);
552   return (void *) p;
553 }
554
555 void
556 ip4_foreach_matching_route (ip4_main_t * im,
557                             u32 table_index_or_table_id,
558                             u32 flags,
559                             ip4_address_t * address,
560                             u32 address_length,
561                             ip4_address_t ** results,
562                             u8 ** result_lengths)
563 {
564   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
565   u32 dst_address = address->data_u32;
566   u32 this_length = address_length;
567   
568   if (*results)
569     _vec_len (*results) = 0;
570   if (*result_lengths)
571     _vec_len (*result_lengths) = 0;
572
573   while (this_length <= 32 && vec_len (results) == 0)
574     {
575       uword k, v;
576       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
577         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
578           {
579             ip4_address_t a;
580             a.data_u32 = k;
581             vec_add1 (*results, a);
582             vec_add1 (*result_lengths, this_length);
583           }
584       }));
585
586       this_length++;
587     }
588 }
589
590 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
591                                   u32 table_index_or_table_id,
592                                   u32 flags)
593 {
594   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
595   ip_lookup_main_t * lm = &im->lookup_main;
596   u32 i, l;
597   ip4_address_t a;
598   ip4_add_del_route_callback_t * cb;
599   static ip4_address_t * to_delete;
600
601   if (lm->n_adjacency_remaps == 0)
602     return;
603
604   for (l = 0; l <= 32; l++)
605     {
606       hash_pair_t * p;
607       uword * hash = fib->adj_index_by_dst_address[l];
608
609       if (hash_elts (hash) == 0)
610         continue;
611
612       if (to_delete)
613         _vec_len (to_delete) = 0;
614
615       hash_foreach_pair (p, hash, ({
616         u32 adj_index = p->value[0];
617         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
618
619         if (m)
620           {
621             /* Record destination address from hash key. */
622             a.data_u32 = p->key;
623
624             /* New adjacency points to nothing: so delete prefix. */
625             if (m == ~0)
626               vec_add1 (to_delete, a);
627             else
628               {
629                 /* Remap to new adjacency. */
630                 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
631
632                 /* Set new adjacency value. */
633                 fib->new_hash_values[0] = p->value[0] = m - 1;
634
635                 vec_foreach (cb, im->add_del_route_callbacks)
636                   if ((flags & cb->required_flags) == cb->required_flags)
637                     cb->function (im, cb->function_opaque,
638                                   fib, flags | IP4_ROUTE_FLAG_ADD,
639                                   &a, l,
640                                   fib->old_hash_values,
641                                   fib->new_hash_values);
642               }
643           }
644       }));
645
646       fib->new_hash_values[0] = ~0;
647       for (i = 0; i < vec_len (to_delete); i++)
648         {
649           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
650           vec_foreach (cb, im->add_del_route_callbacks)
651             if ((flags & cb->required_flags) == cb->required_flags)
652               cb->function (im, cb->function_opaque,
653                             fib, flags | IP4_ROUTE_FLAG_DEL,
654                             &a, l,
655                             fib->old_hash_values,
656                             fib->new_hash_values);
657         }
658     }
659
660   /* Also remap adjacencies in mtrie. */
661   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
662
663   /* Reset mapping table. */
664   vec_zero (lm->adjacency_remap_table);
665
666   /* All remaps have been performed. */
667   lm->n_adjacency_remaps = 0;
668 }
669
670 void ip4_delete_matching_routes (ip4_main_t * im,
671                                  u32 table_index_or_table_id,
672                                  u32 flags,
673                                  ip4_address_t * address,
674                                  u32 address_length)
675 {
676   static ip4_address_t * matching_addresses;
677   static u8 * matching_address_lengths;
678   u32 l, i;
679   ip4_add_del_route_args_t a;
680
681   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
682   a.table_index_or_table_id = table_index_or_table_id;
683   a.adj_index = ~0;
684   a.add_adj = 0;
685   a.n_add_adj = 0;
686
687   for (l = address_length + 1; l <= 32; l++)
688     {
689       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
690                                   address,
691                                   l,
692                                   &matching_addresses,
693                                   &matching_address_lengths);
694       for (i = 0; i < vec_len (matching_addresses); i++)
695         {
696           a.dst_address = matching_addresses[i];
697           a.dst_address_length = matching_address_lengths[i];
698           ip4_add_del_route (im, &a);
699         }
700     }
701
702   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
703 }
704
705 void
706 ip4_forward_next_trace (vlib_main_t * vm,
707                         vlib_node_runtime_t * node,
708                         vlib_frame_t * frame,
709                         vlib_rx_or_tx_t which_adj_index);
710
711 always_inline uword
712 ip4_lookup_inline (vlib_main_t * vm,
713                    vlib_node_runtime_t * node,
714                    vlib_frame_t * frame,
715                    int lookup_for_responses_to_locally_received_packets,
716                    int is_indirect)
717 {
718   ip4_main_t * im = &ip4_main;
719   ip_lookup_main_t * lm = &im->lookup_main;
720   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
721   u32 n_left_from, n_left_to_next, * from, * to_next;
722   ip_lookup_next_t next;
723   u32 cpu_index = os_get_cpu_number();
724
725   from = vlib_frame_vector_args (frame);
726   n_left_from = frame->n_vectors;
727   next = node->cached_next_index;
728
729   while (n_left_from > 0)
730     {
731       vlib_get_next_frame (vm, node, next,
732                            to_next, n_left_to_next);
733
734       while (n_left_from >= 4 && n_left_to_next >= 2)
735         {
736           vlib_buffer_t * p0, * p1;
737           ip4_header_t * ip0, * ip1;
738           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
739           ip_lookup_next_t next0, next1;
740           ip_adjacency_t * adj0, * adj1;
741           ip4_fib_mtrie_t * mtrie0, * mtrie1;
742           ip4_fib_mtrie_leaf_t leaf0, leaf1;
743           ip4_address_t * dst_addr0, *dst_addr1;
744           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
745           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
746           u32 flow_hash_config0, flow_hash_config1;
747           u32 hash_c0, hash_c1;
748           u32 wrong_next;
749
750           /* Prefetch next iteration. */
751           {
752             vlib_buffer_t * p2, * p3;
753
754             p2 = vlib_get_buffer (vm, from[2]);
755             p3 = vlib_get_buffer (vm, from[3]);
756
757             vlib_prefetch_buffer_header (p2, LOAD);
758             vlib_prefetch_buffer_header (p3, LOAD);
759
760             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
761             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
762           }
763
764           pi0 = to_next[0] = from[0];
765           pi1 = to_next[1] = from[1];
766
767           p0 = vlib_get_buffer (vm, pi0);
768           p1 = vlib_get_buffer (vm, pi1);
769
770           ip0 = vlib_buffer_get_current (p0);
771           ip1 = vlib_buffer_get_current (p1);
772
773           if (is_indirect)
774             {
775               ip_adjacency_t * iadj0, * iadj1;
776               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
777               iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
778               dst_addr0 = &iadj0->indirect.next_hop.ip4;
779               dst_addr1 = &iadj1->indirect.next_hop.ip4;
780             }
781           else
782             {
783               dst_addr0 = &ip0->dst_address;
784               dst_addr1 = &ip1->dst_address;
785             }
786
787           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
788           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
789           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
790             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
791           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
792             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
793
794
795           if (! lookup_for_responses_to_locally_received_packets)
796             {
797               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
798               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
799
800               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
801
802               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
803               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
804             }
805
806           tcp0 = (void *) (ip0 + 1);
807           tcp1 = (void *) (ip1 + 1);
808
809           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
810                          || ip0->protocol == IP_PROTOCOL_UDP);
811           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
812                          || ip1->protocol == IP_PROTOCOL_UDP);
813
814           if (! lookup_for_responses_to_locally_received_packets)
815             {
816               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
817               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
818             }
819
820           if (! lookup_for_responses_to_locally_received_packets)
821             {
822               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
823               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
824             }
825
826           if (! lookup_for_responses_to_locally_received_packets)
827             {
828               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
829               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
830             }
831
832           if (lookup_for_responses_to_locally_received_packets)
833             {
834               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
835               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
836             }
837           else
838             {
839               /* Handle default route. */
840               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
841               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
842
843               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
844               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
845             }
846
847           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
848                                                            dst_addr0,
849                                                            /* no_default_route */ 0));
850           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
851                                                            dst_addr1,
852                                                            /* no_default_route */ 0));
853           adj0 = ip_get_adjacency (lm, adj_index0);
854           adj1 = ip_get_adjacency (lm, adj_index1);
855
856           next0 = adj0->lookup_next_index;
857           next1 = adj1->lookup_next_index;
858
859           /* Use flow hash to compute multipath adjacency. */
860           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
861           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
862           if (PREDICT_FALSE (adj0->n_adj > 1))
863             {
864               flow_hash_config0 = 
865                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
866               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
867                 ip4_compute_flow_hash (ip0, flow_hash_config0);
868             }
869           if (PREDICT_FALSE(adj1->n_adj > 1))
870             {
871               flow_hash_config1 = 
872                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
873               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
874                 ip4_compute_flow_hash (ip1, flow_hash_config1);
875             }
876
877           ASSERT (adj0->n_adj > 0);
878           ASSERT (adj1->n_adj > 0);
879           ASSERT (is_pow2 (adj0->n_adj));
880           ASSERT (is_pow2 (adj1->n_adj));
881           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
882           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
883
884           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
885           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
886
887           if (is_indirect)
888             {
889               /* ARP for next-hop not packet's destination address */
890               if (adj0->lookup_next_index == IP_LOOKUP_NEXT_ARP)
891                 ip0->dst_address.as_u32 = dst_addr0->as_u32;
892               if (adj1->lookup_next_index == IP_LOOKUP_NEXT_ARP)
893                 ip1->dst_address.as_u32 = dst_addr1->as_u32;
894             }
895
896           vlib_increment_combined_counter 
897               (cm, cpu_index, adj_index0, 1,
898                vlib_buffer_length_in_chain (vm, p0) 
899                + sizeof(ethernet_header_t));
900           vlib_increment_combined_counter 
901               (cm, cpu_index, adj_index1, 1,
902                vlib_buffer_length_in_chain (vm, p1)
903                + sizeof(ethernet_header_t));
904
905           from += 2;
906           to_next += 2;
907           n_left_to_next -= 2;
908           n_left_from -= 2;
909
910           wrong_next = (next0 != next) + 2*(next1 != next);
911           if (PREDICT_FALSE (wrong_next != 0))
912             {
913               switch (wrong_next)
914                 {
915                 case 1:
916                   /* A B A */
917                   to_next[-2] = pi1;
918                   to_next -= 1;
919                   n_left_to_next += 1;
920                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
921                   break;
922
923                 case 2:
924                   /* A A B */
925                   to_next -= 1;
926                   n_left_to_next += 1;
927                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
928                   break;
929
930                 case 3:
931                   /* A B C */
932                   to_next -= 2;
933                   n_left_to_next += 2;
934                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
935                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
936                   if (next0 == next1)
937                     {
938                       /* A B B */
939                       vlib_put_next_frame (vm, node, next, n_left_to_next);
940                       next = next1;
941                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
942                     }
943                 }
944             }
945         }
946     
947       while (n_left_from > 0 && n_left_to_next > 0)
948         {
949           vlib_buffer_t * p0;
950           ip4_header_t * ip0;
951           __attribute__((unused)) tcp_header_t * tcp0;
952           ip_lookup_next_t next0;
953           ip_adjacency_t * adj0;
954           ip4_fib_mtrie_t * mtrie0;
955           ip4_fib_mtrie_leaf_t leaf0;
956           ip4_address_t * dst_addr0;
957           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
958           u32 flow_hash_config0, hash_c0;
959
960           pi0 = from[0];
961           to_next[0] = pi0;
962
963           p0 = vlib_get_buffer (vm, pi0);
964
965           ip0 = vlib_buffer_get_current (p0);
966
967           if (is_indirect)
968             {
969               ip_adjacency_t * iadj0;
970               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
971               dst_addr0 = &iadj0->indirect.next_hop.ip4;
972             }
973           else
974             {
975               dst_addr0 = &ip0->dst_address;
976             }
977
978           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
979           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
980             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
981
982           if (! lookup_for_responses_to_locally_received_packets)
983             {
984               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
985
986               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
987
988               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
989             }
990
991           tcp0 = (void *) (ip0 + 1);
992
993           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
994                          || ip0->protocol == IP_PROTOCOL_UDP);
995
996           if (! lookup_for_responses_to_locally_received_packets)
997             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
998
999           if (! lookup_for_responses_to_locally_received_packets)
1000             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
1001
1002           if (! lookup_for_responses_to_locally_received_packets)
1003             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
1004
1005           if (lookup_for_responses_to_locally_received_packets)
1006             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
1007           else
1008             {
1009               /* Handle default route. */
1010               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1011               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1012             }
1013
1014           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1015                                                            dst_addr0,
1016                                                            /* no_default_route */ 0));
1017
1018           adj0 = ip_get_adjacency (lm, adj_index0);
1019
1020           next0 = adj0->lookup_next_index;
1021
1022           /* Use flow hash to compute multipath adjacency. */
1023           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
1024           if (PREDICT_FALSE(adj0->n_adj > 1))
1025             {
1026               flow_hash_config0 = 
1027                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
1028
1029               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
1030                 ip4_compute_flow_hash (ip0, flow_hash_config0);
1031             }
1032
1033           ASSERT (adj0->n_adj > 0);
1034           ASSERT (is_pow2 (adj0->n_adj));
1035           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
1036
1037           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1038
1039           if (is_indirect)
1040             {
1041               /* ARP for next-hop not packet's destination address */
1042               if (adj0->lookup_next_index == IP_LOOKUP_NEXT_ARP)
1043                 ip0->dst_address.as_u32 = dst_addr0->as_u32;
1044             }
1045
1046           vlib_increment_combined_counter 
1047               (cm, cpu_index, adj_index0, 1,
1048                vlib_buffer_length_in_chain (vm, p0)
1049                + sizeof(ethernet_header_t));
1050
1051           from += 1;
1052           to_next += 1;
1053           n_left_to_next -= 1;
1054           n_left_from -= 1;
1055
1056           if (PREDICT_FALSE (next0 != next))
1057             {
1058               n_left_to_next += 1;
1059               vlib_put_next_frame (vm, node, next, n_left_to_next);
1060               next = next0;
1061               vlib_get_next_frame (vm, node, next,
1062                                    to_next, n_left_to_next);
1063               to_next[0] = pi0;
1064               to_next += 1;
1065               n_left_to_next -= 1;
1066             }
1067         }
1068
1069       vlib_put_next_frame (vm, node, next, n_left_to_next);
1070     }
1071
1072   if (node->flags & VLIB_NODE_FLAG_TRACE)
1073     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
1074
1075   return frame->n_vectors;
1076 }
1077
1078 /** @brief IPv4 lookup node.
1079     @node ip4-lookup
1080
1081     This is the main IPv4 lookup dispatch node.
1082
1083     @param vm vlib_main_t corresponding to the current thread
1084     @param node vlib_node_runtime_t
1085     @param frame vlib_frame_t whose contents should be dispatched
1086
1087     @par Graph mechanics: buffer metadata, next index usage
1088
1089     @em Uses:
1090     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
1091         - Indicates the @c sw_if_index value of the interface that the
1092           packet was received on.
1093     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
1094         - When the value is @c ~0 then the node performs a longest prefix
1095           match (LPM) for the packet destination address in the FIB attached
1096           to the receive interface.
1097         - Otherwise perform LPM for the packet destination address in the
1098           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
1099           value (0, 1, ...) and not a VRF id.
1100
1101     @em Sets:
1102     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
1103         - The lookup result adjacency index.
1104
1105     <em>Next Index:</em>
1106     - Dispatches the packet to the node index found in
1107       ip_adjacency_t @c adj->lookup_next_index
1108       (where @c adj is the lookup result adjacency).
1109 */
1110 static uword
1111 ip4_lookup (vlib_main_t * vm,
1112             vlib_node_runtime_t * node,
1113             vlib_frame_t * frame)
1114 {
1115   return ip4_lookup_inline (vm, node, frame,
1116                             /* lookup_for_responses_to_locally_received_packets */ 0,
1117                             /* is_indirect */ 0);
1118
1119 }
1120
1121 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
1122                                         ip_adjacency_t * adj,
1123                                         u32 sw_if_index,
1124                                         u32 if_address_index)
1125 {
1126   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1127   ip_lookup_next_t n;
1128   vnet_l3_packet_type_t packet_type;
1129   u32 node_index;
1130
1131   if (hw->hw_class_index == ethernet_hw_interface_class.index
1132       || hw->hw_class_index == srp_hw_interface_class.index)
1133     {
1134       /* 
1135        * We have a bit of a problem in this case. ip4-arp uses
1136        * the rewrite_header.next_index to hand pkts to the
1137        * indicated inteface output node. We can end up in
1138        * ip4_rewrite_local, too, which also pays attention to 
1139        * rewrite_header.next index. Net result: a hack in
1140        * ip4_rewrite_local...
1141        */
1142       n = IP_LOOKUP_NEXT_ARP;
1143       node_index = ip4_arp_node.index;
1144       adj->if_address_index = if_address_index;
1145       adj->arp.next_hop.ip4.as_u32 = 0;
1146       ip46_address_reset(&adj->arp.next_hop);
1147       packet_type = VNET_L3_PACKET_TYPE_ARP;
1148     }
1149   else
1150     {
1151       n = IP_LOOKUP_NEXT_REWRITE;
1152       node_index = ip4_rewrite_node.index;
1153       packet_type = VNET_L3_PACKET_TYPE_IP4;
1154     }
1155
1156   adj->lookup_next_index = n;
1157   vnet_rewrite_for_sw_interface
1158     (vnm,
1159      packet_type,
1160      sw_if_index,
1161      node_index,
1162      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1163      &adj->rewrite_header,
1164      sizeof (adj->rewrite_data));
1165 }
1166
1167 static void
1168 ip4_add_interface_routes (u32 sw_if_index,
1169                           ip4_main_t * im, u32 fib_index,
1170                           ip_interface_address_t * a)
1171 {
1172   vnet_main_t * vnm = vnet_get_main();
1173   ip_lookup_main_t * lm = &im->lookup_main;
1174   ip_adjacency_t * adj;
1175   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1176   ip4_add_del_route_args_t x;
1177   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1178   u32 classify_table_index;
1179
1180   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1181   x.table_index_or_table_id = fib_index;
1182   x.flags = (IP4_ROUTE_FLAG_ADD
1183              | IP4_ROUTE_FLAG_FIB_INDEX
1184              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1185   x.dst_address = address[0];
1186   x.dst_address_length = a->address_length;
1187   x.n_add_adj = 0;
1188   x.add_adj = 0;
1189
1190   a->neighbor_probe_adj_index = ~0;
1191   if (a->address_length < 32)
1192     {
1193       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1194                               &x.adj_index);
1195       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1196       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1197       ip4_add_del_route (im, &x);
1198       a->neighbor_probe_adj_index = x.adj_index;
1199     }
1200   
1201   /* Add e.g. 1.1.1.1/32 as local to this host. */
1202   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1203                           &x.adj_index);
1204   
1205   classify_table_index = ~0;
1206   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1207     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1208   if (classify_table_index != (u32) ~0)
1209     {
1210       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1211       adj->classify.table_index = classify_table_index;
1212     }
1213   else
1214     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1215   
1216   adj->if_address_index = a - lm->if_address_pool;
1217   adj->rewrite_header.sw_if_index = sw_if_index;
1218   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1219   /* 
1220    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1221    * fail an RPF-ish check, but still go thru the rewrite code...
1222    */
1223   adj->rewrite_header.data_bytes = 0;
1224
1225   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1226   x.dst_address_length = 32;
1227   ip4_add_del_route (im, &x);
1228 }
1229
1230 static void
1231 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1232 {
1233   ip4_add_del_route_args_t x;
1234
1235   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1236   x.table_index_or_table_id = fib_index;
1237   x.flags = (IP4_ROUTE_FLAG_DEL
1238              | IP4_ROUTE_FLAG_FIB_INDEX
1239              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1240   x.dst_address = address[0];
1241   x.dst_address_length = address_length;
1242   x.adj_index = ~0;
1243   x.n_add_adj = 0;
1244   x.add_adj = 0;
1245
1246   if (address_length < 32)
1247     ip4_add_del_route (im, &x);
1248
1249   x.dst_address_length = 32;
1250   ip4_add_del_route (im, &x);
1251
1252   ip4_delete_matching_routes (im,
1253                               fib_index,
1254                               IP4_ROUTE_FLAG_FIB_INDEX,
1255                               address,
1256                               address_length);
1257 }
1258
1259 typedef struct {
1260     u32 sw_if_index;
1261     ip4_address_t address;
1262     u32 length;
1263 } ip4_interface_address_t;
1264
1265 static clib_error_t *
1266 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1267                                         u32 sw_if_index,
1268                                         ip4_address_t * new_address,
1269                                         u32 new_length,
1270                                         u32 redistribute,
1271                                         u32 insert_routes,
1272                                         u32 is_del);
1273
1274 static clib_error_t *
1275 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1276                                         u32 sw_if_index,
1277                                         ip4_address_t * address,
1278                                         u32 address_length,
1279                                         u32 redistribute,
1280                                         u32 insert_routes,
1281                                         u32 is_del)
1282 {
1283   vnet_main_t * vnm = vnet_get_main();
1284   ip4_main_t * im = &ip4_main;
1285   ip_lookup_main_t * lm = &im->lookup_main;
1286   clib_error_t * error = 0;
1287   u32 if_address_index, elts_before;
1288   ip4_address_fib_t ip4_af, * addr_fib = 0;
1289
1290   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1291   ip4_addr_fib_init (&ip4_af, address,
1292                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1293   vec_add1 (addr_fib, ip4_af);
1294
1295   /* When adding an address check that it does not conflict with an existing address. */
1296   if (! is_del)
1297     {
1298       ip_interface_address_t * ia;
1299       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1300                                     0 /* honor unnumbered */,
1301       ({
1302         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1303
1304         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1305             || ip4_destination_matches_route (im, x, address, address_length))
1306           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1307                                     format_ip4_address_and_length, address, address_length,
1308                                     format_ip4_address_and_length, x, ia->address_length,
1309                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1310       }));
1311     }
1312
1313   elts_before = pool_elts (lm->if_address_pool);
1314
1315   error = ip_interface_address_add_del
1316     (lm,
1317      sw_if_index,
1318      addr_fib,
1319      address_length,
1320      is_del,
1321      &if_address_index);
1322   if (error)
1323     goto done;
1324   
1325   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1326     {
1327       if (is_del)
1328         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1329                                   address_length);
1330       
1331       else
1332           ip4_add_interface_routes (sw_if_index,
1333                                     im, ip4_af.fib_index,
1334                                     pool_elt_at_index 
1335                                     (lm->if_address_pool, if_address_index));
1336     }
1337
1338   /* If pool did not grow/shrink: add duplicate address. */
1339   if (elts_before != pool_elts (lm->if_address_pool))
1340     {
1341       ip4_add_del_interface_address_callback_t * cb;
1342       vec_foreach (cb, im->add_del_interface_address_callbacks)
1343         cb->function (im, cb->function_opaque, sw_if_index,
1344                       address, address_length,
1345                       if_address_index,
1346                       is_del);
1347     }
1348
1349  done:
1350   vec_free (addr_fib);
1351   return error;
1352 }
1353
1354 clib_error_t *
1355 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1356                                ip4_address_t * address, u32 address_length,
1357                                u32 is_del)
1358 {
1359   return ip4_add_del_interface_address_internal
1360     (vm, sw_if_index, address, address_length,
1361      /* redistribute */ 1,
1362      /* insert_routes */ 1,
1363      is_del);
1364 }
1365
1366 static clib_error_t *
1367 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1368                                 u32 sw_if_index,
1369                                 u32 flags)
1370 {
1371   ip4_main_t * im = &ip4_main;
1372   ip_interface_address_t * ia;
1373   ip4_address_t * a;
1374   u32 is_admin_up, fib_index;
1375   
1376   /* Fill in lookup tables with default table (0). */
1377   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1378   
1379   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1380   
1381   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1382   
1383   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1384
1385   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1386                                 0 /* honor unnumbered */,
1387   ({
1388     a = ip_interface_address_get_address (&im->lookup_main, ia);
1389     if (is_admin_up)
1390       ip4_add_interface_routes (sw_if_index,
1391                                 im, fib_index,
1392                                 ia);
1393     else
1394       ip4_del_interface_routes (im, fib_index,
1395                                 a, ia->address_length);
1396   }));
1397
1398   return 0;
1399 }
1400  
1401 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1402
1403 /* Built-in ip4 unicast rx feature path definition */
1404 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
1405   .node_name = "ip4-inacl", 
1406   .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-rx", 0},
1407   .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
1408 };
1409
1410 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
1411   .node_name = "ip4-source-check-via-rx",
1412   .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-any", 0},
1413   .feature_index = 
1414   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
1415 };
1416
1417 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
1418   .node_name = "ip4-source-check-via-any",
1419   .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
1420   .feature_index = 
1421   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
1422 };
1423
1424 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
1425   .node_name = "ip4-source-and-port-range-check-rx",
1426   .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
1427   .feature_index =
1428   &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
1429 };
1430
1431 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
1432   .node_name = "ip4-policer-classify",
1433   .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip4", 0},
1434   .feature_index =
1435   &ip4_main.ip4_unicast_rx_feature_policer_classify,
1436 };
1437
1438 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
1439   .node_name = "ipsec-input-ip4",
1440   .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip4", 0},
1441   .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
1442 };
1443
1444 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
1445   .node_name = "vpath-input-ip4",
1446   .runs_before = ORDER_CONSTRAINTS {"ip4-lookup", 0},
1447   .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
1448 };
1449
1450 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
1451   .node_name = "ip4-lookup",
1452   .runs_before = 0, /* not before any other features */
1453   .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
1454 };
1455
1456 /* Built-in ip4 multicast rx feature path definition */
1457 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
1458   .node_name = "vpath-input-ip4",
1459   .runs_before = ORDER_CONSTRAINTS {"ip4-lookup-multicast", 0},
1460   .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
1461 };
1462
1463 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
1464   .node_name = "ip4-lookup-multicast",
1465   .runs_before = 0, /* not before any other features */
1466   .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
1467 };
1468
/* Start nodes handed to ip_feature_init_cast for the rx feature paths. */
static char * rx_feature_start_nodes[] = {
  "ip4-input",
  "ip4-input-no-checksum"
};

/* Start nodes handed to ip_feature_init_cast for the tx feature path. */
static char * tx_feature_start_nodes[] = {
  "ip4-rewrite-transit"
};
1474
1475 /* Source and port-range check ip4 tx feature path definition */
1476 VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
1477   .node_name = "ip4-source-and-port-range-check-tx",
1478   .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
1479   .feature_index =
1480   &ip4_main.ip4_unicast_tx_feature_source_and_port_range_check,
1481
1482 };
1483
1484 /* Built-in ip4 tx feature path definition */
1485 VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
1486   .node_name = "interface-output",
1487   .runs_before = 0, /* not before any other features */
1488   .feature_index = &ip4_main.ip4_tx_feature_interface_output,
1489 };
1490
1491
1492 static clib_error_t *
1493 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
1494 {
1495   ip_lookup_main_t * lm = &im->lookup_main;
1496   clib_error_t * error;
1497   vnet_cast_t cast;
1498   ip_config_main_t * cm;
1499   vnet_config_main_t * vcm;
1500   char **feature_start_nodes;
1501   int feature_start_len;
1502
1503   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
1504     {
1505       cm = &lm->feature_config_mains[cast];
1506       vcm = &cm->config_main;
1507
1508       if (cast < VNET_IP_TX_FEAT)
1509         {
1510           feature_start_nodes = rx_feature_start_nodes;
1511           feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
1512         }
1513       else
1514         {
1515           feature_start_nodes = tx_feature_start_nodes;
1516           feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
1517         }
1518       
1519       if ((error = ip_feature_init_cast (vm, cm, vcm, 
1520                                          feature_start_nodes,
1521                                          feature_start_len,
1522                                          cast,
1523                                          1 /* is_ip4 */)))
1524         return error;
1525     }
1526
1527   return 0;
1528 }
1529
1530 static clib_error_t *
1531 ip4_sw_interface_add_del (vnet_main_t * vnm,
1532                           u32 sw_if_index,
1533                           u32 is_add)
1534 {
1535   vlib_main_t * vm = vnm->vlib_main;
1536   ip4_main_t * im = &ip4_main;
1537   ip_lookup_main_t * lm = &im->lookup_main;
1538   u32 ci, cast;
1539   u32 feature_index;
1540
1541   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
1542     {
1543       ip_config_main_t * cm = &lm->feature_config_mains[cast];
1544       vnet_config_main_t * vcm = &cm->config_main;
1545
1546       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1547       ci = cm->config_index_by_sw_if_index[sw_if_index];
1548
1549       if (cast == VNET_IP_RX_UNICAST_FEAT)
1550         feature_index = im->ip4_unicast_rx_feature_lookup;
1551       else if (cast == VNET_IP_RX_MULTICAST_FEAT)
1552         feature_index = im->ip4_multicast_rx_feature_lookup;
1553       else
1554         feature_index = im->ip4_tx_feature_interface_output;
1555
1556       if (is_add)
1557         ci = vnet_config_add_feature (vm, vcm, 
1558                                       ci,
1559                                       feature_index,
1560                                       /* config data */ 0,
1561                                       /* # bytes of config data */ 0);
1562       else
1563         ci = vnet_config_del_feature (vm, vcm,
1564                                       ci,
1565                                       feature_index,
1566                                       /* config data */ 0,
1567                                       /* # bytes of config data */ 0);
1568
1569       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1570       /* 
1571        * note: do not update the tx feature count here.
1572        */
1573     }
1574
1575   return /* no error */ 0;
1576 }
1577
1578 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1579
1580 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
1581
1582 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1583   .function = ip4_lookup,
1584   .name = "ip4-lookup",
1585   .vector_size = sizeof (u32),
1586
1587   .format_trace = format_ip4_lookup_trace,
1588
1589   .n_next_nodes = IP4_LOOKUP_N_NEXT,
1590   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1591 };
1592
1593 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
1594
1595 static uword
1596 ip4_indirect (vlib_main_t * vm,
1597                vlib_node_runtime_t * node,
1598                vlib_frame_t * frame)
1599 {
1600   return ip4_lookup_inline (vm, node, frame,
1601                             /* lookup_for_responses_to_locally_received_packets */ 0,
1602                             /* is_indirect */ 1);
1603 }
1604
1605 VLIB_REGISTER_NODE (ip4_indirect_node) = {
1606   .function = ip4_indirect,
1607   .name = "ip4-indirect",
1608   .vector_size = sizeof (u32),
1609   .sibling_of = "ip4-lookup",
1610   .format_trace = format_ip4_lookup_trace,
1611
1612   .n_next_nodes = 0,
1613 };
1614
1615 VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect);
1616
1617
1618 /* Global IP4 main. */
1619 ip4_main_t ip4_main;
1620
1621 clib_error_t *
1622 ip4_lookup_init (vlib_main_t * vm)
1623 {
1624   ip4_main_t * im = &ip4_main;
1625   clib_error_t * error;
1626   uword i;
1627
1628   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1629     {
1630       u32 m;
1631
1632       if (i < 32)
1633         m = pow2_mask (i) << (32 - i);
1634       else 
1635         m = ~0;
1636       im->fib_masks[i] = clib_host_to_net_u32 (m);
1637     }
1638
1639   /* Create FIB with index 0 and table id of 0. */
1640   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1641
1642   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1643
1644   {
1645     pg_node_t * pn;
1646     pn = pg_get_node (ip4_lookup_node.index);
1647     pn->unformat_edit = unformat_pg_ip4_header;
1648   }
1649
1650   {
1651     ethernet_arp_header_t h;
1652
1653     memset (&h, 0, sizeof (h));
1654
1655     /* Set target ethernet address to all zeros. */
1656     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1657
1658 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1659 #define _8(f,v) h.f = v;
1660     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1661     _16 (l3_type, ETHERNET_TYPE_IP4);
1662     _8 (n_l2_address_bytes, 6);
1663     _8 (n_l3_address_bytes, 4);
1664     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1665 #undef _16
1666 #undef _8
1667
1668     vlib_packet_template_init (vm,
1669                                &im->ip4_arp_request_packet_template,
1670                                /* data */ &h,
1671                                sizeof (h),
1672                                /* alloc chunk size */ 8,
1673                                "ip4 arp");
1674   }
1675
1676   error = ip4_feature_init (vm, im);
1677
1678   return error;
1679 }
1680
1681 VLIB_INIT_FUNCTION (ip4_lookup_init);
1682
1683 typedef struct {
1684   /* Adjacency taken. */
1685   u32 adj_index;
1686   u32 flow_hash;
1687   u32 fib_index;
1688
1689   /* Packet data, possibly *after* rewrite. */
1690   u8 packet_data[64 - 1*sizeof(u32)];
1691 } ip4_forward_next_trace_t;
1692
1693 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1694 {
1695   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1696   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1697   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1698   uword indent = format_get_indent (s);
1699   s = format (s, "%U%U",
1700                 format_white_space, indent,
1701                 format_ip4_header, t->packet_data);
1702   return s;
1703 }
1704
1705 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1706 {
1707   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1708   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1709   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1710   vnet_main_t * vnm = vnet_get_main();
1711   ip4_main_t * im = &ip4_main;
1712   uword indent = format_get_indent (s);
1713
1714   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1715               t->fib_index, t->adj_index, format_ip_adjacency,
1716               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1717   s = format (s, "\n%U%U",
1718               format_white_space, indent,
1719               format_ip4_header, t->packet_data);
1720   return s;
1721 }
1722
1723 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1724 {
1725   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1726   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1727   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1728   vnet_main_t * vnm = vnet_get_main();
1729   ip4_main_t * im = &ip4_main;
1730   uword indent = format_get_indent (s);
1731
1732   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
1733               t->fib_index, t->adj_index, format_ip_adjacency,
1734               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1735   s = format (s, "\n%U%U",
1736               format_white_space, indent,
1737               format_ip_adjacency_packet_data,
1738               vnm, &im->lookup_main, t->adj_index,
1739               t->packet_data, sizeof (t->packet_data));
1740   return s;
1741 }
1742
1743 /* Common trace function for all ip4-forward next nodes. */
1744 void
1745 ip4_forward_next_trace (vlib_main_t * vm,
1746                         vlib_node_runtime_t * node,
1747                         vlib_frame_t * frame,
1748                         vlib_rx_or_tx_t which_adj_index)
1749 {
1750   u32 * from, n_left;
1751   ip4_main_t * im = &ip4_main;
1752
1753   n_left = frame->n_vectors;
1754   from = vlib_frame_vector_args (frame);
1755   
1756   while (n_left >= 4)
1757     {
1758       u32 bi0, bi1;
1759       vlib_buffer_t * b0, * b1;
1760       ip4_forward_next_trace_t * t0, * t1;
1761
1762       /* Prefetch next iteration. */
1763       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1764       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1765
1766       bi0 = from[0];
1767       bi1 = from[1];
1768
1769       b0 = vlib_get_buffer (vm, bi0);
1770       b1 = vlib_get_buffer (vm, bi1);
1771
1772       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1773         {
1774           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1775           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1776           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1777           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1778               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1779               vec_elt (im->fib_index_by_sw_if_index,
1780                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1781
1782           clib_memcpy (t0->packet_data,
1783                   vlib_buffer_get_current (b0),
1784                   sizeof (t0->packet_data));
1785         }
1786       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1787         {
1788           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1789           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1790           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1791           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1792               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1793               vec_elt (im->fib_index_by_sw_if_index,
1794                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1795           clib_memcpy (t1->packet_data,
1796                   vlib_buffer_get_current (b1),
1797                   sizeof (t1->packet_data));
1798         }
1799       from += 2;
1800       n_left -= 2;
1801     }
1802
1803   while (n_left >= 1)
1804     {
1805       u32 bi0;
1806       vlib_buffer_t * b0;
1807       ip4_forward_next_trace_t * t0;
1808
1809       bi0 = from[0];
1810
1811       b0 = vlib_get_buffer (vm, bi0);
1812
1813       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1814         {
1815           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1816           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1817           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1818           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1819               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1820               vec_elt (im->fib_index_by_sw_if_index,
1821                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1822           clib_memcpy (t0->packet_data,
1823                   vlib_buffer_get_current (b0),
1824                   sizeof (t0->packet_data));
1825         }
1826       from += 1;
1827       n_left -= 1;
1828     }
1829 }
1830
1831 static uword
1832 ip4_drop_or_punt (vlib_main_t * vm,
1833                   vlib_node_runtime_t * node,
1834                   vlib_frame_t * frame,
1835                   ip4_error_t error_code)
1836 {
1837   u32 * buffers = vlib_frame_vector_args (frame);
1838   uword n_packets = frame->n_vectors;
1839
1840   vlib_error_drop_buffers (vm, node,
1841                            buffers,
1842                            /* stride */ 1,
1843                            n_packets,
1844                            /* next */ 0,
1845                            ip4_input_node.index,
1846                            error_code);
1847
1848   if (node->flags & VLIB_NODE_FLAG_TRACE)
1849     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1850
1851   return n_packets;
1852 }
1853
1854 static uword
1855 ip4_drop (vlib_main_t * vm,
1856           vlib_node_runtime_t * node,
1857           vlib_frame_t * frame)
1858 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1859
1860 static uword
1861 ip4_punt (vlib_main_t * vm,
1862           vlib_node_runtime_t * node,
1863           vlib_frame_t * frame)
1864 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1865
1866 static uword
1867 ip4_miss (vlib_main_t * vm,
1868           vlib_node_runtime_t * node,
1869           vlib_frame_t * frame)
1870 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1871
1872 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1873   .function = ip4_drop,
1874   .name = "ip4-drop",
1875   .vector_size = sizeof (u32),
1876
1877   .format_trace = format_ip4_forward_next_trace,
1878
1879   .n_next_nodes = 1,
1880   .next_nodes = {
1881     [0] = "error-drop",
1882   },
1883 };
1884
1885 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
1886
1887 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1888   .function = ip4_punt,
1889   .name = "ip4-punt",
1890   .vector_size = sizeof (u32),
1891
1892   .format_trace = format_ip4_forward_next_trace,
1893
1894   .n_next_nodes = 1,
1895   .next_nodes = {
1896     [0] = "error-punt",
1897   },
1898 };
1899
1900 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
1901
1902 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1903   .function = ip4_miss,
1904   .name = "ip4-miss",
1905   .vector_size = sizeof (u32),
1906
1907   .format_trace = format_ip4_forward_next_trace,
1908
1909   .n_next_nodes = 1,
1910   .next_nodes = {
1911     [0] = "error-drop",
1912   },
1913 };
1914
1915 VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss);
1916
1917 /* Compute TCP/UDP/ICMP4 checksum in software. */
1918 u16
1919 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1920                               ip4_header_t * ip0)
1921 {
1922   ip_csum_t sum0;
1923   u32 ip_header_length, payload_length_host_byte_order;
1924   u32 n_this_buffer, n_bytes_left;
1925   u16 sum16;
1926   void * data_this_buffer;
1927   
1928   /* Initialize checksum with ip header. */
1929   ip_header_length = ip4_header_bytes (ip0);
1930   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1931   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1932
1933   if (BITS (uword) == 32)
1934     {
1935       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1936       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1937     }
1938   else
1939     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1940
1941   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1942   data_this_buffer = (void *) ip0 + ip_header_length;
1943   if (n_this_buffer + ip_header_length > p0->current_length)
1944     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1945   while (1)
1946     {
1947       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1948       n_bytes_left -= n_this_buffer;
1949       if (n_bytes_left == 0)
1950         break;
1951
1952       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1953       p0 = vlib_get_buffer (vm, p0->next_buffer);
1954       data_this_buffer = vlib_buffer_get_current (p0);
1955       n_this_buffer = p0->current_length;
1956     }
1957
1958   sum16 = ~ ip_csum_fold (sum0);
1959
1960   return sum16;
1961 }
1962
1963 static u32
1964 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1965 {
1966   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1967   udp_header_t * udp0;
1968   u16 sum16;
1969
1970   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1971           || ip0->protocol == IP_PROTOCOL_UDP);
1972
1973   udp0 = (void *) (ip0 + 1);
1974   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1975     {
1976       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1977                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1978       return p0->flags;
1979     }
1980
1981   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1982
1983   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1984                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1985
1986   return p0->flags;
1987 }
1988
1989 static uword
1990 ip4_local (vlib_main_t * vm,
1991            vlib_node_runtime_t * node,
1992            vlib_frame_t * frame)
1993 {
1994   ip4_main_t * im = &ip4_main;
1995   ip_lookup_main_t * lm = &im->lookup_main;
1996   ip_local_next_t next_index;
1997   u32 * from, * to_next, n_left_from, n_left_to_next;
1998   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1999
2000   from = vlib_frame_vector_args (frame);
2001   n_left_from = frame->n_vectors;
2002   next_index = node->cached_next_index;
2003   
2004   if (node->flags & VLIB_NODE_FLAG_TRACE)
2005     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2006
2007   while (n_left_from > 0)
2008     {
2009       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2010
2011       while (n_left_from >= 4 && n_left_to_next >= 2)
2012         {
2013           vlib_buffer_t * p0, * p1;
2014           ip4_header_t * ip0, * ip1;
2015           udp_header_t * udp0, * udp1;
2016           ip4_fib_mtrie_t * mtrie0, * mtrie1;
2017           ip4_fib_mtrie_leaf_t leaf0, leaf1;
2018           ip_adjacency_t * adj0, * adj1;
2019           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
2020           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
2021           i32 len_diff0, len_diff1;
2022           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2023           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
2024           u8 enqueue_code;
2025       
2026           pi0 = to_next[0] = from[0];
2027           pi1 = to_next[1] = from[1];
2028           from += 2;
2029           n_left_from -= 2;
2030           to_next += 2;
2031           n_left_to_next -= 2;
2032       
2033           p0 = vlib_get_buffer (vm, pi0);
2034           p1 = vlib_get_buffer (vm, pi1);
2035
2036           ip0 = vlib_buffer_get_current (p0);
2037           ip1 = vlib_buffer_get_current (p1);
2038
2039           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2040                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2041           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
2042                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
2043
2044           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2045           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
2046
2047           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
2048
2049           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2050           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
2051
2052           /* Treat IP frag packets as "experimental" protocol for now
2053              until support of IP frag reassembly is implemented */
2054           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
2055           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
2056           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2057           is_udp1 = proto1 == IP_PROTOCOL_UDP;
2058           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2059           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
2060
2061           flags0 = p0->flags;
2062           flags1 = p1->flags;
2063
2064           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2065           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2066
2067           udp0 = ip4_next_header (ip0);
2068           udp1 = ip4_next_header (ip1);
2069
2070           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2071           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2072           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2073
2074           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2075           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
2076
2077           /* Verify UDP length. */
2078           ip_len0 = clib_net_to_host_u16 (ip0->length);
2079           ip_len1 = clib_net_to_host_u16 (ip1->length);
2080           udp_len0 = clib_net_to_host_u16 (udp0->length);
2081           udp_len1 = clib_net_to_host_u16 (udp1->length);
2082
2083           len_diff0 = ip_len0 - udp_len0;
2084           len_diff1 = ip_len1 - udp_len1;
2085
2086           len_diff0 = is_udp0 ? len_diff0 : 0;
2087           len_diff1 = is_udp1 ? len_diff1 : 0;
2088
2089           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
2090                                 & good_tcp_udp0 & good_tcp_udp1)))
2091             {
2092               if (is_tcp_udp0)
2093                 {
2094                   if (is_tcp_udp0
2095                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2096                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2097                   good_tcp_udp0 =
2098                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2099                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2100                 }
2101               if (is_tcp_udp1)
2102                 {
2103                   if (is_tcp_udp1
2104                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2105                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
2106                   good_tcp_udp1 =
2107                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2108                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2109                 }
2110             }
2111
2112           good_tcp_udp0 &= len_diff0 >= 0;
2113           good_tcp_udp1 &= len_diff1 >= 0;
2114
2115           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2116           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
2117
2118           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
2119
2120           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2121           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
2122
2123           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2124           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2125                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2126                     : error0);
2127           error1 = (is_tcp_udp1 && ! good_tcp_udp1
2128                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
2129                     : error1);
2130
2131           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2132           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
2133
2134           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2135           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2136
2137           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
2138           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2139
2140           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2141                                                            &ip0->src_address,
2142                                                            /* no_default_route */ 1));
2143           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
2144                                                            &ip1->src_address,
2145                                                            /* no_default_route */ 1));
2146
2147           adj0 = ip_get_adjacency (lm, adj_index0);
2148           adj1 = ip_get_adjacency (lm, adj_index1);
2149
2150           /* 
2151            * Must have a route to source otherwise we drop the packet.
2152            * ip4 broadcasts are accepted, e.g. to make dhcp client work
2153            */
2154           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2155                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2156                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2157                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2158                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2159                     ? IP4_ERROR_SRC_LOOKUP_MISS
2160                     : error0);
2161           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
2162                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2163                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
2164                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2165                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2166                     ? IP4_ERROR_SRC_LOOKUP_MISS
2167                     : error1);
2168
2169           next0 = lm->local_next_by_ip_protocol[proto0];
2170           next1 = lm->local_next_by_ip_protocol[proto1];
2171
2172           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2173           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
2174
2175           p0->error = error0 ? error_node->errors[error0] : 0;
2176           p1->error = error1 ? error_node->errors[error1] : 0;
2177
2178           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
2179
2180           if (PREDICT_FALSE (enqueue_code != 0))
2181             {
2182               switch (enqueue_code)
2183                 {
2184                 case 1:
2185                   /* A B A */
2186                   to_next[-2] = pi1;
2187                   to_next -= 1;
2188                   n_left_to_next += 1;
2189                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2190                   break;
2191
2192                 case 2:
2193                   /* A A B */
2194                   to_next -= 1;
2195                   n_left_to_next += 1;
2196                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2197                   break;
2198
2199                 case 3:
2200                   /* A B B or A B C */
2201                   to_next -= 2;
2202                   n_left_to_next += 2;
2203                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2204                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2205                   if (next0 == next1)
2206                     {
2207                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2208                       next_index = next1;
2209                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2210                     }
2211                   break;
2212                 }
2213             }
2214         }
2215
2216       while (n_left_from > 0 && n_left_to_next > 0)
2217         {
2218           vlib_buffer_t * p0;
2219           ip4_header_t * ip0;
2220           udp_header_t * udp0;
2221           ip4_fib_mtrie_t * mtrie0;
2222           ip4_fib_mtrie_leaf_t leaf0;
2223           ip_adjacency_t * adj0;
2224           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
2225           i32 len_diff0;
2226           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2227       
2228           pi0 = to_next[0] = from[0];
2229           from += 1;
2230           n_left_from -= 1;
2231           to_next += 1;
2232           n_left_to_next -= 1;
2233       
2234           p0 = vlib_get_buffer (vm, pi0);
2235
2236           ip0 = vlib_buffer_get_current (p0);
2237
2238           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2239                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2240
2241           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2242
2243           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2244
2245           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2246
2247           /* Treat IP frag packets as "experimental" protocol for now
2248              until support of IP frag reassembly is implemented */
2249           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
2250           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2251           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2252
2253           flags0 = p0->flags;
2254
2255           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2256
2257           udp0 = ip4_next_header (ip0);
2258
2259           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2260           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2261
2262           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2263
2264           /* Verify UDP length. */
2265           ip_len0 = clib_net_to_host_u16 (ip0->length);
2266           udp_len0 = clib_net_to_host_u16 (udp0->length);
2267
2268           len_diff0 = ip_len0 - udp_len0;
2269
2270           len_diff0 = is_udp0 ? len_diff0 : 0;
2271
2272           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
2273             {
2274               if (is_tcp_udp0)
2275                 {
2276                   if (is_tcp_udp0
2277                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2278                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2279                   good_tcp_udp0 =
2280                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2281                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2282                 }
2283             }
2284
2285           good_tcp_udp0 &= len_diff0 >= 0;
2286
2287           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2288
2289           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2290
2291           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2292
2293           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2294           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2295                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2296                     : error0);
2297
2298           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2299
2300           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2301           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2302
2303           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2304                                                            &ip0->src_address,
2305                                                            /* no_default_route */ 1));
2306
2307           adj0 = ip_get_adjacency (lm, adj_index0);
2308
2309           /* Must have a route to source otherwise we drop the packet. */
2310           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2311                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2312                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2313                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2314                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2315                     ? IP4_ERROR_SRC_LOOKUP_MISS
2316                     : error0);
2317
2318           next0 = lm->local_next_by_ip_protocol[proto0];
2319
2320           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2321
2322           p0->error = error0? error_node->errors[error0] : 0;
2323
2324           if (PREDICT_FALSE (next0 != next_index))
2325             {
2326               n_left_to_next += 1;
2327               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2328
2329               next_index = next0;
2330               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2331               to_next[0] = pi0;
2332               to_next += 1;
2333               n_left_to_next -= 1;
2334             }
2335         }
2336   
2337       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2338     }
2339
2340   return frame->n_vectors;
2341 }
2342
2343 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2344   .function = ip4_local,
2345   .name = "ip4-local",
2346   .vector_size = sizeof (u32),
2347
2348   .format_trace = format_ip4_forward_next_trace,
2349
2350   .n_next_nodes = IP_LOCAL_N_NEXT,
2351   .next_nodes = {
2352     [IP_LOCAL_NEXT_DROP] = "error-drop",
2353     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2354     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2355     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2356   },
2357 };
2358
2359 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
2360
2361 void ip4_register_protocol (u32 protocol, u32 node_index)
2362 {
2363   vlib_main_t * vm = vlib_get_main();
2364   ip4_main_t * im = &ip4_main;
2365   ip_lookup_main_t * lm = &im->lookup_main;
2366
2367   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2368   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2369 }
2370
2371 static clib_error_t *
2372 show_ip_local_command_fn (vlib_main_t * vm,
2373                           unformat_input_t * input,
2374                          vlib_cli_command_t * cmd)
2375 {
2376   ip4_main_t * im = &ip4_main;
2377   ip_lookup_main_t * lm = &im->lookup_main;
2378   int i;
2379
2380   vlib_cli_output (vm, "Protocols handled by ip4_local");
2381   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2382     {
2383       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2384         vlib_cli_output (vm, "%d", i);
2385     }
2386   return 0;
2387 }
2388
2389
2390
2391 VLIB_CLI_COMMAND (show_ip_local, static) = {
2392   .path = "show ip local",
2393   .function = show_ip_local_command_fn,
2394   .short_help = "Show ip local protocol table",
2395 };
2396
2397 static uword
2398 ip4_arp (vlib_main_t * vm,
2399          vlib_node_runtime_t * node,
2400          vlib_frame_t * frame)
2401 {
2402   vnet_main_t * vnm = vnet_get_main();
2403   ip4_main_t * im = &ip4_main;
2404   ip_lookup_main_t * lm = &im->lookup_main;
2405   u32 * from, * to_next_drop;
2406   uword n_left_from, n_left_to_next_drop, next_index;
2407   static f64 time_last_seed_change = -1e100;
2408   static u32 hash_seeds[3];
2409   static uword hash_bitmap[256 / BITS (uword)]; 
2410   f64 time_now;
2411
2412   if (node->flags & VLIB_NODE_FLAG_TRACE)
2413     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2414
2415   time_now = vlib_time_now (vm);
2416   if (time_now - time_last_seed_change > 1e-3)
2417     {
2418       uword i;
2419       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2420                                              sizeof (hash_seeds));
2421       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2422         hash_seeds[i] = r[i];
2423
2424       /* Mark all hash keys as been no-seen before. */
2425       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2426         hash_bitmap[i] = 0;
2427
2428       time_last_seed_change = time_now;
2429     }
2430
2431   from = vlib_frame_vector_args (frame);
2432   n_left_from = frame->n_vectors;
2433   next_index = node->cached_next_index;
2434   if (next_index == IP4_ARP_NEXT_DROP)
2435     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2436
2437   while (n_left_from > 0)
2438     {
2439       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2440                            to_next_drop, n_left_to_next_drop);
2441
2442       while (n_left_from > 0 && n_left_to_next_drop > 0)
2443         {
2444           vlib_buffer_t * p0;
2445           ip4_header_t * ip0;
2446           ethernet_header_t * eh0;
2447           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2448           uword bm0;
2449           ip_adjacency_t * adj0;
2450
2451           pi0 = from[0];
2452
2453           p0 = vlib_get_buffer (vm, pi0);
2454
2455           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2456           adj0 = ip_get_adjacency (lm, adj_index0);
2457           ip0 = vlib_buffer_get_current (p0);
2458
2459           /* If packet destination is not local, send ARP to next hop */
2460           if (adj0->arp.next_hop.ip4.as_u32)
2461             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2462
2463           /* 
2464            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2465            * rewrite to this packet, we need to skip it here.
2466            * Note, to distinguish from src IP addr *.8.6.*, we
2467            * check for a bcast eth dest instead of IPv4 version.
2468            */
2469           eh0 = (ethernet_header_t*)ip0;
2470           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2471             {
2472               u32 vlan_num = 0;
2473               u16 * etype = &eh0->type;
2474               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2475                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2476                 {
2477                   vlan_num += 1;
2478                   etype += 2; //vlan tag also 16 bits, same as etype
2479                 }
2480               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2481                 {
2482                   vlib_buffer_advance (
2483                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2484                   ip0 = vlib_buffer_get_current (p0);
2485                 }
2486             }
2487
2488           a0 = hash_seeds[0];
2489           b0 = hash_seeds[1];
2490           c0 = hash_seeds[2];
2491
2492           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2493           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2494
2495           a0 ^= ip0->dst_address.data_u32;
2496           b0 ^= sw_if_index0;
2497
2498           hash_v3_finalize32 (a0, b0, c0);
2499
2500           c0 &= BITS (hash_bitmap) - 1;
2501           c0 = c0 / BITS (uword);
2502           m0 = (uword) 1 << (c0 % BITS (uword));
2503
2504           bm0 = hash_bitmap[c0];
2505           drop0 = (bm0 & m0) != 0;
2506
2507           /* Mark it as seen. */
2508           hash_bitmap[c0] = bm0 | m0;
2509
2510           from += 1;
2511           n_left_from -= 1;
2512           to_next_drop[0] = pi0;
2513           to_next_drop += 1;
2514           n_left_to_next_drop -= 1;
2515
2516           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2517
2518           if (drop0)
2519             continue;
2520
2521           /* 
2522            * Can happen if the control-plane is programming tables
2523            * with traffic flowing; at least that's today's lame excuse.
2524            */
2525           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2526             {
2527               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2528             }
2529           else
2530           /* Send ARP request. */
2531           {
2532             u32 bi0 = 0;
2533             vlib_buffer_t * b0;
2534             ethernet_arp_header_t * h0;
2535             vnet_hw_interface_t * hw_if0;
2536
2537             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2538
2539             /* Add rewrite/encap string for ARP packet. */
2540             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2541
2542             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2543
2544             /* Src ethernet address in ARP header. */
2545             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2546                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2547
2548             if (ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0)) {
2549                 //No source address available
2550                 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2551                 vlib_buffer_free(vm, &bi0, 1);
2552                 continue;
2553             }
2554
2555             /* Copy in destination address we are requesting. */
2556             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2557
2558             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2559             b0 = vlib_get_buffer (vm, bi0);
2560             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2561
2562             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2563
2564             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2565           }
2566         }
2567
2568       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2569     }
2570
2571   return frame->n_vectors;
2572 }
2573
2574 static char * ip4_arp_error_strings[] = {
2575   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2576   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2577   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2578   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2579   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2580   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2581 };
2582
2583 VLIB_REGISTER_NODE (ip4_arp_node) = {
2584   .function = ip4_arp,
2585   .name = "ip4-arp",
2586   .vector_size = sizeof (u32),
2587
2588   .format_trace = format_ip4_forward_next_trace,
2589
2590   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2591   .error_strings = ip4_arp_error_strings,
2592
2593   .n_next_nodes = IP4_ARP_N_NEXT,
2594   .next_nodes = {
2595     [IP4_ARP_NEXT_DROP] = "error-drop",
2596   },
2597 };
2598
/* X-macro list of ip4-arp error codes, written as IP4_ARP_ERROR_ suffixes.
   Each entry is expanded through whatever _() macro is defined at the
   point of use. */
#define foreach_notrace_ip4_arp_error \
  _(DROP)                             \
  _(REQUEST_SENT)                     \
  _(REPLICATE_DROP)                   \
  _(REPLICATE_FAIL)
2604
2605 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2606 {
2607   vlib_node_runtime_t *rt = 
2608     vlib_node_get_runtime (vm, ip4_arp_node.index);
2609
2610   /* don't trace ARP request packets */
2611 #define _(a)                                    \
2612     vnet_pcap_drop_trace_filter_add_del         \
2613         (rt->errors[IP4_ARP_ERROR_##a],         \
2614          1 /* is_add */);
2615     foreach_notrace_ip4_arp_error;
2616 #undef _
2617   return 0;
2618 }
2619
2620 VLIB_INIT_FUNCTION(arp_notrace_init);
2621
2622
2623 /* Send an ARP request to see if given destination is reachable on given interface. */
2624 clib_error_t *
2625 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2626 {
2627   vnet_main_t * vnm = vnet_get_main();
2628   ip4_main_t * im = &ip4_main;
2629   ethernet_arp_header_t * h;
2630   ip4_address_t * src;
2631   ip_interface_address_t * ia;
2632   ip_adjacency_t * adj;
2633   vnet_hw_interface_t * hi;
2634   vnet_sw_interface_t * si;
2635   vlib_buffer_t * b;
2636   u32 bi = 0;
2637
2638   si = vnet_get_sw_interface (vnm, sw_if_index);
2639
2640   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2641     {
2642       return clib_error_return (0, "%U: interface %U down",
2643                                 format_ip4_address, dst, 
2644                                 format_vnet_sw_if_index_name, vnm, 
2645                                 sw_if_index);
2646     }
2647
2648   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2649   if (! src)
2650     {
2651       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2652       return clib_error_return 
2653         (0, "no matching interface address for destination %U (interface %U)",
2654          format_ip4_address, dst,
2655          format_vnet_sw_if_index_name, vnm, sw_if_index);
2656     }
2657
2658   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2659
2660   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2661
2662   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2663
2664   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2665
2666   h->ip4_over_ethernet[0].ip4 = src[0];
2667   h->ip4_over_ethernet[1].ip4 = dst[0];
2668
2669   b = vlib_get_buffer (vm, bi);
2670   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2671
2672   /* Add encapsulation string for software interface (e.g. ethernet header). */
2673   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2674   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2675
2676   {
2677     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2678     u32 * to_next = vlib_frame_vector_args (f);
2679     to_next[0] = bi;
2680     f->n_vectors = 1;
2681     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2682   }
2683
2684   return /* no error */ 0;
2685 }
2686
/* Next-node slots of the ip4 rewrite nodes. */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,        /* packet is dropped */
  IP4_REWRITE_NEXT_ARP = 1,         /* hand over for ARP resolution */
  IP4_REWRITE_NEXT_ICMP_ERROR = 2,  /* generate an ICMP error */
} ip4_rewrite_next_t;
2692
2693 always_inline uword
2694 ip4_rewrite_inline (vlib_main_t * vm,
2695                     vlib_node_runtime_t * node,
2696                     vlib_frame_t * frame,
2697                     int rewrite_for_locally_received_packets)
2698 {
2699   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2700   u32 * from = vlib_frame_vector_args (frame);
2701   u32 n_left_from, n_left_to_next, * to_next, next_index;
2702   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2703   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2704   ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
2705
2706   n_left_from = frame->n_vectors;
2707   next_index = node->cached_next_index;
2708   u32 cpu_index = os_get_cpu_number();
2709   
2710   while (n_left_from > 0)
2711     {
2712       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2713
2714       while (n_left_from >= 4 && n_left_to_next >= 2)
2715         {
2716           ip_adjacency_t * adj0, * adj1;
2717           vlib_buffer_t * p0, * p1;
2718           ip4_header_t * ip0, * ip1;
2719           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2720           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2721           u32 next0_override, next1_override;
2722           u32 tx_sw_if_index0, tx_sw_if_index1;
2723       
2724           if (rewrite_for_locally_received_packets)
2725               next0_override = next1_override = 0;
2726
2727           /* Prefetch next iteration. */
2728           {
2729             vlib_buffer_t * p2, * p3;
2730
2731             p2 = vlib_get_buffer (vm, from[2]);
2732             p3 = vlib_get_buffer (vm, from[3]);
2733
2734             vlib_prefetch_buffer_header (p2, STORE);
2735             vlib_prefetch_buffer_header (p3, STORE);
2736
2737             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2738             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2739           }
2740
2741           pi0 = to_next[0] = from[0];
2742           pi1 = to_next[1] = from[1];
2743
2744           from += 2;
2745           n_left_from -= 2;
2746           to_next += 2;
2747           n_left_to_next -= 2;
2748       
2749           p0 = vlib_get_buffer (vm, pi0);
2750           p1 = vlib_get_buffer (vm, pi1);
2751
2752           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2753           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2754
2755           /* We should never rewrite a pkt using the MISS adjacency */
2756           ASSERT(adj_index0 && adj_index1);
2757
2758           ip0 = vlib_buffer_get_current (p0);
2759           ip1 = vlib_buffer_get_current (p1);
2760
2761           error0 = error1 = IP4_ERROR_NONE;
2762           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2763
2764           /* Decrement TTL & update checksum.
2765              Works either endian, so no need for byte swap. */
2766           if (! rewrite_for_locally_received_packets)
2767             {
2768               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2769
2770               /* Input node should have reject packets with ttl 0. */
2771               ASSERT (ip0->ttl > 0);
2772               ASSERT (ip1->ttl > 0);
2773
2774               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2775               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2776
2777               checksum0 += checksum0 >= 0xffff;
2778               checksum1 += checksum1 >= 0xffff;
2779
2780               ip0->checksum = checksum0;
2781               ip1->checksum = checksum1;
2782
2783               ttl0 -= 1;
2784               ttl1 -= 1;
2785
2786               ip0->ttl = ttl0;
2787               ip1->ttl = ttl1;
2788
2789               /*
2790                * If the ttl drops below 1 when forwarding, generate
2791                * an ICMP response.
2792                */
2793               if (PREDICT_FALSE(ttl0 <= 0))
2794                 {
2795                   error0 = IP4_ERROR_TIME_EXPIRED;
2796                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2797                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2798                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2799                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2800                 }
2801               if (PREDICT_FALSE(ttl1 <= 0))
2802                 {
2803                   error1 = IP4_ERROR_TIME_EXPIRED;
2804                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2805                   icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2806                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2807                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2808                 }
2809
2810               /* Verify checksum. */
2811               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2812               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2813             }
2814
2815           /* Rewrite packet header and updates lengths. */
2816           adj0 = ip_get_adjacency (lm, adj_index0);
2817           adj1 = ip_get_adjacency (lm, adj_index1);
2818       
2819           if (rewrite_for_locally_received_packets)
2820             {
2821               /*
2822                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2823                * we end up here with a local adjacency in hand
2824                * The local adj rewrite data is 0xfefe on purpose.
2825                * Bad engineer, no donut for you.
2826                */
2827               if (PREDICT_FALSE(adj0->lookup_next_index 
2828                                 == IP_LOOKUP_NEXT_LOCAL))
2829                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2830               if (PREDICT_FALSE(adj0->lookup_next_index
2831                                 == IP_LOOKUP_NEXT_ARP))
2832                 next0_override = IP4_REWRITE_NEXT_ARP;
2833               if (PREDICT_FALSE(adj1->lookup_next_index 
2834                                 == IP_LOOKUP_NEXT_LOCAL))
2835                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2836               if (PREDICT_FALSE(adj1->lookup_next_index
2837                                 == IP_LOOKUP_NEXT_ARP))
2838                 next1_override = IP4_REWRITE_NEXT_ARP;
2839             }
2840
2841           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2842           rw_len0 = adj0[0].rewrite_header.data_bytes;
2843           rw_len1 = adj1[0].rewrite_header.data_bytes;
2844           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2845           vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2846
2847           /* Check MTU of outgoing interface. */
2848           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2849                     ? IP4_ERROR_MTU_EXCEEDED
2850                     : error0);
2851           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2852                     ? IP4_ERROR_MTU_EXCEEDED
2853                     : error1);
2854
2855           next0 = (error0 == IP4_ERROR_NONE)
2856             ? adj0[0].rewrite_header.next_index : next0;
2857
2858           if (rewrite_for_locally_received_packets)
2859               next0 = next0 && next0_override ? next0_override : next0;
2860
2861           next1 = (error1 == IP4_ERROR_NONE)
2862             ? adj1[0].rewrite_header.next_index : next1;
2863
2864           if (rewrite_for_locally_received_packets)
2865               next1 = next1 && next1_override ? next1_override : next1;
2866
2867           /* 
2868            * We've already accounted for an ethernet_header_t elsewhere
2869            */
2870           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2871               vlib_increment_combined_counter 
2872                   (&lm->adjacency_counters,
2873                    cpu_index, adj_index0, 
2874                    /* packet increment */ 0,
2875                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2876
2877           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2878               vlib_increment_combined_counter 
2879                   (&lm->adjacency_counters,
2880                    cpu_index, adj_index1, 
2881                    /* packet increment */ 0,
2882                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2883
2884           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2885            * to see the IP headerr */
2886           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2887             {
2888               p0->current_data -= rw_len0;
2889               p0->current_length += rw_len0;
2890               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2891               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2892                   tx_sw_if_index0;
2893
2894               if (PREDICT_FALSE 
2895                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
2896                                     tx_sw_if_index0)))
2897                 {
2898                   p0->current_config_index = 
2899                     vec_elt (cm->config_index_by_sw_if_index, 
2900                              tx_sw_if_index0);
2901                   vnet_get_config_data (&cm->config_main,
2902                                         &p0->current_config_index,
2903                                         &next0,
2904                                         /* # bytes of config data */ 0);
2905                 }
2906             }
2907           if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2908             {
2909               p1->current_data -= rw_len1;
2910               p1->current_length += rw_len1;
2911
2912               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2913               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2914                   tx_sw_if_index1;
2915
2916               if (PREDICT_FALSE 
2917                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
2918                                     tx_sw_if_index1)))
2919                 {
2920                   p1->current_config_index = 
2921                     vec_elt (cm->config_index_by_sw_if_index, 
2922                              tx_sw_if_index1);
2923                   vnet_get_config_data (&cm->config_main,
2924                                         &p1->current_config_index,
2925                                         &next1,
2926                                         /* # bytes of config data */ 0);
2927                 }
2928             }
2929
2930           /* Guess we are only writing on simple Ethernet header. */
2931           vnet_rewrite_two_headers (adj0[0], adj1[0],
2932                                     ip0, ip1,
2933                                     sizeof (ethernet_header_t));
2934       
2935           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2936                                            to_next, n_left_to_next,
2937                                            pi0, pi1, next0, next1);
2938         }
2939
2940       while (n_left_from > 0 && n_left_to_next > 0)
2941         {
2942           ip_adjacency_t * adj0;
2943           vlib_buffer_t * p0;
2944           ip4_header_t * ip0;
2945           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2946           u32 next0_override;
2947           u32 tx_sw_if_index0;
2948       
2949           if (rewrite_for_locally_received_packets)
2950               next0_override = 0;
2951
2952           pi0 = to_next[0] = from[0];
2953
2954           p0 = vlib_get_buffer (vm, pi0);
2955
2956           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2957
2958           /* We should never rewrite a pkt using the MISS adjacency */
2959           ASSERT(adj_index0);
2960
2961           adj0 = ip_get_adjacency (lm, adj_index0);
2962       
2963           ip0 = vlib_buffer_get_current (p0);
2964
2965           error0 = IP4_ERROR_NONE;
2966           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
2967
2968           /* Decrement TTL & update checksum. */
2969           if (! rewrite_for_locally_received_packets)
2970             {
2971               i32 ttl0 = ip0->ttl;
2972
2973               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2974
2975               checksum0 += checksum0 >= 0xffff;
2976
2977               ip0->checksum = checksum0;
2978
2979               ASSERT (ip0->ttl > 0);
2980
2981               ttl0 -= 1;
2982
2983               ip0->ttl = ttl0;
2984
2985               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2986
2987               if (PREDICT_FALSE(ttl0 <= 0))
2988                 {
2989                   /*
2990                    * If the ttl drops below 1 when forwarding, generate
2991                    * an ICMP response.
2992                    */
2993                   error0 = IP4_ERROR_TIME_EXPIRED;
2994                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2995                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2996                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2997                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2998                 }
2999             }
3000
3001           if (rewrite_for_locally_received_packets)
3002             {
3003               /*
3004                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
3005                * we end up here with a local adjacency in hand
3006                * The local adj rewrite data is 0xfefe on purpose.
3007                * Bad engineer, no donut for you.
3008                */
3009               if (PREDICT_FALSE(adj0->lookup_next_index 
3010                                 == IP_LOOKUP_NEXT_LOCAL))
3011                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
3012               /* 
3013                * We have to override the next_index in ARP adjacencies,
3014                * because they're set up for ip4-arp, not this node...
3015                */
3016               if (PREDICT_FALSE(adj0->lookup_next_index
3017                                 == IP_LOOKUP_NEXT_ARP))
3018                 next0_override = IP4_REWRITE_NEXT_ARP;
3019             }
3020
3021           /* Guess we are only writing on simple Ethernet header. */
3022           vnet_rewrite_one_header (adj0[0], ip0, 
3023                                    sizeof (ethernet_header_t));
3024           
3025           /* Update packet buffer attributes/set output interface. */
3026           rw_len0 = adj0[0].rewrite_header.data_bytes;
3027           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
3028           
3029           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
3030               vlib_increment_combined_counter 
3031                   (&lm->adjacency_counters,
3032                    cpu_index, adj_index0, 
3033                    /* packet increment */ 0,
3034                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
3035           
3036           /* Check MTU of outgoing interface. */
3037           error0 = (vlib_buffer_length_in_chain (vm, p0) 
3038                     > adj0[0].rewrite_header.max_l3_packet_bytes
3039                     ? IP4_ERROR_MTU_EXCEEDED
3040                     : error0);
3041
3042           p0->error = error_node->errors[error0];
3043
3044           /* Don't adjust the buffer for ttl issue; icmp-error node wants
3045            * to see the IP headerr */
3046           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
3047             {
3048               p0->current_data -= rw_len0;
3049               p0->current_length += rw_len0;
3050               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
3051
3052               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
3053               next0 = adj0[0].rewrite_header.next_index;
3054
3055               if (PREDICT_FALSE 
3056                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
3057                                     tx_sw_if_index0)))
3058                   {
3059                     p0->current_config_index = 
3060                       vec_elt (cm->config_index_by_sw_if_index, 
3061                                tx_sw_if_index0);
3062                     vnet_get_config_data (&cm->config_main,
3063                                           &p0->current_config_index,
3064                                           &next0,
3065                                           /* # bytes of config data */ 0);
3066                   }
3067             }
3068
3069           if (rewrite_for_locally_received_packets)
3070               next0 = next0 && next0_override ? next0_override : next0;
3071
3072           from += 1;
3073           n_left_from -= 1;
3074           to_next += 1;
3075           n_left_to_next -= 1;
3076       
3077           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3078                                            to_next, n_left_to_next,
3079                                            pi0, next0);
3080         }
3081   
3082       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3083     }
3084
3085   /* Need to do trace after rewrites to pick up new packet data. */
3086   if (node->flags & VLIB_NODE_FLAG_TRACE)
3087     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
3088
3089   return frame->n_vectors;
3090 }
3091
3092
3093 /** @brief IPv4 transit rewrite node.
3094     @node ip4-rewrite-transit
3095
3096     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
3097     header checksum, fetch the ip adjacency, check the outbound mtu,
3098     apply the adjacency rewrite, and send pkts to the adjacency
3099     rewrite header's rewrite_next_index.
3100
3101     @param vm vlib_main_t corresponding to the current thread
3102     @param node vlib_node_runtime_t
3103     @param frame vlib_frame_t whose contents should be dispatched
3104
3105     @par Graph mechanics: buffer metadata, next index usage
3106
3107     @em Uses:
3108     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
3109         - the rewrite adjacency index
3110     - <code>adj->lookup_next_index</code>
3111         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
3112           the packet will be dropped. 
3113     - <code>adj->rewrite_header</code>
3114         - Rewrite string length, rewrite string, next_index
3115
3116     @em Sets:
3117     - <code>b->current_data, b->current_length</code>
3118         - Updated net of applying the rewrite string
3119
3120     <em>Next Indices:</em>
3121     - <code> adj->rewrite_header.next_index </code>
3122       or @c error-drop 
3123 */
3124 static uword
3125 ip4_rewrite_transit (vlib_main_t * vm,
3126                      vlib_node_runtime_t * node,
3127                      vlib_frame_t * frame)
3128 {
3129   return ip4_rewrite_inline (vm, node, frame,
3130                              /* rewrite_for_locally_received_packets */ 0);
3131 }
3132
3133 /** @brief IPv4 local rewrite node.
3134     @node ip4-rewrite-local
3135
3136     This is the IPv4 local rewrite node. Fetch the ip adjacency, check
3137     the outbound interface mtu, apply the adjacency rewrite, and send
3138     pkts to the adjacency rewrite header's rewrite_next_index. Deal
3139     with hemorrhoids of the form "some clown sends an icmp4 w/ src =
3140     dst = interface addr."
3141
3142     @param vm vlib_main_t corresponding to the current thread
3143     @param node vlib_node_runtime_t
3144     @param frame vlib_frame_t whose contents should be dispatched
3145
3146     @par Graph mechanics: buffer metadata, next index usage
3147
3148     @em Uses:
3149     - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
3150         - the rewrite adjacency index
3151     - <code>adj->lookup_next_index</code>
3152         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
3153           the packet will be dropped. 
3154     - <code>adj->rewrite_header</code>
3155         - Rewrite string length, rewrite string, next_index
3156
3157     @em Sets:
3158     - <code>b->current_data, b->current_length</code>
3159         - Updated net of applying the rewrite string
3160
3161     <em>Next Indices:</em>
3162     - <code> adj->rewrite_header.next_index </code>
3163       or @c error-drop 
3164 */
3165
3166 static uword
3167 ip4_rewrite_local (vlib_main_t * vm,
3168                    vlib_node_runtime_t * node,
3169                    vlib_frame_t * frame)
3170 {
3171   return ip4_rewrite_inline (vm, node, frame,
3172                              /* rewrite_for_locally_received_packets */ 1);
3173 }
3174
3175 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
3176   .function = ip4_rewrite_transit,
3177   .name = "ip4-rewrite-transit",
3178   .vector_size = sizeof (u32),
3179
3180   .format_trace = format_ip4_rewrite_trace,
3181
3182   .n_next_nodes = 3,
3183   .next_nodes = {
3184     [IP4_REWRITE_NEXT_DROP] = "error-drop",
3185     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
3186     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3187   },
3188 };
3189
3190 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit);
3191
3192 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
3193   .function = ip4_rewrite_local,
3194   .name = "ip4-rewrite-local",
3195   .vector_size = sizeof (u32),
3196
3197   .sibling_of = "ip4-rewrite-transit",
3198
3199   .format_trace = format_ip4_rewrite_trace,
3200
3201   .n_next_nodes = 0,
3202 };
3203
3204 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local);
3205
3206 static clib_error_t *
3207 add_del_interface_table (vlib_main_t * vm,
3208                          unformat_input_t * input,
3209                          vlib_cli_command_t * cmd)
3210 {
3211   vnet_main_t * vnm = vnet_get_main();
3212   clib_error_t * error = 0;
3213   u32 sw_if_index, table_id;
3214
3215   sw_if_index = ~0;
3216
3217   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
3218     {
3219       error = clib_error_return (0, "unknown interface `%U'",
3220                                  format_unformat_error, input);
3221       goto done;
3222     }
3223
3224   if (unformat (input, "%d", &table_id))
3225     ;
3226   else
3227     {
3228       error = clib_error_return (0, "expected table id `%U'",
3229                                  format_unformat_error, input);
3230       goto done;
3231     }
3232
3233   {
3234     ip4_main_t * im = &ip4_main;
3235     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
3236
3237     if (fib) 
3238       {
3239         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
3240         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
3241     }
3242   }
3243
3244  done:
3245   return error;
3246 }
3247
3248 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
3249   .path = "set interface ip table",
3250   .function = add_del_interface_table,
3251   .short_help = "Add/delete FIB table id for interface",
3252 };
3253
3254
3255 static uword
3256 ip4_lookup_multicast (vlib_main_t * vm,
3257                       vlib_node_runtime_t * node,
3258                       vlib_frame_t * frame)
3259 {
3260   ip4_main_t * im = &ip4_main;
3261   ip_lookup_main_t * lm = &im->lookup_main;
3262   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
3263   u32 n_left_from, n_left_to_next, * from, * to_next;
3264   ip_lookup_next_t next;
3265   u32 cpu_index = os_get_cpu_number();
3266
3267   from = vlib_frame_vector_args (frame);
3268   n_left_from = frame->n_vectors;
3269   next = node->cached_next_index;
3270
3271   while (n_left_from > 0)
3272     {
3273       vlib_get_next_frame (vm, node, next,
3274                            to_next, n_left_to_next);
3275
3276       while (n_left_from >= 4 && n_left_to_next >= 2)
3277         {
3278           vlib_buffer_t * p0, * p1;
3279           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
3280           ip_lookup_next_t next0, next1;
3281           ip4_header_t * ip0, * ip1;
3282           ip_adjacency_t * adj0, * adj1;
3283           u32 fib_index0, fib_index1;
3284           u32 flow_hash_config0, flow_hash_config1;
3285
3286           /* Prefetch next iteration. */
3287           {
3288             vlib_buffer_t * p2, * p3;
3289
3290             p2 = vlib_get_buffer (vm, from[2]);
3291             p3 = vlib_get_buffer (vm, from[3]);
3292
3293             vlib_prefetch_buffer_header (p2, LOAD);
3294             vlib_prefetch_buffer_header (p3, LOAD);
3295
3296             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
3297             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
3298           }
3299
3300           pi0 = to_next[0] = from[0];
3301           pi1 = to_next[1] = from[1];
3302
3303           p0 = vlib_get_buffer (vm, pi0);
3304           p1 = vlib_get_buffer (vm, pi1);
3305
3306           ip0 = vlib_buffer_get_current (p0);
3307           ip1 = vlib_buffer_get_current (p1);
3308
3309           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3310           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
3311           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3312             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3313           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
3314             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
3315
3316           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3317                                               &ip0->dst_address, p0);
3318           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
3319                                               &ip1->dst_address, p1);
3320
3321           adj0 = ip_get_adjacency (lm, adj_index0);
3322           adj1 = ip_get_adjacency (lm, adj_index1);
3323
3324           next0 = adj0->lookup_next_index;
3325           next1 = adj1->lookup_next_index;
3326
3327           flow_hash_config0 = 
3328               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3329
3330           flow_hash_config1 = 
3331               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
3332
3333           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
3334               (ip0, flow_hash_config0);
3335                                                                   
3336           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
3337               (ip1, flow_hash_config1);
3338
3339           ASSERT (adj0->n_adj > 0);
3340           ASSERT (adj1->n_adj > 0);
3341           ASSERT (is_pow2 (adj0->n_adj));
3342           ASSERT (is_pow2 (adj1->n_adj));
3343           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3344           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
3345
3346           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3347           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
3348
3349           if (1) /* $$$$$$ HACK FIXME */
3350           vlib_increment_combined_counter 
3351               (cm, cpu_index, adj_index0, 1,
3352                vlib_buffer_length_in_chain (vm, p0));
3353           if (1) /* $$$$$$ HACK FIXME */
3354           vlib_increment_combined_counter 
3355               (cm, cpu_index, adj_index1, 1,
3356                vlib_buffer_length_in_chain (vm, p1));
3357
3358           from += 2;
3359           to_next += 2;
3360           n_left_to_next -= 2;
3361           n_left_from -= 2;
3362
3363           wrong_next = (next0 != next) + 2*(next1 != next);
3364           if (PREDICT_FALSE (wrong_next != 0))
3365             {
3366               switch (wrong_next)
3367                 {
3368                 case 1:
3369                   /* A B A */
3370                   to_next[-2] = pi1;
3371                   to_next -= 1;
3372                   n_left_to_next += 1;
3373                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3374                   break;
3375
3376                 case 2:
3377                   /* A A B */
3378                   to_next -= 1;
3379                   n_left_to_next += 1;
3380                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3381                   break;
3382
3383                 case 3:
3384                   /* A B C */
3385                   to_next -= 2;
3386                   n_left_to_next += 2;
3387                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3388                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3389                   if (next0 == next1)
3390                     {
3391                       /* A B B */
3392                       vlib_put_next_frame (vm, node, next, n_left_to_next);
3393                       next = next1;
3394                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
3395                     }
3396                 }
3397             }
3398         }
3399     
3400       while (n_left_from > 0 && n_left_to_next > 0)
3401         {
3402           vlib_buffer_t * p0;
3403           ip4_header_t * ip0;
3404           u32 pi0, adj_index0;
3405           ip_lookup_next_t next0;
3406           ip_adjacency_t * adj0;
3407           u32 fib_index0;
3408           u32 flow_hash_config0;
3409
3410           pi0 = from[0];
3411           to_next[0] = pi0;
3412
3413           p0 = vlib_get_buffer (vm, pi0);
3414
3415           ip0 = vlib_buffer_get_current (p0);
3416
3417           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
3418                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3419           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3420               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3421           
3422           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3423                                               &ip0->dst_address, p0);
3424
3425           adj0 = ip_get_adjacency (lm, adj_index0);
3426
3427           next0 = adj0->lookup_next_index;
3428
3429           flow_hash_config0 = 
3430               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3431
3432           vnet_buffer (p0)->ip.flow_hash = 
3433             ip4_compute_flow_hash (ip0, flow_hash_config0);
3434
3435           ASSERT (adj0->n_adj > 0);
3436           ASSERT (is_pow2 (adj0->n_adj));
3437           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3438
3439           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3440
3441           if (1) /* $$$$$$ HACK FIXME */
3442               vlib_increment_combined_counter 
3443                   (cm, cpu_index, adj_index0, 1,
3444                    vlib_buffer_length_in_chain (vm, p0));
3445
3446           from += 1;
3447           to_next += 1;
3448           n_left_to_next -= 1;
3449           n_left_from -= 1;
3450
3451           if (PREDICT_FALSE (next0 != next))
3452             {
3453               n_left_to_next += 1;
3454               vlib_put_next_frame (vm, node, next, n_left_to_next);
3455               next = next0;
3456               vlib_get_next_frame (vm, node, next,
3457                                    to_next, n_left_to_next);
3458               to_next[0] = pi0;
3459               to_next += 1;
3460               n_left_to_next -= 1;
3461             }
3462         }
3463
3464       vlib_put_next_frame (vm, node, next, n_left_to_next);
3465     }
3466
3467   if (node->flags & VLIB_NODE_FLAG_TRACE)
3468       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
3469
3470   return frame->n_vectors;
3471 }
3472
3473 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3474   .function = ip4_lookup_multicast,
3475   .name = "ip4-lookup-multicast",
3476   .vector_size = sizeof (u32),
3477   .sibling_of = "ip4-lookup",
3478   .format_trace = format_ip4_lookup_trace,
3479
3480   .n_next_nodes = 0,
3481 };
3482
3483 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast);
3484
3485 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3486   .function = ip4_drop,
3487   .name = "ip4-multicast",
3488   .vector_size = sizeof (u32),
3489
3490   .format_trace = format_ip4_forward_next_trace,
3491
3492   .n_next_nodes = 1,
3493   .next_nodes = {
3494     [0] = "error-drop",
3495   },
3496 };
3497
3498 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3499 {
3500   ip4_main_t * im = &ip4_main;
3501   ip4_fib_mtrie_t * mtrie0;
3502   ip4_fib_mtrie_leaf_t leaf0;
3503   u32 adj_index0;
3504     
3505   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3506
3507   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3508   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3509   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3510   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3511   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3512   
3513   /* Handle default route. */
3514   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3515   
3516   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3517   
3518   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3519                                                   a, 
3520                                                   /* no_default_route */ 0);
3521 }
3522  
3523 static clib_error_t *
3524 test_lookup_command_fn (vlib_main_t * vm,
3525                         unformat_input_t * input,
3526                         vlib_cli_command_t * cmd)
3527 {
3528   u32 table_id = 0;
3529   f64 count = 1;
3530   u32 n;
3531   int i;
3532   ip4_address_t ip4_base_address;
3533   u64 errors = 0;
3534
3535   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3536       if (unformat (input, "table %d", &table_id))
3537         ;
3538       else if (unformat (input, "count %f", &count))
3539         ;
3540
3541       else if (unformat (input, "%U",
3542                          unformat_ip4_address, &ip4_base_address))
3543         ;
3544       else
3545         return clib_error_return (0, "unknown input `%U'",
3546                                   format_unformat_error, input);
3547   }
3548
3549   n = count;
3550
3551   for (i = 0; i < n; i++)
3552     {
3553       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3554         errors++;
3555
3556       ip4_base_address.as_u32 = 
3557         clib_host_to_net_u32 (1 + 
3558                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3559     }
3560
3561   if (errors) 
3562     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3563   else
3564     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3565
3566   return 0;
3567 }
3568
3569 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3570     .path = "test lookup",
3571     .short_help = "test lookup",
3572     .function = test_lookup_command_fn,
3573 };
3574
3575 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3576 {
3577   ip4_main_t * im4 = &ip4_main;
3578   ip4_fib_t * fib;
3579   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3580
3581   if (p == 0)
3582     return VNET_API_ERROR_NO_SUCH_FIB;
3583
3584   fib = vec_elt_at_index (im4->fibs, p[0]);
3585
3586   fib->flow_hash_config = flow_hash_config;
3587   return 0;
3588 }
3589  
3590 static clib_error_t *
3591 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3592                              unformat_input_t * input,
3593                              vlib_cli_command_t * cmd)
3594 {
3595   int matched = 0;
3596   u32 table_id = 0;
3597   u32 flow_hash_config = 0;
3598   int rv;
3599
3600   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3601     if (unformat (input, "table %d", &table_id))
3602       matched = 1;
3603 #define _(a,v) \
3604     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3605     foreach_flow_hash_bit
3606 #undef _
3607     else break;
3608   }
3609   
3610   if (matched == 0)
3611     return clib_error_return (0, "unknown input `%U'",
3612                               format_unformat_error, input);
3613   
3614   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3615   switch (rv)
3616     {
3617     case 0:
3618       break;
3619       
3620     case VNET_API_ERROR_NO_SUCH_FIB:
3621       return clib_error_return (0, "no such FIB table %d", table_id);
3622       
3623     default:
3624       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3625       break;
3626     }
3627   
3628   return 0;
3629 }
3630  
3631 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3632   .path = "set ip flow-hash",
3633   .short_help = 
3634   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3635   .function = set_ip_flow_hash_command_fn,
3636 };
3637  
3638 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3639                                  u32 table_index)
3640 {
3641   vnet_main_t * vnm = vnet_get_main();
3642   vnet_interface_main_t * im = &vnm->interface_main;
3643   ip4_main_t * ipm = &ip4_main;
3644   ip_lookup_main_t * lm = &ipm->lookup_main;
3645   vnet_classify_main_t * cm = &vnet_classify_main;
3646
3647   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3648     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3649
3650   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3651     return VNET_API_ERROR_NO_SUCH_ENTRY;
3652
3653   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3654   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3655
3656   return 0;
3657 }
3658
3659 static clib_error_t *
3660 set_ip_classify_command_fn (vlib_main_t * vm,
3661                             unformat_input_t * input,
3662                             vlib_cli_command_t * cmd)
3663 {
3664   u32 table_index = ~0;
3665   int table_index_set = 0;
3666   u32 sw_if_index = ~0;
3667   int rv;
3668   
3669   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3670     if (unformat (input, "table-index %d", &table_index))
3671       table_index_set = 1;
3672     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3673                        vnet_get_main(), &sw_if_index))
3674       ;
3675     else
3676       break;
3677   }
3678       
3679   if (table_index_set == 0)
3680     return clib_error_return (0, "classify table-index must be specified");
3681
3682   if (sw_if_index == ~0)
3683     return clib_error_return (0, "interface / subif must be specified");
3684
3685   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3686
3687   switch (rv)
3688     {
3689     case 0:
3690       break;
3691
3692     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3693       return clib_error_return (0, "No such interface");
3694
3695     case VNET_API_ERROR_NO_SUCH_ENTRY:
3696       return clib_error_return (0, "No such classifier table");
3697     }
3698   return 0;
3699 }
3700
3701 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3702     .path = "set ip classify",
3703     .short_help = 
3704     "set ip classify intfc <int> table-index <index>",
3705     .function = set_ip_classify_command_fn,
3706 };
3707
3708
3709 #define TEST_CODE 1
3710 #if TEST_CODE > 0
3711
3712 static clib_error_t *
3713 set_interface_output_feature_command_fn (vlib_main_t * vm,
3714                                          unformat_input_t * input,
3715                                          vlib_cli_command_t * cmd)
3716 {
3717   vnet_main_t * vnm = vnet_get_main();
3718   u32 sw_if_index = ~0;
3719   int is_add = 1;
3720   ip4_main_t * im = &ip4_main;
3721   ip_lookup_main_t * lm = &im->lookup_main;
3722
3723   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) 
3724     {
3725       if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
3726         ;
3727       else if (unformat (input, "del"))
3728         is_add = 0;
3729       else
3730         break;
3731     }
3732
3733   if (sw_if_index == ~0)
3734     return clib_error_return (0, "unknown interface `%U'",
3735                               format_unformat_error, input);
3736
3737   lm->tx_sw_if_has_ip_output_features =
3738     clib_bitmap_set (lm->tx_sw_if_has_ip_output_features, sw_if_index, is_add);
3739
3740   return 0;
3741 }
3742
3743 VLIB_CLI_COMMAND (set_interface_output_feature, static) = {
3744   .path = "set interface output feature",
3745   .function = set_interface_output_feature_command_fn,
3746   .short_help = "set interface output feature <intfc>",
3747 };
3748 #endif /* TEST_CODE */