Save rewrite length for post-rewrite rewind action
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 /** for ethernet_header_t */
43 #include <vnet/ethernet/ethernet.h>
44 /** for ethernet_arp_header_t */
45 #include <vnet/ethernet/arp_packet.h>   
46 #include <vnet/ppp/ppp.h>
47 /** for srp_hw_interface_class */
48 #include <vnet/srp/srp.h>
49 /** for API error numbers */
50 #include <vnet/api_errno.h>     
51
52 /** @file
53     vnet ip4 forwarding
54 */
55
56 /* This is really, really simple but stupid fib. */
57 u32
58 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
59                            ip4_address_t * dst,
60                            u32 disable_default_route)
61 {
62   ip_lookup_main_t * lm = &im->lookup_main;
63   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
64   uword * p, * hash, key;
65   i32 i, i_min, dst_address, ai;
66
67   i_min = disable_default_route ? 1 : 0;
68   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
69   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
70     {
71       hash = fib->adj_index_by_dst_address[i];
72       if (! hash)
73         continue;
74
75       key = dst_address & im->fib_masks[i];
76       if ((p = hash_get (hash, key)) != 0)
77         {
78           ai = p[0];
79           goto done;
80         }
81     }
82
83   /* Nothing matches in table. */
84   ai = lm->miss_adj_index;
85
86  done:
87   return ai;
88 }
89
90 /** @brief Create FIB from table ID and init all hashing.
91     @param im - @ref ip4_main_t
92     @param table_id - table ID
93     @return fib - @ref ip4_fib_t
94 */
95 static ip4_fib_t *
96 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
97 {
98   ip4_fib_t * fib;
99   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
100   vec_add2 (im->fibs, fib, 1);
101   fib->table_id = table_id;
102   fib->index = fib - im->fibs;
103   /* IP_FLOW_HASH_DEFAULT is net value of 5 tuple flags without "reverse" bit */
104   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
105   fib->fwd_classify_table_index = ~0;
106   fib->rev_classify_table_index = ~0;
107   ip4_mtrie_init (&fib->mtrie);
108   return fib;
109 }
110
111 /** @brief Find existing or Create new FIB based on index
112     @param im @ref ip4_main_t
113     @param table_index_or_id - overloaded parameter referring
114            to the table or a table's index in the FIB vector
115     @param flags - used to check if table_index_or_id was a table or
116            an index (detected by @ref IP4_ROUTE_FLAG_FIB_INDEX)
117     @return either the existing or a new ip4_fib_t entry
118 */
119 ip4_fib_t *
120 find_ip4_fib_by_table_index_or_id (ip4_main_t * im,
121                                    u32 table_index_or_id, u32 flags)
122 {
123   uword * p, fib_index;
124
125   fib_index = table_index_or_id;
126   /* If this isn't a FIB_INDEX ... */
127   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
128     {
129       /* If passed ~0 then request the next table available */
130       if (table_index_or_id == ~0) {
131         table_index_or_id = 0;
132         while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
133           table_index_or_id++;
134         }
135         /* Create the next table and return the ip4_fib_t associated with it */
136         return create_fib_with_table_id (im, table_index_or_id);
137       }
138       /* A specific table_id was requested.. */
139       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
140       /* ... and if it doesn't exist create it else grab its index */
141       if (! p)
142         return create_fib_with_table_id (im, table_index_or_id);
143       fib_index = p[0];
144     }
145   /* Return the ip4_fib_t associated with this index */
146   return vec_elt_at_index (im->fibs, fib_index);
147 }
148
149 static void
150 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
151                                        ip4_fib_t * fib,
152                                        u32 address_length)
153 {
154   hash_t * h;
155   uword max_index;
156
157   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
158   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
159
160   fib->adj_index_by_dst_address[address_length] =
161     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
162
163   hash_set_flags (fib->adj_index_by_dst_address[address_length],
164                   HASH_FLAG_NO_AUTO_SHRINK);
165
166   h = hash_header (fib->adj_index_by_dst_address[address_length]);
167   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
168
169   /* Initialize new/old hash value vectors. */
170   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
171   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
172 }
173
174 static void
175 ip4_fib_set_adj_index (ip4_main_t * im,
176                        ip4_fib_t * fib,
177                        u32 flags,
178                        u32 dst_address_u32,
179                        u32 dst_address_length,
180                        u32 adj_index)
181 {
182   ip_lookup_main_t * lm = &im->lookup_main;
183   uword * hash;
184
185   if (vec_bytes(fib->old_hash_values))
186     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
187   if (vec_bytes(fib->new_hash_values))
188     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
189   fib->new_hash_values[0] = adj_index;
190
191   /* Make sure adj index is valid. */
192   if (CLIB_DEBUG > 0)
193     (void) ip_get_adjacency (lm, adj_index);
194
195   hash = fib->adj_index_by_dst_address[dst_address_length];
196
197   hash = _hash_set3 (hash, dst_address_u32,
198                      fib->new_hash_values,
199                      fib->old_hash_values);
200
201   fib->adj_index_by_dst_address[dst_address_length] = hash;
202
203   if (vec_len (im->add_del_route_callbacks) > 0)
204     {
205       ip4_add_del_route_callback_t * cb;
206       ip4_address_t d;
207       uword * p;
208
209       d.data_u32 = dst_address_u32;
210       vec_foreach (cb, im->add_del_route_callbacks)
211         if ((flags & cb->required_flags) == cb->required_flags)
212           cb->function (im, cb->function_opaque,
213                         fib, flags,
214                         &d, dst_address_length,
215                         fib->old_hash_values,
216                         fib->new_hash_values);
217
218       p = hash_get (hash, dst_address_u32);
219       /* hash_get should never return NULL here */
220       if (p)
221           clib_memcpy (p, fib->new_hash_values, 
222                        vec_bytes (fib->new_hash_values));
223       else
224           ASSERT(0);
225     }
226 }
227
228 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
229 {
230   ip_lookup_main_t * lm = &im->lookup_main;
231   ip4_fib_t * fib;
232   u32 dst_address, dst_address_length, adj_index, old_adj_index;
233   uword * hash, is_del;
234   ip4_add_del_route_callback_t * cb;
235
236   /* Either create new adjacency or use given one depending on arguments. */
237   if (a->n_add_adj > 0)
238     {
239       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
240       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
241     }
242   else
243     adj_index = a->adj_index;
244
245   dst_address = a->dst_address.data_u32;
246   dst_address_length = a->dst_address_length;
247   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
248
249   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
250   dst_address &= im->fib_masks[dst_address_length];
251
252   if (! fib->adj_index_by_dst_address[dst_address_length])
253     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
254
255   hash = fib->adj_index_by_dst_address[dst_address_length];
256
257   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
258
259   if (is_del)
260     {
261       fib->old_hash_values[0] = ~0;
262       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
263       fib->adj_index_by_dst_address[dst_address_length] = hash;
264
265       if (vec_len (im->add_del_route_callbacks) > 0
266           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
267         {
268           fib->new_hash_values[0] = ~0;
269           vec_foreach (cb, im->add_del_route_callbacks)
270             if ((a->flags & cb->required_flags) == cb->required_flags)
271               cb->function (im, cb->function_opaque,
272                             fib, a->flags,
273                             &a->dst_address, dst_address_length,
274                             fib->old_hash_values,
275                             fib->new_hash_values);
276         }
277     }
278   else
279     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
280                            adj_index);
281
282   old_adj_index = fib->old_hash_values[0];
283
284   /* Avoid spurious reference count increments */
285   if (old_adj_index == adj_index
286       && adj_index != ~0
287       && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
288     {
289       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
290       if (adj->share_count > 0)
291         adj->share_count --;
292     }
293
294   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
295                                is_del ? old_adj_index : adj_index,
296                                is_del);
297
298   /* Delete old adjacency index if present and changed. */
299   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
300       && old_adj_index != ~0
301       && old_adj_index != adj_index)
302     ip_del_adjacency (lm, old_adj_index);
303 }
304
305
306 u32
307 ip4_route_get_next_hop_adj (ip4_main_t * im,
308                             u32 fib_index,
309                             ip4_address_t *next_hop,
310                             u32 next_hop_sw_if_index,
311                             u32 explicit_fib_index)
312 {
313   ip_lookup_main_t * lm = &im->lookup_main;
314   vnet_main_t * vnm = vnet_get_main();
315   uword * nh_hash, * nh_result;
316   int is_interface_next_hop;
317   u32 nh_adj_index;
318   ip4_fib_t * fib;
319
320   fib = vec_elt_at_index (im->fibs, fib_index);
321
322   is_interface_next_hop = next_hop->data_u32 == 0;
323   if (is_interface_next_hop)
324     {
325       nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
326       if (nh_result)
327           nh_adj_index = *nh_result;
328       else
329         {
330            ip_adjacency_t * adj;
331            adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
332                                    &nh_adj_index);
333            ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
334            ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
335            hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
336         }
337     }
338   else if (next_hop_sw_if_index == ~0)
339     {
340       /* next-hop is recursive. we always need a indirect adj
341        * for recursive paths. Any LPM we perform now will give
342        * us a valid adj, but without tracking the next-hop we
343        * have no way to keep it valid.
344        */
345       ip_adjacency_t add_adj;
346       memset (&add_adj, 0, sizeof(add_adj));
347       add_adj.n_adj = 1;
348       add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
349       add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
350       add_adj.explicit_fib_index = explicit_fib_index;
351       ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
352     }
353   else
354     {
355       nh_hash = fib->adj_index_by_dst_address[32];
356       nh_result = hash_get (nh_hash, next_hop->data_u32);
357
358       /* Next hop must be known. */
359       if (! nh_result)
360         {
361           ip_adjacency_t * adj;
362
363           /* no /32 exists, get the longest prefix match */
364           nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
365                                                     next_hop, 0);
366           adj = ip_get_adjacency (lm, nh_adj_index);
367           /* if ARP interface adjacency is present, we need to
368              install ARP adjaceny for specific next hop */
369           if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
370               adj->arp.next_hop.ip4.as_u32 == 0)
371             {
372               nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
373             }
374         }
375       else
376         {
377           nh_adj_index = *nh_result;
378         }
379     }
380
381   return (nh_adj_index);
382 }
383
384 void
385 ip4_add_del_route_next_hop (ip4_main_t * im,
386                             u32 flags,
387                             ip4_address_t * dst_address,
388                             u32 dst_address_length,
389                             ip4_address_t * next_hop,
390                             u32 next_hop_sw_if_index,
391                             u32 next_hop_weight, u32 adj_index, 
392                             u32 explicit_fib_index)
393 {
394   vnet_main_t * vnm = vnet_get_main();
395   ip_lookup_main_t * lm = &im->lookup_main;
396   u32 fib_index;
397   ip4_fib_t * fib;
398   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
399   u32 dst_adj_index, nh_adj_index;
400   uword * dst_hash, * dst_result;
401   ip_adjacency_t * dst_adj;
402   ip_multipath_adjacency_t * old_mp, * new_mp;
403   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
404   clib_error_t * error = 0;
405
406   if (explicit_fib_index == (u32)~0)
407       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
408   else
409       fib_index = explicit_fib_index;
410
411   fib = vec_elt_at_index (im->fibs, fib_index);
412
413   /* Lookup next hop to be added or deleted. */
414   if (adj_index == (u32)~0)
415     {
416         nh_adj_index = ip4_route_get_next_hop_adj(im, fib_index,
417                                                   next_hop,
418                                                   next_hop_sw_if_index,
419                                                   explicit_fib_index);
420     }
421   else
422     {
423       nh_adj_index = adj_index;
424     }
425   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
426   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
427
428   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
429   dst_result = hash_get (dst_hash, dst_address_u32);
430   if (dst_result)
431     {
432       dst_adj_index = dst_result[0];
433       dst_adj = ip_get_adjacency (lm, dst_adj_index);
434     }
435   else
436     {
437       /* For deletes destination must be known. */
438       if (is_del)
439         {
440           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
441           error = clib_error_return (0, "unknown destination %U/%d",
442                                      format_ip4_address, dst_address,
443                                      dst_address_length);
444           goto done;
445         }
446
447       dst_adj_index = ~0;
448       dst_adj = 0;
449     }
450
451   /* Ignore adds of X/32 with next hop of X. */
452   if (! is_del
453       && dst_address_length == 32
454       && dst_address->data_u32 == next_hop->data_u32 
455       && adj_index != (u32)~0)
456     {
457       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
458       error = clib_error_return (0, "prefix matches next hop %U/%d",
459                                  format_ip4_address, dst_address,
460                                  dst_address_length);
461       goto done;
462     }
463
464   /* Destination is not known and default weight is set so add route
465      to existing non-multipath adjacency */
466   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
467     {
468       /* create / delete additional mapping of existing adjacency */
469       ip4_add_del_route_args_t a;
470
471       a.table_index_or_table_id = fib_index;
472       a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
473                  | IP4_ROUTE_FLAG_FIB_INDEX
474                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
475                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
476                              | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
477       a.dst_address = dst_address[0];
478       a.dst_address_length = dst_address_length;
479       a.adj_index = nh_adj_index;
480       a.add_adj = 0;
481       a.n_add_adj = 0;
482
483       ip4_add_del_route (im, &a);
484       goto done;
485     }
486
487   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
488
489   if (! ip_multipath_adjacency_add_del_next_hop
490       (lm, is_del,
491        old_mp_adj_index,
492        nh_adj_index,
493        next_hop_weight,
494        &new_mp_adj_index))
495     {
496       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
497       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
498                                  format_ip4_address, next_hop);
499       goto done;
500     }
501   
502   old_mp = new_mp = 0;
503   if (old_mp_adj_index != ~0)
504     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
505   if (new_mp_adj_index != ~0)
506     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
507
508   if (old_mp != new_mp)
509     {
510       ip4_add_del_route_args_t a;
511       ip_adjacency_t * adj;
512
513       a.table_index_or_table_id = fib_index;
514       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
515                  | IP4_ROUTE_FLAG_FIB_INDEX
516                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
517                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
518       a.dst_address = dst_address[0];
519       a.dst_address_length = dst_address_length;
520       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
521       a.add_adj = 0;
522       a.n_add_adj = 0;
523
524       ip4_add_del_route (im, &a);
525
526       adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index);
527       if (adj->n_adj == 1)
528         adj->share_count += is_del ? -1 : 1;
529     }
530
531  done:
532   if (error)
533     clib_error_report (error);
534 }
535
536 void *
537 ip4_get_route (ip4_main_t * im,
538                u32 table_index_or_table_id,
539                u32 flags,
540                u8 * address,
541                u32 address_length)
542 {
543   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
544   u32 dst_address = * (u32 *) address;
545   uword * hash, * p;
546
547   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
548   dst_address &= im->fib_masks[address_length];
549
550   hash = fib->adj_index_by_dst_address[address_length];
551   p = hash_get (hash, dst_address);
552   return (void *) p;
553 }
554
555 void
556 ip4_foreach_matching_route (ip4_main_t * im,
557                             u32 table_index_or_table_id,
558                             u32 flags,
559                             ip4_address_t * address,
560                             u32 address_length,
561                             ip4_address_t ** results,
562                             u8 ** result_lengths)
563 {
564   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
565   u32 dst_address = address->data_u32;
566   u32 this_length = address_length;
567   
568   if (*results)
569     _vec_len (*results) = 0;
570   if (*result_lengths)
571     _vec_len (*result_lengths) = 0;
572
573   while (this_length <= 32 && vec_len (results) == 0)
574     {
575       uword k, v;
576       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
577         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
578           {
579             ip4_address_t a;
580             a.data_u32 = k;
581             vec_add1 (*results, a);
582             vec_add1 (*result_lengths, this_length);
583           }
584       }));
585
586       this_length++;
587     }
588 }
589
590 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
591                                   u32 table_index_or_table_id,
592                                   u32 flags)
593 {
594   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
595   ip_lookup_main_t * lm = &im->lookup_main;
596   u32 i, l;
597   ip4_address_t a;
598   ip4_add_del_route_callback_t * cb;
599   static ip4_address_t * to_delete;
600
601   if (lm->n_adjacency_remaps == 0)
602     return;
603
604   for (l = 0; l <= 32; l++)
605     {
606       hash_pair_t * p;
607       uword * hash = fib->adj_index_by_dst_address[l];
608
609       if (hash_elts (hash) == 0)
610         continue;
611
612       if (to_delete)
613         _vec_len (to_delete) = 0;
614
615       hash_foreach_pair (p, hash, ({
616         u32 adj_index = p->value[0];
617         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
618
619         if (m)
620           {
621             /* Record destination address from hash key. */
622             a.data_u32 = p->key;
623
624             /* New adjacency points to nothing: so delete prefix. */
625             if (m == ~0)
626               vec_add1 (to_delete, a);
627             else
628               {
629                 /* Remap to new adjacency. */
630                 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
631
632                 /* Set new adjacency value. */
633                 fib->new_hash_values[0] = p->value[0] = m - 1;
634
635                 vec_foreach (cb, im->add_del_route_callbacks)
636                   if ((flags & cb->required_flags) == cb->required_flags)
637                     cb->function (im, cb->function_opaque,
638                                   fib, flags | IP4_ROUTE_FLAG_ADD,
639                                   &a, l,
640                                   fib->old_hash_values,
641                                   fib->new_hash_values);
642               }
643           }
644       }));
645
646       fib->new_hash_values[0] = ~0;
647       for (i = 0; i < vec_len (to_delete); i++)
648         {
649           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
650           vec_foreach (cb, im->add_del_route_callbacks)
651             if ((flags & cb->required_flags) == cb->required_flags)
652               cb->function (im, cb->function_opaque,
653                             fib, flags | IP4_ROUTE_FLAG_DEL,
654                             &a, l,
655                             fib->old_hash_values,
656                             fib->new_hash_values);
657         }
658     }
659
660   /* Also remap adjacencies in mtrie. */
661   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
662
663   /* Reset mapping table. */
664   vec_zero (lm->adjacency_remap_table);
665
666   /* All remaps have been performed. */
667   lm->n_adjacency_remaps = 0;
668 }
669
670 void ip4_delete_matching_routes (ip4_main_t * im,
671                                  u32 table_index_or_table_id,
672                                  u32 flags,
673                                  ip4_address_t * address,
674                                  u32 address_length)
675 {
676   static ip4_address_t * matching_addresses;
677   static u8 * matching_address_lengths;
678   u32 l, i;
679   ip4_add_del_route_args_t a;
680
681   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
682   a.table_index_or_table_id = table_index_or_table_id;
683   a.adj_index = ~0;
684   a.add_adj = 0;
685   a.n_add_adj = 0;
686
687   for (l = address_length + 1; l <= 32; l++)
688     {
689       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
690                                   address,
691                                   l,
692                                   &matching_addresses,
693                                   &matching_address_lengths);
694       for (i = 0; i < vec_len (matching_addresses); i++)
695         {
696           a.dst_address = matching_addresses[i];
697           a.dst_address_length = matching_address_lengths[i];
698           ip4_add_del_route (im, &a);
699         }
700     }
701
702   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
703 }
704
705 void
706 ip4_forward_next_trace (vlib_main_t * vm,
707                         vlib_node_runtime_t * node,
708                         vlib_frame_t * frame,
709                         vlib_rx_or_tx_t which_adj_index);
710
711 always_inline uword
712 ip4_lookup_inline (vlib_main_t * vm,
713                    vlib_node_runtime_t * node,
714                    vlib_frame_t * frame,
715                    int lookup_for_responses_to_locally_received_packets,
716                    int is_indirect)
717 {
718   ip4_main_t * im = &ip4_main;
719   ip_lookup_main_t * lm = &im->lookup_main;
720   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
721   u32 n_left_from, n_left_to_next, * from, * to_next;
722   ip_lookup_next_t next;
723   u32 cpu_index = os_get_cpu_number();
724
725   from = vlib_frame_vector_args (frame);
726   n_left_from = frame->n_vectors;
727   next = node->cached_next_index;
728
729   while (n_left_from > 0)
730     {
731       vlib_get_next_frame (vm, node, next,
732                            to_next, n_left_to_next);
733
734       while (n_left_from >= 4 && n_left_to_next >= 2)
735         {
736           vlib_buffer_t * p0, * p1;
737           ip4_header_t * ip0, * ip1;
738           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
739           ip_lookup_next_t next0, next1;
740           ip_adjacency_t * adj0, * adj1;
741           ip4_fib_mtrie_t * mtrie0, * mtrie1;
742           ip4_fib_mtrie_leaf_t leaf0, leaf1;
743           ip4_address_t * dst_addr0, *dst_addr1;
744           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
745           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
746           u32 flow_hash_config0, flow_hash_config1;
747           u32 hash_c0, hash_c1;
748           u32 wrong_next;
749
750           /* Prefetch next iteration. */
751           {
752             vlib_buffer_t * p2, * p3;
753
754             p2 = vlib_get_buffer (vm, from[2]);
755             p3 = vlib_get_buffer (vm, from[3]);
756
757             vlib_prefetch_buffer_header (p2, LOAD);
758             vlib_prefetch_buffer_header (p3, LOAD);
759
760             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
761             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
762           }
763
764           pi0 = to_next[0] = from[0];
765           pi1 = to_next[1] = from[1];
766
767           p0 = vlib_get_buffer (vm, pi0);
768           p1 = vlib_get_buffer (vm, pi1);
769
770           ip0 = vlib_buffer_get_current (p0);
771           ip1 = vlib_buffer_get_current (p1);
772
773           if (is_indirect)
774             {
775               ip_adjacency_t * iadj0, * iadj1;
776               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
777               iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
778               dst_addr0 = &iadj0->indirect.next_hop.ip4;
779               dst_addr1 = &iadj1->indirect.next_hop.ip4;
780             }
781           else
782             {
783               dst_addr0 = &ip0->dst_address;
784               dst_addr1 = &ip1->dst_address;
785             }
786
787           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
788           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
789           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
790             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
791           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
792             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
793
794
795           if (! lookup_for_responses_to_locally_received_packets)
796             {
797               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
798               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
799
800               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
801
802               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
803               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
804             }
805
806           tcp0 = (void *) (ip0 + 1);
807           tcp1 = (void *) (ip1 + 1);
808
809           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
810                          || ip0->protocol == IP_PROTOCOL_UDP);
811           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
812                          || ip1->protocol == IP_PROTOCOL_UDP);
813
814           if (! lookup_for_responses_to_locally_received_packets)
815             {
816               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
817               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
818             }
819
820           if (! lookup_for_responses_to_locally_received_packets)
821             {
822               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
823               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
824             }
825
826           if (! lookup_for_responses_to_locally_received_packets)
827             {
828               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
829               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
830             }
831
832           if (lookup_for_responses_to_locally_received_packets)
833             {
834               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
835               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
836             }
837           else
838             {
839               /* Handle default route. */
840               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
841               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
842
843               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
844               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
845             }
846
847           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
848                                                            dst_addr0,
849                                                            /* no_default_route */ 0));
850           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
851                                                            dst_addr1,
852                                                            /* no_default_route */ 0));
853           adj0 = ip_get_adjacency (lm, adj_index0);
854           adj1 = ip_get_adjacency (lm, adj_index1);
855
856           next0 = adj0->lookup_next_index;
857           next1 = adj1->lookup_next_index;
858
859           /* Use flow hash to compute multipath adjacency. */
860           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
861           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
862           if (PREDICT_FALSE (adj0->n_adj > 1))
863             {
864               flow_hash_config0 = 
865                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
866               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
867                 ip4_compute_flow_hash (ip0, flow_hash_config0);
868             }
869           if (PREDICT_FALSE(adj1->n_adj > 1))
870             {
871               flow_hash_config1 = 
872                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
873               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
874                 ip4_compute_flow_hash (ip1, flow_hash_config1);
875             }
876
877           ASSERT (adj0->n_adj > 0);
878           ASSERT (adj1->n_adj > 0);
879           ASSERT (is_pow2 (adj0->n_adj));
880           ASSERT (is_pow2 (adj1->n_adj));
881           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
882           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
883
884           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
885           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
886
887           vlib_increment_combined_counter 
888               (cm, cpu_index, adj_index0, 1,
889                vlib_buffer_length_in_chain (vm, p0) 
890                + sizeof(ethernet_header_t));
891           vlib_increment_combined_counter 
892               (cm, cpu_index, adj_index1, 1,
893                vlib_buffer_length_in_chain (vm, p1)
894                + sizeof(ethernet_header_t));
895
896           from += 2;
897           to_next += 2;
898           n_left_to_next -= 2;
899           n_left_from -= 2;
900
901           wrong_next = (next0 != next) + 2*(next1 != next);
902           if (PREDICT_FALSE (wrong_next != 0))
903             {
904               switch (wrong_next)
905                 {
906                 case 1:
907                   /* A B A */
908                   to_next[-2] = pi1;
909                   to_next -= 1;
910                   n_left_to_next += 1;
911                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
912                   break;
913
914                 case 2:
915                   /* A A B */
916                   to_next -= 1;
917                   n_left_to_next += 1;
918                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
919                   break;
920
921                 case 3:
922                   /* A B C */
923                   to_next -= 2;
924                   n_left_to_next += 2;
925                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
926                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
927                   if (next0 == next1)
928                     {
929                       /* A B B */
930                       vlib_put_next_frame (vm, node, next, n_left_to_next);
931                       next = next1;
932                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
933                     }
934                 }
935             }
936         }
937     
938       while (n_left_from > 0 && n_left_to_next > 0)
939         {
940           vlib_buffer_t * p0;
941           ip4_header_t * ip0;
942           __attribute__((unused)) tcp_header_t * tcp0;
943           ip_lookup_next_t next0;
944           ip_adjacency_t * adj0;
945           ip4_fib_mtrie_t * mtrie0;
946           ip4_fib_mtrie_leaf_t leaf0;
947           ip4_address_t * dst_addr0;
948           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
949           u32 flow_hash_config0, hash_c0;
950
951           pi0 = from[0];
952           to_next[0] = pi0;
953
954           p0 = vlib_get_buffer (vm, pi0);
955
956           ip0 = vlib_buffer_get_current (p0);
957
958           if (is_indirect)
959             {
960               ip_adjacency_t * iadj0;
961               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
962               dst_addr0 = &iadj0->indirect.next_hop.ip4;
963             }
964           else
965             {
966               dst_addr0 = &ip0->dst_address;
967             }
968
969           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
970           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
971             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
972
973           if (! lookup_for_responses_to_locally_received_packets)
974             {
975               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
976
977               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
978
979               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
980             }
981
982           tcp0 = (void *) (ip0 + 1);
983
984           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
985                          || ip0->protocol == IP_PROTOCOL_UDP);
986
987           if (! lookup_for_responses_to_locally_received_packets)
988             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
989
990           if (! lookup_for_responses_to_locally_received_packets)
991             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
992
993           if (! lookup_for_responses_to_locally_received_packets)
994             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
995
996           if (lookup_for_responses_to_locally_received_packets)
997             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
998           else
999             {
1000               /* Handle default route. */
1001               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1002               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1003             }
1004
1005           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1006                                                            dst_addr0,
1007                                                            /* no_default_route */ 0));
1008
1009           adj0 = ip_get_adjacency (lm, adj_index0);
1010
1011           next0 = adj0->lookup_next_index;
1012
1013           /* Use flow hash to compute multipath adjacency. */
1014           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
1015           if (PREDICT_FALSE(adj0->n_adj > 1))
1016             {
1017               flow_hash_config0 = 
1018                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
1019
1020               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
1021                 ip4_compute_flow_hash (ip0, flow_hash_config0);
1022             }
1023
1024           ASSERT (adj0->n_adj > 0);
1025           ASSERT (is_pow2 (adj0->n_adj));
1026           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
1027
1028           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1029
1030           vlib_increment_combined_counter 
1031               (cm, cpu_index, adj_index0, 1,
1032                vlib_buffer_length_in_chain (vm, p0)
1033                + sizeof(ethernet_header_t));
1034
1035           from += 1;
1036           to_next += 1;
1037           n_left_to_next -= 1;
1038           n_left_from -= 1;
1039
1040           if (PREDICT_FALSE (next0 != next))
1041             {
1042               n_left_to_next += 1;
1043               vlib_put_next_frame (vm, node, next, n_left_to_next);
1044               next = next0;
1045               vlib_get_next_frame (vm, node, next,
1046                                    to_next, n_left_to_next);
1047               to_next[0] = pi0;
1048               to_next += 1;
1049               n_left_to_next -= 1;
1050             }
1051         }
1052
1053       vlib_put_next_frame (vm, node, next, n_left_to_next);
1054     }
1055
1056   if (node->flags & VLIB_NODE_FLAG_TRACE)
1057     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
1058
1059   return frame->n_vectors;
1060 }
1061
1062 /** @brief IPv4 lookup node.
1063     @node ip4-lookup
1064
1065     This is the main IPv4 lookup dispatch node.
1066
1067     @param vm vlib_main_t corresponding to the current thread
1068     @param node vlib_node_runtime_t
1069     @param frame vlib_frame_t whose contents should be dispatched
1070
1071     @par Graph mechanics: buffer metadata, next index usage
1072
1073     @em Uses:
1074     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
1075         - Indicates the @c sw_if_index value of the interface that the
1076           packet was received on.
1077     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
1078         - When the value is @c ~0 then the node performs a longest prefix
1079           match (LPM) for the packet destination address in the FIB attached
1080           to the receive interface.
1081         - Otherwise perform LPM for the packet destination address in the
1082           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
1083           value (0, 1, ...) and not a VRF id.
1084
1085     @em Sets:
1086     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
1087         - The lookup result adjacency index.
1088
1089     <em>Next Index:</em>
1090     - Dispatches the packet to the node index found in
1091       ip_adjacency_t @c adj->lookup_next_index
1092       (where @c adj is the lookup result adjacency).
1093 */
1094 static uword
1095 ip4_lookup (vlib_main_t * vm,
1096             vlib_node_runtime_t * node,
1097             vlib_frame_t * frame)
1098 {
1099   return ip4_lookup_inline (vm, node, frame,
1100                             /* lookup_for_responses_to_locally_received_packets */ 0,
1101                             /* is_indirect */ 0);
1102
1103 }
1104
1105 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
1106                                         ip_adjacency_t * adj,
1107                                         u32 sw_if_index,
1108                                         u32 if_address_index)
1109 {
1110   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1111   ip_lookup_next_t n;
1112   vnet_l3_packet_type_t packet_type;
1113   u32 node_index;
1114
1115   if (hw->hw_class_index == ethernet_hw_interface_class.index
1116       || hw->hw_class_index == srp_hw_interface_class.index)
1117     {
1118       /* 
1119        * We have a bit of a problem in this case. ip4-arp uses
1120        * the rewrite_header.next_index to hand pkts to the
1121        * indicated inteface output node. We can end up in
1122        * ip4_rewrite_local, too, which also pays attention to 
1123        * rewrite_header.next index. Net result: a hack in
1124        * ip4_rewrite_local...
1125        */
1126       n = IP_LOOKUP_NEXT_ARP;
1127       node_index = ip4_arp_node.index;
1128       adj->if_address_index = if_address_index;
1129       adj->arp.next_hop.ip4.as_u32 = 0;
1130       ip46_address_reset(&adj->arp.next_hop);
1131       packet_type = VNET_L3_PACKET_TYPE_ARP;
1132     }
1133   else
1134     {
1135       n = IP_LOOKUP_NEXT_REWRITE;
1136       node_index = ip4_rewrite_node.index;
1137       packet_type = VNET_L3_PACKET_TYPE_IP4;
1138     }
1139
1140   adj->lookup_next_index = n;
1141   vnet_rewrite_for_sw_interface
1142     (vnm,
1143      packet_type,
1144      sw_if_index,
1145      node_index,
1146      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1147      &adj->rewrite_header,
1148      sizeof (adj->rewrite_data));
1149 }
1150
1151 static void
1152 ip4_add_interface_routes (u32 sw_if_index,
1153                           ip4_main_t * im, u32 fib_index,
1154                           ip_interface_address_t * a)
1155 {
1156   vnet_main_t * vnm = vnet_get_main();
1157   ip_lookup_main_t * lm = &im->lookup_main;
1158   ip_adjacency_t * adj;
1159   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1160   ip4_add_del_route_args_t x;
1161   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1162   u32 classify_table_index;
1163
1164   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1165   x.table_index_or_table_id = fib_index;
1166   x.flags = (IP4_ROUTE_FLAG_ADD
1167              | IP4_ROUTE_FLAG_FIB_INDEX
1168              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1169   x.dst_address = address[0];
1170   x.dst_address_length = a->address_length;
1171   x.n_add_adj = 0;
1172   x.add_adj = 0;
1173
1174   a->neighbor_probe_adj_index = ~0;
1175   if (a->address_length < 32)
1176     {
1177       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1178                               &x.adj_index);
1179       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1180       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1181       ip4_add_del_route (im, &x);
1182       a->neighbor_probe_adj_index = x.adj_index;
1183     }
1184   
1185   /* Add e.g. 1.1.1.1/32 as local to this host. */
1186   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1187                           &x.adj_index);
1188   
1189   classify_table_index = ~0;
1190   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1191     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1192   if (classify_table_index != (u32) ~0)
1193     {
1194       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1195       adj->classify.table_index = classify_table_index;
1196     }
1197   else
1198     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1199   
1200   adj->if_address_index = a - lm->if_address_pool;
1201   adj->rewrite_header.sw_if_index = sw_if_index;
1202   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1203   /* 
1204    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1205    * fail an RPF-ish check, but still go thru the rewrite code...
1206    */
1207   adj->rewrite_header.data_bytes = 0;
1208
1209   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1210   x.dst_address_length = 32;
1211   ip4_add_del_route (im, &x);
1212 }
1213
1214 static void
1215 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1216 {
1217   ip4_add_del_route_args_t x;
1218
1219   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1220   x.table_index_or_table_id = fib_index;
1221   x.flags = (IP4_ROUTE_FLAG_DEL
1222              | IP4_ROUTE_FLAG_FIB_INDEX
1223              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1224   x.dst_address = address[0];
1225   x.dst_address_length = address_length;
1226   x.adj_index = ~0;
1227   x.n_add_adj = 0;
1228   x.add_adj = 0;
1229
1230   if (address_length < 32)
1231     ip4_add_del_route (im, &x);
1232
1233   x.dst_address_length = 32;
1234   ip4_add_del_route (im, &x);
1235
1236   ip4_delete_matching_routes (im,
1237                               fib_index,
1238                               IP4_ROUTE_FLAG_FIB_INDEX,
1239                               address,
1240                               address_length);
1241 }
1242
1243 typedef struct {
1244     u32 sw_if_index;
1245     ip4_address_t address;
1246     u32 length;
1247 } ip4_interface_address_t;
1248
1249 static clib_error_t *
1250 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1251                                         u32 sw_if_index,
1252                                         ip4_address_t * new_address,
1253                                         u32 new_length,
1254                                         u32 redistribute,
1255                                         u32 insert_routes,
1256                                         u32 is_del);
1257
1258 static clib_error_t *
1259 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1260                                         u32 sw_if_index,
1261                                         ip4_address_t * address,
1262                                         u32 address_length,
1263                                         u32 redistribute,
1264                                         u32 insert_routes,
1265                                         u32 is_del)
1266 {
1267   vnet_main_t * vnm = vnet_get_main();
1268   ip4_main_t * im = &ip4_main;
1269   ip_lookup_main_t * lm = &im->lookup_main;
1270   clib_error_t * error = 0;
1271   u32 if_address_index, elts_before;
1272   ip4_address_fib_t ip4_af, * addr_fib = 0;
1273
1274   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1275   ip4_addr_fib_init (&ip4_af, address,
1276                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1277   vec_add1 (addr_fib, ip4_af);
1278
1279   /* When adding an address check that it does not conflict with an existing address. */
1280   if (! is_del)
1281     {
1282       ip_interface_address_t * ia;
1283       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1284                                     0 /* honor unnumbered */,
1285       ({
1286         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1287
1288         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1289             || ip4_destination_matches_route (im, x, address, address_length))
1290           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1291                                     format_ip4_address_and_length, address, address_length,
1292                                     format_ip4_address_and_length, x, ia->address_length,
1293                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1294       }));
1295     }
1296
1297   elts_before = pool_elts (lm->if_address_pool);
1298
1299   error = ip_interface_address_add_del
1300     (lm,
1301      sw_if_index,
1302      addr_fib,
1303      address_length,
1304      is_del,
1305      &if_address_index);
1306   if (error)
1307     goto done;
1308   
1309   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1310     {
1311       if (is_del)
1312         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1313                                   address_length);
1314       
1315       else
1316           ip4_add_interface_routes (sw_if_index,
1317                                     im, ip4_af.fib_index,
1318                                     pool_elt_at_index 
1319                                     (lm->if_address_pool, if_address_index));
1320     }
1321
1322   /* If pool did not grow/shrink: add duplicate address. */
1323   if (elts_before != pool_elts (lm->if_address_pool))
1324     {
1325       ip4_add_del_interface_address_callback_t * cb;
1326       vec_foreach (cb, im->add_del_interface_address_callbacks)
1327         cb->function (im, cb->function_opaque, sw_if_index,
1328                       address, address_length,
1329                       if_address_index,
1330                       is_del);
1331     }
1332
1333  done:
1334   vec_free (addr_fib);
1335   return error;
1336 }
1337
1338 clib_error_t *
1339 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1340                                ip4_address_t * address, u32 address_length,
1341                                u32 is_del)
1342 {
1343   return ip4_add_del_interface_address_internal
1344     (vm, sw_if_index, address, address_length,
1345      /* redistribute */ 1,
1346      /* insert_routes */ 1,
1347      is_del);
1348 }
1349
1350 static clib_error_t *
1351 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1352                                 u32 sw_if_index,
1353                                 u32 flags)
1354 {
1355   ip4_main_t * im = &ip4_main;
1356   ip_interface_address_t * ia;
1357   ip4_address_t * a;
1358   u32 is_admin_up, fib_index;
1359   
1360   /* Fill in lookup tables with default table (0). */
1361   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1362   
1363   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1364   
1365   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1366   
1367   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1368
1369   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1370                                 0 /* honor unnumbered */,
1371   ({
1372     a = ip_interface_address_get_address (&im->lookup_main, ia);
1373     if (is_admin_up)
1374       ip4_add_interface_routes (sw_if_index,
1375                                 im, fib_index,
1376                                 ia);
1377     else
1378       ip4_del_interface_routes (im, fib_index,
1379                                 a, ia->address_length);
1380   }));
1381
1382   return 0;
1383 }
1384  
1385 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1386
1387 /* Built-in ip4 unicast rx feature path definition */
1388 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
1389   .node_name = "ip4-inacl", 
1390   .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-rx", 0},
1391   .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
1392 };
1393
1394 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
1395   .node_name = "ip4-source-check-via-rx",
1396   .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-any", 0},
1397   .feature_index = 
1398   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
1399 };
1400
1401 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
1402   .node_name = "ip4-source-check-via-any",
1403   .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
1404   .feature_index = 
1405   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
1406 };
1407
1408 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
1409   .node_name = "ip4-source-and-port-range-check-rx",
1410   .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
1411   .feature_index =
1412   &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
1413 };
1414
1415 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
1416   .node_name = "ip4-policer-classify",
1417   .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip4", 0},
1418   .feature_index =
1419   &ip4_main.ip4_unicast_rx_feature_policer_classify,
1420 };
1421
1422 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
1423   .node_name = "ipsec-input-ip4",
1424   .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip4", 0},
1425   .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
1426 };
1427
1428 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
1429   .node_name = "vpath-input-ip4",
1430   .runs_before = ORDER_CONSTRAINTS {"ip4-lookup", 0},
1431   .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
1432 };
1433
1434 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
1435   .node_name = "ip4-lookup",
1436   .runs_before = 0, /* not before any other features */
1437   .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
1438 };
1439
1440 /* Built-in ip4 multicast rx feature path definition */
1441 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
1442   .node_name = "vpath-input-ip4",
1443   .runs_before = ORDER_CONSTRAINTS {"ip4-lookup-multicast", 0},
1444   .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
1445 };
1446
1447 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
1448   .node_name = "ip4-lookup-multicast",
1449   .runs_before = 0, /* not before any other features */
1450   .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
1451 };
1452
/* Start nodes handed to ip_feature_init_cast for the rx feature casts. */
static char * rx_feature_start_nodes[] = {
  "ip4-input",
  "ip4-input-no-checksum"
};

/* Start nodes handed to ip_feature_init_cast for the tx feature cast. */
static char * tx_feature_start_nodes[] = {
  "ip4-rewrite-transit"
};
1458
1459 /* Source and port-range check ip4 tx feature path definition */
1460 VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
1461   .node_name = "ip4-source-and-port-range-check-tx",
1462   .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
1463   .feature_index =
1464   &ip4_main.ip4_unicast_tx_feature_source_and_port_range_check,
1465
1466 };
1467
1468 /* Built-in ip4 tx feature path definition */
1469 VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
1470   .node_name = "interface-output",
1471   .runs_before = 0, /* not before any other features */
1472   .feature_index = &ip4_main.ip4_tx_feature_interface_output,
1473 };
1474
1475
1476 static clib_error_t *
1477 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
1478 {
1479   ip_lookup_main_t * lm = &im->lookup_main;
1480   clib_error_t * error;
1481   vnet_cast_t cast;
1482   ip_config_main_t * cm;
1483   vnet_config_main_t * vcm;
1484   char **feature_start_nodes;
1485   int feature_start_len;
1486
1487   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
1488     {
1489       cm = &lm->feature_config_mains[cast];
1490       vcm = &cm->config_main;
1491
1492       if (cast < VNET_IP_TX_FEAT)
1493         {
1494           feature_start_nodes = rx_feature_start_nodes;
1495           feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
1496         }
1497       else
1498         {
1499           feature_start_nodes = tx_feature_start_nodes;
1500           feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
1501         }
1502       
1503       if ((error = ip_feature_init_cast (vm, cm, vcm, 
1504                                          feature_start_nodes,
1505                                          feature_start_len,
1506                                          cast,
1507                                          1 /* is_ip4 */)))
1508         return error;
1509     }
1510
1511   return 0;
1512 }
1513
1514 static clib_error_t *
1515 ip4_sw_interface_add_del (vnet_main_t * vnm,
1516                           u32 sw_if_index,
1517                           u32 is_add)
1518 {
1519   vlib_main_t * vm = vnm->vlib_main;
1520   ip4_main_t * im = &ip4_main;
1521   ip_lookup_main_t * lm = &im->lookup_main;
1522   u32 ci, cast;
1523   u32 feature_index;
1524
1525   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
1526     {
1527       ip_config_main_t * cm = &lm->feature_config_mains[cast];
1528       vnet_config_main_t * vcm = &cm->config_main;
1529
1530       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1531       ci = cm->config_index_by_sw_if_index[sw_if_index];
1532
1533       if (cast == VNET_IP_RX_UNICAST_FEAT)
1534         feature_index = im->ip4_unicast_rx_feature_lookup;
1535       else if (cast == VNET_IP_RX_MULTICAST_FEAT)
1536         feature_index = im->ip4_multicast_rx_feature_lookup;
1537       else
1538         feature_index = im->ip4_tx_feature_interface_output;
1539
1540       if (is_add)
1541         ci = vnet_config_add_feature (vm, vcm, 
1542                                       ci,
1543                                       feature_index,
1544                                       /* config data */ 0,
1545                                       /* # bytes of config data */ 0);
1546       else
1547         ci = vnet_config_del_feature (vm, vcm,
1548                                       ci,
1549                                       feature_index,
1550                                       /* config data */ 0,
1551                                       /* # bytes of config data */ 0);
1552
1553       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1554       /* 
1555        * note: do not update the tx feature count here.
1556        */
1557     }
1558
1559   return /* no error */ 0;
1560 }
1561
1562 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1563
1564 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
1565
1566 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1567   .function = ip4_lookup,
1568   .name = "ip4-lookup",
1569   .vector_size = sizeof (u32),
1570
1571   .format_trace = format_ip4_lookup_trace,
1572
1573   .n_next_nodes = IP4_LOOKUP_N_NEXT,
1574   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1575 };
1576
1577 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
1578
1579 static uword
1580 ip4_indirect (vlib_main_t * vm,
1581                vlib_node_runtime_t * node,
1582                vlib_frame_t * frame)
1583 {
1584   return ip4_lookup_inline (vm, node, frame,
1585                             /* lookup_for_responses_to_locally_received_packets */ 0,
1586                             /* is_indirect */ 1);
1587 }
1588
1589 VLIB_REGISTER_NODE (ip4_indirect_node) = {
1590   .function = ip4_indirect,
1591   .name = "ip4-indirect",
1592   .vector_size = sizeof (u32),
1593   .sibling_of = "ip4-lookup",
1594   .format_trace = format_ip4_lookup_trace,
1595
1596   .n_next_nodes = 0,
1597 };
1598
1599 VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect);
1600
1601
1602 /* Global IP4 main. */
1603 ip4_main_t ip4_main;
1604
1605 clib_error_t *
1606 ip4_lookup_init (vlib_main_t * vm)
1607 {
1608   ip4_main_t * im = &ip4_main;
1609   clib_error_t * error;
1610   uword i;
1611
1612   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1613     {
1614       u32 m;
1615
1616       if (i < 32)
1617         m = pow2_mask (i) << (32 - i);
1618       else 
1619         m = ~0;
1620       im->fib_masks[i] = clib_host_to_net_u32 (m);
1621     }
1622
1623   /* Create FIB with index 0 and table id of 0. */
1624   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1625
1626   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1627
1628   {
1629     pg_node_t * pn;
1630     pn = pg_get_node (ip4_lookup_node.index);
1631     pn->unformat_edit = unformat_pg_ip4_header;
1632   }
1633
1634   {
1635     ethernet_arp_header_t h;
1636
1637     memset (&h, 0, sizeof (h));
1638
1639     /* Set target ethernet address to all zeros. */
1640     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1641
1642 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1643 #define _8(f,v) h.f = v;
1644     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1645     _16 (l3_type, ETHERNET_TYPE_IP4);
1646     _8 (n_l2_address_bytes, 6);
1647     _8 (n_l3_address_bytes, 4);
1648     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1649 #undef _16
1650 #undef _8
1651
1652     vlib_packet_template_init (vm,
1653                                &im->ip4_arp_request_packet_template,
1654                                /* data */ &h,
1655                                sizeof (h),
1656                                /* alloc chunk size */ 8,
1657                                "ip4 arp");
1658   }
1659
1660   error = ip4_feature_init (vm, im);
1661
1662   return error;
1663 }
1664
1665 VLIB_INIT_FUNCTION (ip4_lookup_init);
1666
1667 typedef struct {
1668   /* Adjacency taken. */
1669   u32 adj_index;
1670   u32 flow_hash;
1671   u32 fib_index;
1672
1673   /* Packet data, possibly *after* rewrite. */
1674   u8 packet_data[64 - 1*sizeof(u32)];
1675 } ip4_forward_next_trace_t;
1676
1677 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1678 {
1679   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1680   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1681   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1682   uword indent = format_get_indent (s);
1683   s = format (s, "%U%U",
1684                 format_white_space, indent,
1685                 format_ip4_header, t->packet_data);
1686   return s;
1687 }
1688
1689 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1690 {
1691   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1692   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1693   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1694   vnet_main_t * vnm = vnet_get_main();
1695   ip4_main_t * im = &ip4_main;
1696   uword indent = format_get_indent (s);
1697
1698   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1699               t->fib_index, t->adj_index, format_ip_adjacency,
1700               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1701   s = format (s, "\n%U%U",
1702               format_white_space, indent,
1703               format_ip4_header, t->packet_data);
1704   return s;
1705 }
1706
1707 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1708 {
1709   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1710   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1711   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1712   vnet_main_t * vnm = vnet_get_main();
1713   ip4_main_t * im = &ip4_main;
1714   uword indent = format_get_indent (s);
1715
1716   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
1717               t->fib_index, t->adj_index, format_ip_adjacency,
1718               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1719   s = format (s, "\n%U%U",
1720               format_white_space, indent,
1721               format_ip_adjacency_packet_data,
1722               vnm, &im->lookup_main, t->adj_index,
1723               t->packet_data, sizeof (t->packet_data));
1724   return s;
1725 }
1726
1727 /* Common trace function for all ip4-forward next nodes. */
1728 void
1729 ip4_forward_next_trace (vlib_main_t * vm,
1730                         vlib_node_runtime_t * node,
1731                         vlib_frame_t * frame,
1732                         vlib_rx_or_tx_t which_adj_index)
1733 {
1734   u32 * from, n_left;
1735   ip4_main_t * im = &ip4_main;
1736
1737   n_left = frame->n_vectors;
1738   from = vlib_frame_vector_args (frame);
1739   
1740   while (n_left >= 4)
1741     {
1742       u32 bi0, bi1;
1743       vlib_buffer_t * b0, * b1;
1744       ip4_forward_next_trace_t * t0, * t1;
1745
1746       /* Prefetch next iteration. */
1747       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1748       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1749
1750       bi0 = from[0];
1751       bi1 = from[1];
1752
1753       b0 = vlib_get_buffer (vm, bi0);
1754       b1 = vlib_get_buffer (vm, bi1);
1755
1756       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1757         {
1758           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1759           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1760           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1761           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1762               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1763               vec_elt (im->fib_index_by_sw_if_index,
1764                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1765
1766           clib_memcpy (t0->packet_data,
1767                   vlib_buffer_get_current (b0),
1768                   sizeof (t0->packet_data));
1769         }
1770       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1771         {
1772           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1773           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1774           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1775           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1776               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1777               vec_elt (im->fib_index_by_sw_if_index,
1778                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1779           clib_memcpy (t1->packet_data,
1780                   vlib_buffer_get_current (b1),
1781                   sizeof (t1->packet_data));
1782         }
1783       from += 2;
1784       n_left -= 2;
1785     }
1786
1787   while (n_left >= 1)
1788     {
1789       u32 bi0;
1790       vlib_buffer_t * b0;
1791       ip4_forward_next_trace_t * t0;
1792
1793       bi0 = from[0];
1794
1795       b0 = vlib_get_buffer (vm, bi0);
1796
1797       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1798         {
1799           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1800           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1801           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1802           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1803               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1804               vec_elt (im->fib_index_by_sw_if_index,
1805                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1806           clib_memcpy (t0->packet_data,
1807                   vlib_buffer_get_current (b0),
1808                   sizeof (t0->packet_data));
1809         }
1810       from += 1;
1811       n_left -= 1;
1812     }
1813 }
1814
1815 static uword
1816 ip4_drop_or_punt (vlib_main_t * vm,
1817                   vlib_node_runtime_t * node,
1818                   vlib_frame_t * frame,
1819                   ip4_error_t error_code)
1820 {
1821   u32 * buffers = vlib_frame_vector_args (frame);
1822   uword n_packets = frame->n_vectors;
1823
1824   vlib_error_drop_buffers (vm, node,
1825                            buffers,
1826                            /* stride */ 1,
1827                            n_packets,
1828                            /* next */ 0,
1829                            ip4_input_node.index,
1830                            error_code);
1831
1832   if (node->flags & VLIB_NODE_FLAG_TRACE)
1833     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1834
1835   return n_packets;
1836 }
1837
1838 static uword
1839 ip4_drop (vlib_main_t * vm,
1840           vlib_node_runtime_t * node,
1841           vlib_frame_t * frame)
1842 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1843
1844 static uword
1845 ip4_punt (vlib_main_t * vm,
1846           vlib_node_runtime_t * node,
1847           vlib_frame_t * frame)
1848 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1849
1850 static uword
1851 ip4_miss (vlib_main_t * vm,
1852           vlib_node_runtime_t * node,
1853           vlib_frame_t * frame)
1854 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1855
1856 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1857   .function = ip4_drop,
1858   .name = "ip4-drop",
1859   .vector_size = sizeof (u32),
1860
1861   .format_trace = format_ip4_forward_next_trace,
1862
1863   .n_next_nodes = 1,
1864   .next_nodes = {
1865     [0] = "error-drop",
1866   },
1867 };
1868
1869 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
1870
1871 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1872   .function = ip4_punt,
1873   .name = "ip4-punt",
1874   .vector_size = sizeof (u32),
1875
1876   .format_trace = format_ip4_forward_next_trace,
1877
1878   .n_next_nodes = 1,
1879   .next_nodes = {
1880     [0] = "error-punt",
1881   },
1882 };
1883
1884 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
1885
1886 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1887   .function = ip4_miss,
1888   .name = "ip4-miss",
1889   .vector_size = sizeof (u32),
1890
1891   .format_trace = format_ip4_forward_next_trace,
1892
1893   .n_next_nodes = 1,
1894   .next_nodes = {
1895     [0] = "error-drop",
1896   },
1897 };
1898
1899 VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss);
1900
1901 /* Compute TCP/UDP/ICMP4 checksum in software. */
1902 u16
1903 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1904                               ip4_header_t * ip0)
1905 {
1906   ip_csum_t sum0;
1907   u32 ip_header_length, payload_length_host_byte_order;
1908   u32 n_this_buffer, n_bytes_left;
1909   u16 sum16;
1910   void * data_this_buffer;
1911   
1912   /* Initialize checksum with ip header. */
1913   ip_header_length = ip4_header_bytes (ip0);
1914   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1915   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1916
1917   if (BITS (uword) == 32)
1918     {
1919       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1920       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1921     }
1922   else
1923     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1924
1925   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1926   data_this_buffer = (void *) ip0 + ip_header_length;
1927   if (n_this_buffer + ip_header_length > p0->current_length)
1928     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1929   while (1)
1930     {
1931       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1932       n_bytes_left -= n_this_buffer;
1933       if (n_bytes_left == 0)
1934         break;
1935
1936       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1937       p0 = vlib_get_buffer (vm, p0->next_buffer);
1938       data_this_buffer = vlib_buffer_get_current (p0);
1939       n_this_buffer = p0->current_length;
1940     }
1941
1942   sum16 = ~ ip_csum_fold (sum0);
1943
1944   return sum16;
1945 }
1946
1947 static u32
1948 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1949 {
1950   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1951   udp_header_t * udp0;
1952   u16 sum16;
1953
1954   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1955           || ip0->protocol == IP_PROTOCOL_UDP);
1956
1957   udp0 = (void *) (ip0 + 1);
1958   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1959     {
1960       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1961                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1962       return p0->flags;
1963     }
1964
1965   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1966
1967   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1968                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1969
1970   return p0->flags;
1971 }
1972
1973 static uword
1974 ip4_local (vlib_main_t * vm,
1975            vlib_node_runtime_t * node,
1976            vlib_frame_t * frame)
1977 {
1978   ip4_main_t * im = &ip4_main;
1979   ip_lookup_main_t * lm = &im->lookup_main;
1980   ip_local_next_t next_index;
1981   u32 * from, * to_next, n_left_from, n_left_to_next;
1982   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1983
1984   from = vlib_frame_vector_args (frame);
1985   n_left_from = frame->n_vectors;
1986   next_index = node->cached_next_index;
1987   
1988   if (node->flags & VLIB_NODE_FLAG_TRACE)
1989     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1990
1991   while (n_left_from > 0)
1992     {
1993       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1994
1995       while (n_left_from >= 4 && n_left_to_next >= 2)
1996         {
1997           vlib_buffer_t * p0, * p1;
1998           ip4_header_t * ip0, * ip1;
1999           udp_header_t * udp0, * udp1;
2000           ip4_fib_mtrie_t * mtrie0, * mtrie1;
2001           ip4_fib_mtrie_leaf_t leaf0, leaf1;
2002           ip_adjacency_t * adj0, * adj1;
2003           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
2004           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
2005           i32 len_diff0, len_diff1;
2006           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2007           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
2008           u8 enqueue_code;
2009       
2010           pi0 = to_next[0] = from[0];
2011           pi1 = to_next[1] = from[1];
2012           from += 2;
2013           n_left_from -= 2;
2014           to_next += 2;
2015           n_left_to_next -= 2;
2016       
2017           p0 = vlib_get_buffer (vm, pi0);
2018           p1 = vlib_get_buffer (vm, pi1);
2019
2020           ip0 = vlib_buffer_get_current (p0);
2021           ip1 = vlib_buffer_get_current (p1);
2022
2023           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2024                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2025           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
2026                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
2027
2028           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2029           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
2030
2031           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
2032
2033           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2034           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
2035
2036           /* Treat IP frag packets as "experimental" protocol for now
2037              until support of IP frag reassembly is implemented */
2038           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
2039           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
2040           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2041           is_udp1 = proto1 == IP_PROTOCOL_UDP;
2042           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2043           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
2044
2045           flags0 = p0->flags;
2046           flags1 = p1->flags;
2047
2048           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2049           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2050
2051           udp0 = ip4_next_header (ip0);
2052           udp1 = ip4_next_header (ip1);
2053
2054           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2055           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2056           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2057
2058           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2059           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
2060
2061           /* Verify UDP length. */
2062           ip_len0 = clib_net_to_host_u16 (ip0->length);
2063           ip_len1 = clib_net_to_host_u16 (ip1->length);
2064           udp_len0 = clib_net_to_host_u16 (udp0->length);
2065           udp_len1 = clib_net_to_host_u16 (udp1->length);
2066
2067           len_diff0 = ip_len0 - udp_len0;
2068           len_diff1 = ip_len1 - udp_len1;
2069
2070           len_diff0 = is_udp0 ? len_diff0 : 0;
2071           len_diff1 = is_udp1 ? len_diff1 : 0;
2072
2073           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
2074                                 & good_tcp_udp0 & good_tcp_udp1)))
2075             {
2076               if (is_tcp_udp0)
2077                 {
2078                   if (is_tcp_udp0
2079                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2080                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2081                   good_tcp_udp0 =
2082                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2083                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2084                 }
2085               if (is_tcp_udp1)
2086                 {
2087                   if (is_tcp_udp1
2088                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2089                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
2090                   good_tcp_udp1 =
2091                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2092                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2093                 }
2094             }
2095
2096           good_tcp_udp0 &= len_diff0 >= 0;
2097           good_tcp_udp1 &= len_diff1 >= 0;
2098
2099           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2100           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
2101
2102           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
2103
2104           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2105           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
2106
2107           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2108           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2109                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2110                     : error0);
2111           error1 = (is_tcp_udp1 && ! good_tcp_udp1
2112                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
2113                     : error1);
2114
2115           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2116           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
2117
2118           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2119           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2120
2121           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
2122           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2123
2124           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2125                                                            &ip0->src_address,
2126                                                            /* no_default_route */ 1));
2127           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
2128                                                            &ip1->src_address,
2129                                                            /* no_default_route */ 1));
2130
2131           adj0 = ip_get_adjacency (lm, adj_index0);
2132           adj1 = ip_get_adjacency (lm, adj_index1);
2133
2134           /* 
2135            * Must have a route to source otherwise we drop the packet.
2136            * ip4 broadcasts are accepted, e.g. to make dhcp client work
2137            */
2138           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2139                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2140                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2141                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2142                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2143                     ? IP4_ERROR_SRC_LOOKUP_MISS
2144                     : error0);
2145           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
2146                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2147                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
2148                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2149                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2150                     ? IP4_ERROR_SRC_LOOKUP_MISS
2151                     : error1);
2152
2153           next0 = lm->local_next_by_ip_protocol[proto0];
2154           next1 = lm->local_next_by_ip_protocol[proto1];
2155
2156           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2157           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
2158
2159           p0->error = error0 ? error_node->errors[error0] : 0;
2160           p1->error = error1 ? error_node->errors[error1] : 0;
2161
2162           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
2163
2164           if (PREDICT_FALSE (enqueue_code != 0))
2165             {
2166               switch (enqueue_code)
2167                 {
2168                 case 1:
2169                   /* A B A */
2170                   to_next[-2] = pi1;
2171                   to_next -= 1;
2172                   n_left_to_next += 1;
2173                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2174                   break;
2175
2176                 case 2:
2177                   /* A A B */
2178                   to_next -= 1;
2179                   n_left_to_next += 1;
2180                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2181                   break;
2182
2183                 case 3:
2184                   /* A B B or A B C */
2185                   to_next -= 2;
2186                   n_left_to_next += 2;
2187                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2188                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2189                   if (next0 == next1)
2190                     {
2191                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2192                       next_index = next1;
2193                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2194                     }
2195                   break;
2196                 }
2197             }
2198         }
2199
2200       while (n_left_from > 0 && n_left_to_next > 0)
2201         {
2202           vlib_buffer_t * p0;
2203           ip4_header_t * ip0;
2204           udp_header_t * udp0;
2205           ip4_fib_mtrie_t * mtrie0;
2206           ip4_fib_mtrie_leaf_t leaf0;
2207           ip_adjacency_t * adj0;
2208           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
2209           i32 len_diff0;
2210           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2211       
2212           pi0 = to_next[0] = from[0];
2213           from += 1;
2214           n_left_from -= 1;
2215           to_next += 1;
2216           n_left_to_next -= 1;
2217       
2218           p0 = vlib_get_buffer (vm, pi0);
2219
2220           ip0 = vlib_buffer_get_current (p0);
2221
2222           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2223                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2224
2225           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2226
2227           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2228
2229           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2230
2231           /* Treat IP frag packets as "experimental" protocol for now
2232              until support of IP frag reassembly is implemented */
2233           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
2234           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2235           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2236
2237           flags0 = p0->flags;
2238
2239           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2240
2241           udp0 = ip4_next_header (ip0);
2242
2243           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2244           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2245
2246           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2247
2248           /* Verify UDP length. */
2249           ip_len0 = clib_net_to_host_u16 (ip0->length);
2250           udp_len0 = clib_net_to_host_u16 (udp0->length);
2251
2252           len_diff0 = ip_len0 - udp_len0;
2253
2254           len_diff0 = is_udp0 ? len_diff0 : 0;
2255
2256           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
2257             {
2258               if (is_tcp_udp0)
2259                 {
2260                   if (is_tcp_udp0
2261                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2262                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2263                   good_tcp_udp0 =
2264                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2265                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2266                 }
2267             }
2268
2269           good_tcp_udp0 &= len_diff0 >= 0;
2270
2271           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2272
2273           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2274
2275           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2276
2277           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2278           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2279                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2280                     : error0);
2281
2282           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2283
2284           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2285           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2286
2287           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2288                                                            &ip0->src_address,
2289                                                            /* no_default_route */ 1));
2290
2291           adj0 = ip_get_adjacency (lm, adj_index0);
2292
2293           /* Must have a route to source otherwise we drop the packet. */
2294           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2295                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2296                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2297                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2298                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2299                     ? IP4_ERROR_SRC_LOOKUP_MISS
2300                     : error0);
2301
2302           next0 = lm->local_next_by_ip_protocol[proto0];
2303
2304           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2305
2306           p0->error = error0? error_node->errors[error0] : 0;
2307
2308           if (PREDICT_FALSE (next0 != next_index))
2309             {
2310               n_left_to_next += 1;
2311               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2312
2313               next_index = next0;
2314               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2315               to_next[0] = pi0;
2316               to_next += 1;
2317               n_left_to_next -= 1;
2318             }
2319         }
2320   
2321       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2322     }
2323
2324   return frame->n_vectors;
2325 }
2326
2327 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2328   .function = ip4_local,
2329   .name = "ip4-local",
2330   .vector_size = sizeof (u32),
2331
2332   .format_trace = format_ip4_forward_next_trace,
2333
2334   .n_next_nodes = IP_LOCAL_N_NEXT,
2335   .next_nodes = {
2336     [IP_LOCAL_NEXT_DROP] = "error-drop",
2337     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2338     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2339     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2340   },
2341 };
2342
2343 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
2344
2345 void ip4_register_protocol (u32 protocol, u32 node_index)
2346 {
2347   vlib_main_t * vm = vlib_get_main();
2348   ip4_main_t * im = &ip4_main;
2349   ip_lookup_main_t * lm = &im->lookup_main;
2350
2351   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2352   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2353 }
2354
2355 static clib_error_t *
2356 show_ip_local_command_fn (vlib_main_t * vm,
2357                           unformat_input_t * input,
2358                          vlib_cli_command_t * cmd)
2359 {
2360   ip4_main_t * im = &ip4_main;
2361   ip_lookup_main_t * lm = &im->lookup_main;
2362   int i;
2363
2364   vlib_cli_output (vm, "Protocols handled by ip4_local");
2365   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2366     {
2367       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2368         vlib_cli_output (vm, "%d", i);
2369     }
2370   return 0;
2371 }
2372
2373
2374
2375 VLIB_CLI_COMMAND (show_ip_local, static) = {
2376   .path = "show ip local",
2377   .function = show_ip_local_command_fn,
2378   .short_help = "Show ip local protocol table",
2379 };
2380
2381 static uword
2382 ip4_arp (vlib_main_t * vm,
2383          vlib_node_runtime_t * node,
2384          vlib_frame_t * frame)
2385 {
2386   vnet_main_t * vnm = vnet_get_main();
2387   ip4_main_t * im = &ip4_main;
2388   ip_lookup_main_t * lm = &im->lookup_main;
2389   u32 * from, * to_next_drop;
2390   uword n_left_from, n_left_to_next_drop, next_index;
2391   static f64 time_last_seed_change = -1e100;
2392   static u32 hash_seeds[3];
2393   static uword hash_bitmap[256 / BITS (uword)]; 
2394   f64 time_now;
2395
2396   if (node->flags & VLIB_NODE_FLAG_TRACE)
2397     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2398
2399   time_now = vlib_time_now (vm);
2400   if (time_now - time_last_seed_change > 1e-3)
2401     {
2402       uword i;
2403       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2404                                              sizeof (hash_seeds));
2405       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2406         hash_seeds[i] = r[i];
2407
2408       /* Mark all hash keys as been no-seen before. */
2409       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2410         hash_bitmap[i] = 0;
2411
2412       time_last_seed_change = time_now;
2413     }
2414
2415   from = vlib_frame_vector_args (frame);
2416   n_left_from = frame->n_vectors;
2417   next_index = node->cached_next_index;
2418   if (next_index == IP4_ARP_NEXT_DROP)
2419     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2420
2421   while (n_left_from > 0)
2422     {
2423       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2424                            to_next_drop, n_left_to_next_drop);
2425
2426       while (n_left_from > 0 && n_left_to_next_drop > 0)
2427         {
2428           vlib_buffer_t * p0;
2429           ip4_header_t * ip0;
2430           ethernet_header_t * eh0;
2431           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2432           uword bm0;
2433           ip_adjacency_t * adj0;
2434
2435           pi0 = from[0];
2436
2437           p0 = vlib_get_buffer (vm, pi0);
2438
2439           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2440           adj0 = ip_get_adjacency (lm, adj_index0);
2441           ip0 = vlib_buffer_get_current (p0);
2442
2443           /* If packet destination is not local, send ARP to next hop */
2444           if (adj0->arp.next_hop.ip4.as_u32)
2445             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2446
2447           /* 
2448            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2449            * rewrite to this packet, we need to skip it here.
2450            * Note, to distinguish from src IP addr *.8.6.*, we
2451            * check for a bcast eth dest instead of IPv4 version.
2452            */
2453           eh0 = (ethernet_header_t*)ip0;
2454           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2455             {
2456               u32 vlan_num = 0;
2457               u16 * etype = &eh0->type;
2458               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2459                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2460                 {
2461                   vlan_num += 1;
2462                   etype += 2; //vlan tag also 16 bits, same as etype
2463                 }
2464               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2465                 {
2466                   vlib_buffer_advance (
2467                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2468                   ip0 = vlib_buffer_get_current (p0);
2469                 }
2470             }
2471
2472           a0 = hash_seeds[0];
2473           b0 = hash_seeds[1];
2474           c0 = hash_seeds[2];
2475
2476           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2477           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2478
2479           a0 ^= ip0->dst_address.data_u32;
2480           b0 ^= sw_if_index0;
2481
2482           hash_v3_finalize32 (a0, b0, c0);
2483
2484           c0 &= BITS (hash_bitmap) - 1;
2485           c0 = c0 / BITS (uword);
2486           m0 = (uword) 1 << (c0 % BITS (uword));
2487
2488           bm0 = hash_bitmap[c0];
2489           drop0 = (bm0 & m0) != 0;
2490
2491           /* Mark it as seen. */
2492           hash_bitmap[c0] = bm0 | m0;
2493
2494           from += 1;
2495           n_left_from -= 1;
2496           to_next_drop[0] = pi0;
2497           to_next_drop += 1;
2498           n_left_to_next_drop -= 1;
2499
2500           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2501
2502           if (drop0)
2503             continue;
2504
2505           /* 
2506            * Can happen if the control-plane is programming tables
2507            * with traffic flowing; at least that's today's lame excuse.
2508            */
2509           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2510             {
2511               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2512             }
2513           else
2514           /* Send ARP request. */
2515           {
2516             u32 bi0 = 0;
2517             vlib_buffer_t * b0;
2518             ethernet_arp_header_t * h0;
2519             vnet_hw_interface_t * hw_if0;
2520
2521             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2522
2523             /* Add rewrite/encap string for ARP packet. */
2524             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2525
2526             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2527
2528             /* Src ethernet address in ARP header. */
2529             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2530                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2531
2532             if (ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0)) {
2533                 //No source address available
2534                 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2535                 vlib_buffer_free(vm, &bi0, 1);
2536                 continue;
2537             }
2538
2539             /* Copy in destination address we are requesting. */
2540             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2541
2542             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2543             b0 = vlib_get_buffer (vm, bi0);
2544             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2545
2546             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2547
2548             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2549           }
2550         }
2551
2552       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2553     }
2554
2555   return frame->n_vectors;
2556 }
2557
/*
 * Human-readable counter names for the ip4-arp node, indexed by the
 * IP4_ARP_ERROR_* codes.  The ip4_arp_node registration below hands this
 * table to the node via .error_strings / .n_errors.
 */
static char * ip4_arp_error_strings[] = {
  /* Packet dropped because its (destination, interface) hash bit was already set. */
  [IP4_ARP_ERROR_DROP] = "address overflow drops",
  /* An ARP request was generated on behalf of this packet. */
  [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
  /* The adjacency was no longer an ARP adjacency when the packet arrived. */
  [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
  [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
  [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
  /* No usable source address was found for the ARP request. */
  [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
};
2566
/*
 * Graph-node registration for "ip4-arp" (dispatch function: ip4_arp).
 * Frames carry u32 buffer indices.  Only the error-drop arc is listed
 * statically here; ip4_arp enqueues the ARP requests it builds to
 * adj->rewrite_header.next_index.
 */
VLIB_REGISTER_NODE (ip4_arp_node) = {
  .function = ip4_arp,
  .name = "ip4-arp",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,

  /* Error counters are described by ip4_arp_error_strings. */
  .n_errors = ARRAY_LEN (ip4_arp_error_strings),
  .error_strings = ip4_arp_error_strings,

  .n_next_nodes = IP4_ARP_N_NEXT,
  .next_nodes = {
    [IP4_ARP_NEXT_DROP] = "error-drop",
  },
};
2582
/*
 * X-macro list of the ip4-arp error codes (IP4_ARP_ERROR_ prefix omitted)
 * that arp_notrace_init registers with the pcap drop-trace filter.
 * The user defines _(a) before expanding the list.
 */
#define foreach_notrace_ip4_arp_error           \
_(DROP)                                         \
_(REQUEST_SENT)                                 \
_(REPLICATE_DROP)                               \
_(REPLICATE_FAIL)
2588
2589 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2590 {
2591   vlib_node_runtime_t *rt = 
2592     vlib_node_get_runtime (vm, ip4_arp_node.index);
2593
2594   /* don't trace ARP request packets */
2595 #define _(a)                                    \
2596     vnet_pcap_drop_trace_filter_add_del         \
2597         (rt->errors[IP4_ARP_ERROR_##a],         \
2598          1 /* is_add */);
2599     foreach_notrace_ip4_arp_error;
2600 #undef _
2601   return 0;
2602 }
2603
2604 VLIB_INIT_FUNCTION(arp_notrace_init);
2605
2606
2607 /* Send an ARP request to see if given destination is reachable on given interface. */
2608 clib_error_t *
2609 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2610 {
2611   vnet_main_t * vnm = vnet_get_main();
2612   ip4_main_t * im = &ip4_main;
2613   ethernet_arp_header_t * h;
2614   ip4_address_t * src;
2615   ip_interface_address_t * ia;
2616   ip_adjacency_t * adj;
2617   vnet_hw_interface_t * hi;
2618   vnet_sw_interface_t * si;
2619   vlib_buffer_t * b;
2620   u32 bi = 0;
2621
2622   si = vnet_get_sw_interface (vnm, sw_if_index);
2623
2624   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2625     {
2626       return clib_error_return (0, "%U: interface %U down",
2627                                 format_ip4_address, dst, 
2628                                 format_vnet_sw_if_index_name, vnm, 
2629                                 sw_if_index);
2630     }
2631
2632   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2633   if (! src)
2634     {
2635       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2636       return clib_error_return 
2637         (0, "no matching interface address for destination %U (interface %U)",
2638          format_ip4_address, dst,
2639          format_vnet_sw_if_index_name, vnm, sw_if_index);
2640     }
2641
2642   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2643
2644   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2645
2646   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2647
2648   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2649
2650   h->ip4_over_ethernet[0].ip4 = src[0];
2651   h->ip4_over_ethernet[1].ip4 = dst[0];
2652
2653   b = vlib_get_buffer (vm, bi);
2654   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2655
2656   /* Add encapsulation string for software interface (e.g. ethernet header). */
2657   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2658   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2659
2660   {
2661     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2662     u32 * to_next = vlib_frame_vector_args (f);
2663     to_next[0] = bi;
2664     f->n_vectors = 1;
2665     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2666   }
2667
2668   return /* no error */ 0;
2669 }
2670
/*
 * Statically registered next-node arcs of the ip4 rewrite nodes.
 * The values index the .next_nodes table of ip4_rewrite_node.
 */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,        /* "error-drop" */
  IP4_REWRITE_NEXT_ARP = 1,         /* "ip4-arp" */
  IP4_REWRITE_NEXT_ICMP_ERROR = 2,  /* "ip4-icmp-error" */
} ip4_rewrite_next_t;
2676
2677 always_inline uword
2678 ip4_rewrite_inline (vlib_main_t * vm,
2679                     vlib_node_runtime_t * node,
2680                     vlib_frame_t * frame,
2681                     int rewrite_for_locally_received_packets)
2682 {
2683   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2684   u32 * from = vlib_frame_vector_args (frame);
2685   u32 n_left_from, n_left_to_next, * to_next, next_index;
2686   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2687   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2688   ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
2689
2690   n_left_from = frame->n_vectors;
2691   next_index = node->cached_next_index;
2692   u32 cpu_index = os_get_cpu_number();
2693   
2694   while (n_left_from > 0)
2695     {
2696       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2697
2698       while (n_left_from >= 4 && n_left_to_next >= 2)
2699         {
2700           ip_adjacency_t * adj0, * adj1;
2701           vlib_buffer_t * p0, * p1;
2702           ip4_header_t * ip0, * ip1;
2703           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2704           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2705           u32 next0_override, next1_override;
2706           u32 tx_sw_if_index0, tx_sw_if_index1;
2707       
2708           if (rewrite_for_locally_received_packets)
2709               next0_override = next1_override = 0;
2710
2711           /* Prefetch next iteration. */
2712           {
2713             vlib_buffer_t * p2, * p3;
2714
2715             p2 = vlib_get_buffer (vm, from[2]);
2716             p3 = vlib_get_buffer (vm, from[3]);
2717
2718             vlib_prefetch_buffer_header (p2, STORE);
2719             vlib_prefetch_buffer_header (p3, STORE);
2720
2721             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2722             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2723           }
2724
2725           pi0 = to_next[0] = from[0];
2726           pi1 = to_next[1] = from[1];
2727
2728           from += 2;
2729           n_left_from -= 2;
2730           to_next += 2;
2731           n_left_to_next -= 2;
2732       
2733           p0 = vlib_get_buffer (vm, pi0);
2734           p1 = vlib_get_buffer (vm, pi1);
2735
2736           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2737           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2738
2739           /* We should never rewrite a pkt using the MISS adjacency */
2740           ASSERT(adj_index0 && adj_index1);
2741
2742           ip0 = vlib_buffer_get_current (p0);
2743           ip1 = vlib_buffer_get_current (p1);
2744
2745           error0 = error1 = IP4_ERROR_NONE;
2746           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2747
2748           /* Decrement TTL & update checksum.
2749              Works either endian, so no need for byte swap. */
2750           if (! rewrite_for_locally_received_packets)
2751             {
2752               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2753
2754               /* Input node should have reject packets with ttl 0. */
2755               ASSERT (ip0->ttl > 0);
2756               ASSERT (ip1->ttl > 0);
2757
2758               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2759               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2760
2761               checksum0 += checksum0 >= 0xffff;
2762               checksum1 += checksum1 >= 0xffff;
2763
2764               ip0->checksum = checksum0;
2765               ip1->checksum = checksum1;
2766
2767               ttl0 -= 1;
2768               ttl1 -= 1;
2769
2770               ip0->ttl = ttl0;
2771               ip1->ttl = ttl1;
2772
2773               /*
2774                * If the ttl drops below 1 when forwarding, generate
2775                * an ICMP response.
2776                */
2777               if (PREDICT_FALSE(ttl0 <= 0))
2778                 {
2779                   error0 = IP4_ERROR_TIME_EXPIRED;
2780                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2781                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2782                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2783                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2784                 }
2785               if (PREDICT_FALSE(ttl1 <= 0))
2786                 {
2787                   error1 = IP4_ERROR_TIME_EXPIRED;
2788                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2789                   icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2790                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2791                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2792                 }
2793
2794               /* Verify checksum. */
2795               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2796               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2797             }
2798
2799           /* Rewrite packet header and updates lengths. */
2800           adj0 = ip_get_adjacency (lm, adj_index0);
2801           adj1 = ip_get_adjacency (lm, adj_index1);
2802       
2803           if (rewrite_for_locally_received_packets)
2804             {
2805               /*
2806                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2807                * we end up here with a local adjacency in hand
2808                * The local adj rewrite data is 0xfefe on purpose.
2809                * Bad engineer, no donut for you.
2810                */
2811               if (PREDICT_FALSE(adj0->lookup_next_index 
2812                                 == IP_LOOKUP_NEXT_LOCAL))
2813                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2814               if (PREDICT_FALSE(adj0->lookup_next_index
2815                                 == IP_LOOKUP_NEXT_ARP))
2816                 next0_override = IP4_REWRITE_NEXT_ARP;
2817               if (PREDICT_FALSE(adj1->lookup_next_index 
2818                                 == IP_LOOKUP_NEXT_LOCAL))
2819                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2820               if (PREDICT_FALSE(adj1->lookup_next_index
2821                                 == IP_LOOKUP_NEXT_ARP))
2822                 next1_override = IP4_REWRITE_NEXT_ARP;
2823             }
2824
2825           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2826           rw_len0 = adj0[0].rewrite_header.data_bytes;
2827           rw_len1 = adj1[0].rewrite_header.data_bytes;
2828           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2829           vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2830
2831           /* Check MTU of outgoing interface. */
2832           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2833                     ? IP4_ERROR_MTU_EXCEEDED
2834                     : error0);
2835           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2836                     ? IP4_ERROR_MTU_EXCEEDED
2837                     : error1);
2838
2839           next0 = (error0 == IP4_ERROR_NONE)
2840             ? adj0[0].rewrite_header.next_index : next0;
2841
2842           if (rewrite_for_locally_received_packets)
2843               next0 = next0 && next0_override ? next0_override : next0;
2844
2845           next1 = (error1 == IP4_ERROR_NONE)
2846             ? adj1[0].rewrite_header.next_index : next1;
2847
2848           if (rewrite_for_locally_received_packets)
2849               next1 = next1 && next1_override ? next1_override : next1;
2850
2851           /* 
2852            * We've already accounted for an ethernet_header_t elsewhere
2853            */
2854           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2855               vlib_increment_combined_counter 
2856                   (&lm->adjacency_counters,
2857                    cpu_index, adj_index0, 
2858                    /* packet increment */ 0,
2859                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2860
2861           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2862               vlib_increment_combined_counter 
2863                   (&lm->adjacency_counters,
2864                    cpu_index, adj_index1, 
2865                    /* packet increment */ 0,
2866                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2867
2868           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2869            * to see the IP headerr */
2870           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2871             {
2872               p0->current_data -= rw_len0;
2873               p0->current_length += rw_len0;
2874               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2875               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2876                   tx_sw_if_index0;
2877
2878               if (PREDICT_FALSE 
2879                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
2880                                     tx_sw_if_index0)))
2881                 {
2882                   p0->current_config_index = 
2883                     vec_elt (cm->config_index_by_sw_if_index, 
2884                              tx_sw_if_index0);
2885                   vnet_get_config_data (&cm->config_main,
2886                                         &p0->current_config_index,
2887                                         &next0,
2888                                         /* # bytes of config data */ 0);
2889                 }
2890             }
2891           if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2892             {
2893               p1->current_data -= rw_len1;
2894               p1->current_length += rw_len1;
2895
2896               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2897               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2898                   tx_sw_if_index1;
2899
2900               if (PREDICT_FALSE 
2901                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
2902                                     tx_sw_if_index1)))
2903                 {
2904                   p1->current_config_index = 
2905                     vec_elt (cm->config_index_by_sw_if_index, 
2906                              tx_sw_if_index1);
2907                   vnet_get_config_data (&cm->config_main,
2908                                         &p1->current_config_index,
2909                                         &next1,
2910                                         /* # bytes of config data */ 0);
2911                 }
2912             }
2913
2914           /* Guess we are only writing on simple Ethernet header. */
2915           vnet_rewrite_two_headers (adj0[0], adj1[0],
2916                                     ip0, ip1,
2917                                     sizeof (ethernet_header_t));
2918       
2919           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2920                                            to_next, n_left_to_next,
2921                                            pi0, pi1, next0, next1);
2922         }
2923
2924       while (n_left_from > 0 && n_left_to_next > 0)
2925         {
2926           ip_adjacency_t * adj0;
2927           vlib_buffer_t * p0;
2928           ip4_header_t * ip0;
2929           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2930           u32 next0_override;
2931           u32 tx_sw_if_index0;
2932       
2933           if (rewrite_for_locally_received_packets)
2934               next0_override = 0;
2935
2936           pi0 = to_next[0] = from[0];
2937
2938           p0 = vlib_get_buffer (vm, pi0);
2939
2940           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2941
2942           /* We should never rewrite a pkt using the MISS adjacency */
2943           ASSERT(adj_index0);
2944
2945           adj0 = ip_get_adjacency (lm, adj_index0);
2946       
2947           ip0 = vlib_buffer_get_current (p0);
2948
2949           error0 = IP4_ERROR_NONE;
2950           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
2951
2952           /* Decrement TTL & update checksum. */
2953           if (! rewrite_for_locally_received_packets)
2954             {
2955               i32 ttl0 = ip0->ttl;
2956
2957               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2958
2959               checksum0 += checksum0 >= 0xffff;
2960
2961               ip0->checksum = checksum0;
2962
2963               ASSERT (ip0->ttl > 0);
2964
2965               ttl0 -= 1;
2966
2967               ip0->ttl = ttl0;
2968
2969               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2970
2971               if (PREDICT_FALSE(ttl0 <= 0))
2972                 {
2973                   /*
2974                    * If the ttl drops below 1 when forwarding, generate
2975                    * an ICMP response.
2976                    */
2977                   error0 = IP4_ERROR_TIME_EXPIRED;
2978                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2979                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2980                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2981                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2982                 }
2983             }
2984
2985           if (rewrite_for_locally_received_packets)
2986             {
2987               /*
2988                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2989                * we end up here with a local adjacency in hand
2990                * The local adj rewrite data is 0xfefe on purpose.
2991                * Bad engineer, no donut for you.
2992                */
2993               if (PREDICT_FALSE(adj0->lookup_next_index 
2994                                 == IP_LOOKUP_NEXT_LOCAL))
2995                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2996               /* 
2997                * We have to override the next_index in ARP adjacencies,
2998                * because they're set up for ip4-arp, not this node...
2999                */
3000               if (PREDICT_FALSE(adj0->lookup_next_index
3001                                 == IP_LOOKUP_NEXT_ARP))
3002                 next0_override = IP4_REWRITE_NEXT_ARP;
3003             }
3004
3005           /* Guess we are only writing on simple Ethernet header. */
3006           vnet_rewrite_one_header (adj0[0], ip0, 
3007                                    sizeof (ethernet_header_t));
3008           
3009           /* Update packet buffer attributes/set output interface. */
3010           rw_len0 = adj0[0].rewrite_header.data_bytes;
3011           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
3012           
3013           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
3014               vlib_increment_combined_counter 
3015                   (&lm->adjacency_counters,
3016                    cpu_index, adj_index0, 
3017                    /* packet increment */ 0,
3018                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
3019           
3020           /* Check MTU of outgoing interface. */
3021           error0 = (vlib_buffer_length_in_chain (vm, p0) 
3022                     > adj0[0].rewrite_header.max_l3_packet_bytes
3023                     ? IP4_ERROR_MTU_EXCEEDED
3024                     : error0);
3025
3026           p0->error = error_node->errors[error0];
3027
3028           /* Don't adjust the buffer for ttl issue; icmp-error node wants
3029            * to see the IP headerr */
3030           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
3031             {
3032               p0->current_data -= rw_len0;
3033               p0->current_length += rw_len0;
3034               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
3035
3036               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
3037               next0 = adj0[0].rewrite_header.next_index;
3038
3039               if (PREDICT_FALSE 
3040                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
3041                                     tx_sw_if_index0)))
3042                   {
3043                     p0->current_config_index = 
3044                       vec_elt (cm->config_index_by_sw_if_index, 
3045                                tx_sw_if_index0);
3046                     vnet_get_config_data (&cm->config_main,
3047                                           &p0->current_config_index,
3048                                           &next0,
3049                                           /* # bytes of config data */ 0);
3050                   }
3051             }
3052
3053           if (rewrite_for_locally_received_packets)
3054               next0 = next0 && next0_override ? next0_override : next0;
3055
3056           from += 1;
3057           n_left_from -= 1;
3058           to_next += 1;
3059           n_left_to_next -= 1;
3060       
3061           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3062                                            to_next, n_left_to_next,
3063                                            pi0, next0);
3064         }
3065   
3066       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3067     }
3068
3069   /* Need to do trace after rewrites to pick up new packet data. */
3070   if (node->flags & VLIB_NODE_FLAG_TRACE)
3071     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
3072
3073   return frame->n_vectors;
3074 }
3075
3076
3077 /** @brief IPv4 transit rewrite node.
3078     @node ip4-rewrite-transit
3079
3080     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
3081     header checksum, fetch the ip adjacency, check the outbound mtu,
3082     apply the adjacency rewrite, and send pkts to the adjacency
3083     rewrite header's rewrite_next_index.
3084
3085     @param vm vlib_main_t corresponding to the current thread
3086     @param node vlib_node_runtime_t
3087     @param frame vlib_frame_t whose contents should be dispatched
3088
3089     @par Graph mechanics: buffer metadata, next index usage
3090
3091     @em Uses:
3092     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
3093         - the rewrite adjacency index
3094     - <code>adj->lookup_next_index</code>
3095         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
3096           the packet will be dropped. 
3097     - <code>adj->rewrite_header</code>
3098         - Rewrite string length, rewrite string, next_index
3099
3100     @em Sets:
3101     - <code>b->current_data, b->current_length</code>
3102         - Updated net of applying the rewrite string
3103
3104     <em>Next Indices:</em>
3105     - <code> adj->rewrite_header.next_index </code>
3106       or @c error-drop 
3107 */
3108 static uword
3109 ip4_rewrite_transit (vlib_main_t * vm,
3110                      vlib_node_runtime_t * node,
3111                      vlib_frame_t * frame)
3112 {
3113   return ip4_rewrite_inline (vm, node, frame,
3114                              /* rewrite_for_locally_received_packets */ 0);
3115 }
3116
3117 /** @brief IPv4 local rewrite node.
3118     @node ip4-rewrite-local
3119
3120     This is the IPv4 local rewrite node. Fetch the ip adjacency, check
3121     the outbound interface mtu, apply the adjacency rewrite, and send
3122     pkts to the adjacency rewrite header's rewrite_next_index. Deal
3123     with hemorrhoids of the form "some clown sends an icmp4 w/ src =
3124     dst = interface addr."
3125
3126     @param vm vlib_main_t corresponding to the current thread
3127     @param node vlib_node_runtime_t
3128     @param frame vlib_frame_t whose contents should be dispatched
3129
3130     @par Graph mechanics: buffer metadata, next index usage
3131
3132     @em Uses:
3133     - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
3134         - the rewrite adjacency index
3135     - <code>adj->lookup_next_index</code>
3136         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
3137           the packet will be dropped. 
3138     - <code>adj->rewrite_header</code>
3139         - Rewrite string length, rewrite string, next_index
3140
3141     @em Sets:
3142     - <code>b->current_data, b->current_length</code>
3143         - Updated net of applying the rewrite string
3144
3145     <em>Next Indices:</em>
3146     - <code> adj->rewrite_header.next_index </code>
3147       or @c error-drop 
3148 */
3149
3150 static uword
3151 ip4_rewrite_local (vlib_main_t * vm,
3152                    vlib_node_runtime_t * node,
3153                    vlib_frame_t * frame)
3154 {
3155   return ip4_rewrite_inline (vm, node, frame,
3156                              /* rewrite_for_locally_received_packets */ 1);
3157 }
3158
/*
 * Graph-node registration for "ip4-rewrite-transit" (dispatch function:
 * ip4_rewrite_transit).  Frames carry u32 buffer indices.
 */
VLIB_REGISTER_NODE (ip4_rewrite_node) = {
  .function = ip4_rewrite_transit,
  .name = "ip4-rewrite-transit",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_rewrite_trace,

  /* One static arc per ip4_rewrite_next_t entry (3 in total). */
  .n_next_nodes = 3,
  .next_nodes = {
    [IP4_REWRITE_NEXT_DROP] = "error-drop",
    [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
    [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
  },
};

/* NOTE(review): presumably emits CPU-architecture-specific variants of the
   node function - confirm against the macro definition. */
VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit);
3175
/*
 * Graph-node registration for "ip4-rewrite-local" (dispatch function:
 * ip4_rewrite_local).  Frames carry u32 buffer indices.
 */
VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
  .function = ip4_rewrite_local,
  .name = "ip4-rewrite-local",
  .vector_size = sizeof (u32),

  /* NOTE(review): presumably makes this node share the next-node arcs of
     ip4-rewrite-transit, which would explain n_next_nodes = 0 below -
     confirm against the vlib node documentation. */
  .sibling_of = "ip4-rewrite-transit",

  .format_trace = format_ip4_rewrite_trace,

  .n_next_nodes = 0,
};

/* NOTE(review): presumably emits CPU-architecture-specific variants of the
   node function - confirm against the macro definition. */
VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local);
3189
3190 static clib_error_t *
3191 add_del_interface_table (vlib_main_t * vm,
3192                          unformat_input_t * input,
3193                          vlib_cli_command_t * cmd)
3194 {
3195   vnet_main_t * vnm = vnet_get_main();
3196   clib_error_t * error = 0;
3197   u32 sw_if_index, table_id;
3198
3199   sw_if_index = ~0;
3200
3201   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
3202     {
3203       error = clib_error_return (0, "unknown interface `%U'",
3204                                  format_unformat_error, input);
3205       goto done;
3206     }
3207
3208   if (unformat (input, "%d", &table_id))
3209     ;
3210   else
3211     {
3212       error = clib_error_return (0, "expected table id `%U'",
3213                                  format_unformat_error, input);
3214       goto done;
3215     }
3216
3217   {
3218     ip4_main_t * im = &ip4_main;
3219     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
3220
3221     if (fib) 
3222       {
3223         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
3224         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
3225     }
3226   }
3227
3228  done:
3229   return error;
3230 }
3231
/*
 * CLI registration: "set interface ip table", handled by
 * add_del_interface_table, which parses an interface followed by a
 * table id.
 */
VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
  .path = "set interface ip table",
  .function = add_del_interface_table,
  .short_help = "Add/delete FIB table id for interface",
};
3237
3238
3239 static uword
3240 ip4_lookup_multicast (vlib_main_t * vm,
3241                       vlib_node_runtime_t * node,
3242                       vlib_frame_t * frame)
3243 {
3244   ip4_main_t * im = &ip4_main;
3245   ip_lookup_main_t * lm = &im->lookup_main;
3246   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
3247   u32 n_left_from, n_left_to_next, * from, * to_next;
3248   ip_lookup_next_t next;
3249   u32 cpu_index = os_get_cpu_number();
3250
3251   from = vlib_frame_vector_args (frame);
3252   n_left_from = frame->n_vectors;
3253   next = node->cached_next_index;
3254
3255   while (n_left_from > 0)
3256     {
3257       vlib_get_next_frame (vm, node, next,
3258                            to_next, n_left_to_next);
3259
3260       while (n_left_from >= 4 && n_left_to_next >= 2)
3261         {
3262           vlib_buffer_t * p0, * p1;
3263           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
3264           ip_lookup_next_t next0, next1;
3265           ip4_header_t * ip0, * ip1;
3266           ip_adjacency_t * adj0, * adj1;
3267           u32 fib_index0, fib_index1;
3268           u32 flow_hash_config0, flow_hash_config1;
3269
3270           /* Prefetch next iteration. */
3271           {
3272             vlib_buffer_t * p2, * p3;
3273
3274             p2 = vlib_get_buffer (vm, from[2]);
3275             p3 = vlib_get_buffer (vm, from[3]);
3276
3277             vlib_prefetch_buffer_header (p2, LOAD);
3278             vlib_prefetch_buffer_header (p3, LOAD);
3279
3280             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
3281             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
3282           }
3283
3284           pi0 = to_next[0] = from[0];
3285           pi1 = to_next[1] = from[1];
3286
3287           p0 = vlib_get_buffer (vm, pi0);
3288           p1 = vlib_get_buffer (vm, pi1);
3289
3290           ip0 = vlib_buffer_get_current (p0);
3291           ip1 = vlib_buffer_get_current (p1);
3292
3293           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3294           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
3295           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3296             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3297           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
3298             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
3299
3300           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3301                                               &ip0->dst_address, p0);
3302           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
3303                                               &ip1->dst_address, p1);
3304
3305           adj0 = ip_get_adjacency (lm, adj_index0);
3306           adj1 = ip_get_adjacency (lm, adj_index1);
3307
3308           next0 = adj0->lookup_next_index;
3309           next1 = adj1->lookup_next_index;
3310
3311           flow_hash_config0 = 
3312               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3313
3314           flow_hash_config1 = 
3315               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
3316
3317           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
3318               (ip0, flow_hash_config0);
3319                                                                   
3320           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
3321               (ip1, flow_hash_config1);
3322
3323           ASSERT (adj0->n_adj > 0);
3324           ASSERT (adj1->n_adj > 0);
3325           ASSERT (is_pow2 (adj0->n_adj));
3326           ASSERT (is_pow2 (adj1->n_adj));
3327           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3328           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
3329
3330           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3331           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
3332
3333           if (1) /* $$$$$$ HACK FIXME */
3334           vlib_increment_combined_counter 
3335               (cm, cpu_index, adj_index0, 1,
3336                vlib_buffer_length_in_chain (vm, p0));
3337           if (1) /* $$$$$$ HACK FIXME */
3338           vlib_increment_combined_counter 
3339               (cm, cpu_index, adj_index1, 1,
3340                vlib_buffer_length_in_chain (vm, p1));
3341
3342           from += 2;
3343           to_next += 2;
3344           n_left_to_next -= 2;
3345           n_left_from -= 2;
3346
3347           wrong_next = (next0 != next) + 2*(next1 != next);
3348           if (PREDICT_FALSE (wrong_next != 0))
3349             {
3350               switch (wrong_next)
3351                 {
3352                 case 1:
3353                   /* A B A */
3354                   to_next[-2] = pi1;
3355                   to_next -= 1;
3356                   n_left_to_next += 1;
3357                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3358                   break;
3359
3360                 case 2:
3361                   /* A A B */
3362                   to_next -= 1;
3363                   n_left_to_next += 1;
3364                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3365                   break;
3366
3367                 case 3:
3368                   /* A B C */
3369                   to_next -= 2;
3370                   n_left_to_next += 2;
3371                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3372                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3373                   if (next0 == next1)
3374                     {
3375                       /* A B B */
3376                       vlib_put_next_frame (vm, node, next, n_left_to_next);
3377                       next = next1;
3378                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
3379                     }
3380                 }
3381             }
3382         }
3383     
3384       while (n_left_from > 0 && n_left_to_next > 0)
3385         {
3386           vlib_buffer_t * p0;
3387           ip4_header_t * ip0;
3388           u32 pi0, adj_index0;
3389           ip_lookup_next_t next0;
3390           ip_adjacency_t * adj0;
3391           u32 fib_index0;
3392           u32 flow_hash_config0;
3393
3394           pi0 = from[0];
3395           to_next[0] = pi0;
3396
3397           p0 = vlib_get_buffer (vm, pi0);
3398
3399           ip0 = vlib_buffer_get_current (p0);
3400
3401           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
3402                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3403           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3404               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3405           
3406           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3407                                               &ip0->dst_address, p0);
3408
3409           adj0 = ip_get_adjacency (lm, adj_index0);
3410
3411           next0 = adj0->lookup_next_index;
3412
3413           flow_hash_config0 = 
3414               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3415
3416           vnet_buffer (p0)->ip.flow_hash = 
3417             ip4_compute_flow_hash (ip0, flow_hash_config0);
3418
3419           ASSERT (adj0->n_adj > 0);
3420           ASSERT (is_pow2 (adj0->n_adj));
3421           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3422
3423           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3424
3425           if (1) /* $$$$$$ HACK FIXME */
3426               vlib_increment_combined_counter 
3427                   (cm, cpu_index, adj_index0, 1,
3428                    vlib_buffer_length_in_chain (vm, p0));
3429
3430           from += 1;
3431           to_next += 1;
3432           n_left_to_next -= 1;
3433           n_left_from -= 1;
3434
3435           if (PREDICT_FALSE (next0 != next))
3436             {
3437               n_left_to_next += 1;
3438               vlib_put_next_frame (vm, node, next, n_left_to_next);
3439               next = next0;
3440               vlib_get_next_frame (vm, node, next,
3441                                    to_next, n_left_to_next);
3442               to_next[0] = pi0;
3443               to_next += 1;
3444               n_left_to_next -= 1;
3445             }
3446         }
3447
3448       vlib_put_next_frame (vm, node, next, n_left_to_next);
3449     }
3450
3451   if (node->flags & VLIB_NODE_FLAG_TRACE)
3452       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
3453
3454   return frame->n_vectors;
3455 }
3456
3457 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3458   .function = ip4_lookup_multicast,
3459   .name = "ip4-lookup-multicast",
3460   .vector_size = sizeof (u32),
3461   .sibling_of = "ip4-lookup",
3462   .format_trace = format_ip4_lookup_trace,
3463
3464   .n_next_nodes = 0,
3465 };
3466
3467 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast);
3468
3469 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3470   .function = ip4_drop,
3471   .name = "ip4-multicast",
3472   .vector_size = sizeof (u32),
3473
3474   .format_trace = format_ip4_forward_next_trace,
3475
3476   .n_next_nodes = 1,
3477   .next_nodes = {
3478     [0] = "error-drop",
3479   },
3480 };
3481
3482 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3483 {
3484   ip4_main_t * im = &ip4_main;
3485   ip4_fib_mtrie_t * mtrie0;
3486   ip4_fib_mtrie_leaf_t leaf0;
3487   u32 adj_index0;
3488     
3489   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3490
3491   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3492   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3493   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3494   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3495   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3496   
3497   /* Handle default route. */
3498   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3499   
3500   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3501   
3502   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3503                                                   a, 
3504                                                   /* no_default_route */ 0);
3505 }
3506  
3507 static clib_error_t *
3508 test_lookup_command_fn (vlib_main_t * vm,
3509                         unformat_input_t * input,
3510                         vlib_cli_command_t * cmd)
3511 {
3512   u32 table_id = 0;
3513   f64 count = 1;
3514   u32 n;
3515   int i;
3516   ip4_address_t ip4_base_address;
3517   u64 errors = 0;
3518
3519   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3520       if (unformat (input, "table %d", &table_id))
3521         ;
3522       else if (unformat (input, "count %f", &count))
3523         ;
3524
3525       else if (unformat (input, "%U",
3526                          unformat_ip4_address, &ip4_base_address))
3527         ;
3528       else
3529         return clib_error_return (0, "unknown input `%U'",
3530                                   format_unformat_error, input);
3531   }
3532
3533   n = count;
3534
3535   for (i = 0; i < n; i++)
3536     {
3537       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3538         errors++;
3539
3540       ip4_base_address.as_u32 = 
3541         clib_host_to_net_u32 (1 + 
3542                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3543     }
3544
3545   if (errors) 
3546     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3547   else
3548     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3549
3550   return 0;
3551 }
3552
3553 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3554     .path = "test lookup",
3555     .short_help = "test lookup",
3556     .function = test_lookup_command_fn,
3557 };
3558
3559 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3560 {
3561   ip4_main_t * im4 = &ip4_main;
3562   ip4_fib_t * fib;
3563   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3564
3565   if (p == 0)
3566     return VNET_API_ERROR_NO_SUCH_FIB;
3567
3568   fib = vec_elt_at_index (im4->fibs, p[0]);
3569
3570   fib->flow_hash_config = flow_hash_config;
3571   return 0;
3572 }
3573  
3574 static clib_error_t *
3575 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3576                              unformat_input_t * input,
3577                              vlib_cli_command_t * cmd)
3578 {
3579   int matched = 0;
3580   u32 table_id = 0;
3581   u32 flow_hash_config = 0;
3582   int rv;
3583
3584   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3585     if (unformat (input, "table %d", &table_id))
3586       matched = 1;
3587 #define _(a,v) \
3588     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3589     foreach_flow_hash_bit
3590 #undef _
3591     else break;
3592   }
3593   
3594   if (matched == 0)
3595     return clib_error_return (0, "unknown input `%U'",
3596                               format_unformat_error, input);
3597   
3598   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3599   switch (rv)
3600     {
3601     case 0:
3602       break;
3603       
3604     case VNET_API_ERROR_NO_SUCH_FIB:
3605       return clib_error_return (0, "no such FIB table %d", table_id);
3606       
3607     default:
3608       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3609       break;
3610     }
3611   
3612   return 0;
3613 }
3614  
3615 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3616   .path = "set ip flow-hash",
3617   .short_help = 
3618   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3619   .function = set_ip_flow_hash_command_fn,
3620 };
3621  
3622 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3623                                  u32 table_index)
3624 {
3625   vnet_main_t * vnm = vnet_get_main();
3626   vnet_interface_main_t * im = &vnm->interface_main;
3627   ip4_main_t * ipm = &ip4_main;
3628   ip_lookup_main_t * lm = &ipm->lookup_main;
3629   vnet_classify_main_t * cm = &vnet_classify_main;
3630
3631   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3632     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3633
3634   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3635     return VNET_API_ERROR_NO_SUCH_ENTRY;
3636
3637   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3638   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3639
3640   return 0;
3641 }
3642
3643 static clib_error_t *
3644 set_ip_classify_command_fn (vlib_main_t * vm,
3645                             unformat_input_t * input,
3646                             vlib_cli_command_t * cmd)
3647 {
3648   u32 table_index = ~0;
3649   int table_index_set = 0;
3650   u32 sw_if_index = ~0;
3651   int rv;
3652   
3653   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3654     if (unformat (input, "table-index %d", &table_index))
3655       table_index_set = 1;
3656     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3657                        vnet_get_main(), &sw_if_index))
3658       ;
3659     else
3660       break;
3661   }
3662       
3663   if (table_index_set == 0)
3664     return clib_error_return (0, "classify table-index must be specified");
3665
3666   if (sw_if_index == ~0)
3667     return clib_error_return (0, "interface / subif must be specified");
3668
3669   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3670
3671   switch (rv)
3672     {
3673     case 0:
3674       break;
3675
3676     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3677       return clib_error_return (0, "No such interface");
3678
3679     case VNET_API_ERROR_NO_SUCH_ENTRY:
3680       return clib_error_return (0, "No such classifier table");
3681     }
3682   return 0;
3683 }
3684
3685 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3686     .path = "set ip classify",
3687     .short_help = 
3688     "set ip classify intfc <int> table-index <index>",
3689     .function = set_ip_classify_command_fn,
3690 };
3691
3692
3693 #define TEST_CODE 1
3694 #if TEST_CODE > 0
3695
3696 static clib_error_t *
3697 set_interface_output_feature_command_fn (vlib_main_t * vm,
3698                                          unformat_input_t * input,
3699                                          vlib_cli_command_t * cmd)
3700 {
3701   vnet_main_t * vnm = vnet_get_main();
3702   u32 sw_if_index = ~0;
3703   int is_add = 1;
3704   ip4_main_t * im = &ip4_main;
3705   ip_lookup_main_t * lm = &im->lookup_main;
3706
3707   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) 
3708     {
3709       if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
3710         ;
3711       else if (unformat (input, "del"))
3712         is_add = 0;
3713       else
3714         break;
3715     }
3716
3717   if (sw_if_index == ~0)
3718     return clib_error_return (0, "unknown interface `%U'",
3719                               format_unformat_error, input);
3720
3721   lm->tx_sw_if_has_ip_output_features =
3722     clib_bitmap_set (lm->tx_sw_if_has_ip_output_features, sw_if_index, is_add);
3723
3724   return 0;
3725 }
3726
3727 VLIB_CLI_COMMAND (set_interface_output_feature, static) = {
3728   .path = "set interface output feature",
3729   .function = set_interface_output_feature_command_fn,
3730   .short_help = "set interface output feature <intfc>",
3731 };
3732 #endif /* TEST_CODE */