Create macro for next_nodes used in lookup family of nodes
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47
48 /* This is really, really simple but stupid fib. */
49 u32
50 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
51                            ip4_address_t * dst,
52                            u32 disable_default_route)
53 {
54   ip_lookup_main_t * lm = &im->lookup_main;
55   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
56   uword * p, * hash, key;
57   i32 i, i_min, dst_address, ai;
58
59   i_min = disable_default_route ? 1 : 0;
60   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
61   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
62     {
63       hash = fib->adj_index_by_dst_address[i];
64       if (! hash)
65         continue;
66
67       key = dst_address & im->fib_masks[i];
68       if ((p = hash_get (hash, key)) != 0)
69         {
70           ai = p[0];
71           goto done;
72         }
73     }
74     
75   /* Nothing matches in table. */
76   ai = lm->miss_adj_index;
77
78  done:
79   return ai;
80 }
81
82 static ip4_fib_t *
83 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
84 {
85   ip4_fib_t * fib;
86   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
87   vec_add2 (im->fibs, fib, 1);
88   fib->table_id = table_id;
89   fib->index = fib - im->fibs;
90   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
91   fib->fwd_classify_table_index = ~0;
92   fib->rev_classify_table_index = ~0;
93   ip4_mtrie_init (&fib->mtrie);
94   return fib;
95 }
96
97 ip4_fib_t *
98 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
99                                    u32 table_index_or_id, u32 flags)
100 {
101   uword * p, fib_index;
102
103   fib_index = table_index_or_id;
104   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
105     {
106       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
107       if (! p)
108         return create_fib_with_table_id (im, table_index_or_id);
109       fib_index = p[0];
110     }
111   return vec_elt_at_index (im->fibs, fib_index);
112 }
113
114 static void
115 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
116                                        ip4_fib_t * fib,
117                                        u32 address_length)
118 {
119   hash_t * h;
120   uword max_index;
121
122   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
123   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
124
125   fib->adj_index_by_dst_address[address_length] =
126     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
127
128   hash_set_flags (fib->adj_index_by_dst_address[address_length],
129                   HASH_FLAG_NO_AUTO_SHRINK);
130
131   h = hash_header (fib->adj_index_by_dst_address[address_length]);
132   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
133
134   /* Initialize new/old hash value vectors. */
135   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
136   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
137 }
138
139 static void
140 ip4_fib_set_adj_index (ip4_main_t * im,
141                        ip4_fib_t * fib,
142                        u32 flags,
143                        u32 dst_address_u32,
144                        u32 dst_address_length,
145                        u32 adj_index)
146 {
147   ip_lookup_main_t * lm = &im->lookup_main;
148   uword * hash;
149
150   if (vec_bytes(fib->old_hash_values))
151     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
152   if (vec_bytes(fib->new_hash_values))
153     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
154   fib->new_hash_values[0] = adj_index;
155
156   /* Make sure adj index is valid. */
157   if (CLIB_DEBUG > 0)
158     (void) ip_get_adjacency (lm, adj_index);
159
160   hash = fib->adj_index_by_dst_address[dst_address_length];
161
162   hash = _hash_set3 (hash, dst_address_u32,
163                      fib->new_hash_values,
164                      fib->old_hash_values);
165
166   fib->adj_index_by_dst_address[dst_address_length] = hash;
167
168   if (vec_len (im->add_del_route_callbacks) > 0)
169     {
170       ip4_add_del_route_callback_t * cb;
171       ip4_address_t d;
172       uword * p;
173
174       d.data_u32 = dst_address_u32;
175       vec_foreach (cb, im->add_del_route_callbacks)
176         if ((flags & cb->required_flags) == cb->required_flags)
177           cb->function (im, cb->function_opaque,
178                         fib, flags,
179                         &d, dst_address_length,
180                         fib->old_hash_values,
181                         fib->new_hash_values);
182
183       p = hash_get (hash, dst_address_u32);
184       memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
185     }
186 }
187
188 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
189 {
190   ip_lookup_main_t * lm = &im->lookup_main;
191   ip4_fib_t * fib;
192   u32 dst_address, dst_address_length, adj_index, old_adj_index;
193   uword * hash, is_del;
194   ip4_add_del_route_callback_t * cb;
195
196   /* Either create new adjacency or use given one depending on arguments. */
197   if (a->n_add_adj > 0)
198     {
199       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
200       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
201     }
202   else
203     adj_index = a->adj_index;
204
205   dst_address = a->dst_address.data_u32;
206   dst_address_length = a->dst_address_length;
207   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
208
209   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
210   dst_address &= im->fib_masks[dst_address_length];
211
212   if (! fib->adj_index_by_dst_address[dst_address_length])
213     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
214
215   hash = fib->adj_index_by_dst_address[dst_address_length];
216
217   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
218
219   if (is_del)
220     {
221       fib->old_hash_values[0] = ~0;
222       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
223       fib->adj_index_by_dst_address[dst_address_length] = hash;
224
225       if (vec_len (im->add_del_route_callbacks) > 0
226           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
227         {
228           fib->new_hash_values[0] = ~0;
229           vec_foreach (cb, im->add_del_route_callbacks)
230             if ((a->flags & cb->required_flags) == cb->required_flags)
231               cb->function (im, cb->function_opaque,
232                             fib, a->flags,
233                             &a->dst_address, dst_address_length,
234                             fib->old_hash_values,
235                             fib->new_hash_values);
236         }
237     }
238   else
239     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
240                            adj_index);
241
242   old_adj_index = fib->old_hash_values[0];
243
244   /* Avoid spurious reference count increments */
245   if (old_adj_index == adj_index && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
246     {
247       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
248       if (adj->share_count > 0)
249         adj->share_count --;
250     }
251
252   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
253                                is_del ? old_adj_index : adj_index,
254                                is_del);
255
256   /* Delete old adjacency index if present and changed. */
257   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
258       && old_adj_index != ~0
259       && old_adj_index != adj_index)
260     ip_del_adjacency (lm, old_adj_index);
261 }
262
263 void
264 ip4_add_del_route_next_hop (ip4_main_t * im,
265                             u32 flags,
266                             ip4_address_t * dst_address,
267                             u32 dst_address_length,
268                             ip4_address_t * next_hop,
269                             u32 next_hop_sw_if_index,
270                             u32 next_hop_weight, u32 adj_index, 
271                             u32 explicit_fib_index)
272 {
273   vnet_main_t * vnm = vnet_get_main();
274   ip_lookup_main_t * lm = &im->lookup_main;
275   u32 fib_index;
276   ip4_fib_t * fib;
277   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
278   u32 dst_adj_index, nh_adj_index;
279   uword * dst_hash, * dst_result;
280   uword * nh_hash, * nh_result;
281   ip_adjacency_t * dst_adj;
282   ip_multipath_adjacency_t * old_mp, * new_mp;
283   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
284   int is_interface_next_hop;
285   clib_error_t * error = 0;
286
287   if (explicit_fib_index == (u32)~0)
288       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
289   else
290       fib_index = explicit_fib_index;
291
292   fib = vec_elt_at_index (im->fibs, fib_index);
293   
294   /* Lookup next hop to be added or deleted. */
295   is_interface_next_hop = next_hop->data_u32 == 0;
296   if (adj_index == (u32)~0)
297     {
298       if (is_interface_next_hop)
299         {
300           nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
301           if (nh_result)
302             nh_adj_index = *nh_result;
303           else
304             {
305               ip_adjacency_t * adj;
306               adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
307                                       &nh_adj_index);
308               ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
309               ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
310               hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
311             }
312         }
313       else
314         {
315           nh_hash = fib->adj_index_by_dst_address[32];
316           nh_result = hash_get (nh_hash, next_hop->data_u32);
317           
318           /* Next hop must be known. */
319           if (! nh_result)
320             {
321               ip_adjacency_t * adj;
322
323               nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
324                                                         next_hop, 0);
325               adj = ip_get_adjacency (lm, nh_adj_index);
326               /* if ARP interface adjacencty is present, we need to
327                  install ARP adjaceny for specific next hop */
328               if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
329                   adj->arp.next_hop.ip4.as_u32 == 0)
330                 {
331                   nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
332                 }
333               else
334                 {
335                   vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_IN_FIB;
336                   error = clib_error_return (0, "next-hop %U/32 not in FIB",
337                                              format_ip4_address, next_hop);
338                   goto done;
339                 }
340             }
341           else
342             nh_adj_index = *nh_result;
343         }
344     }
345   else
346     {
347       nh_adj_index = adj_index;
348     }
349   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
350   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
351
352   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
353   dst_result = hash_get (dst_hash, dst_address_u32);
354   if (dst_result)
355     {
356       dst_adj_index = dst_result[0];
357       dst_adj = ip_get_adjacency (lm, dst_adj_index);
358     }
359   else
360     {
361       /* For deletes destination must be known. */
362       if (is_del)
363         {
364           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
365           error = clib_error_return (0, "unknown destination %U/%d",
366                                      format_ip4_address, dst_address,
367                                      dst_address_length);
368           goto done;
369         }
370
371       dst_adj_index = ~0;
372       dst_adj = 0;
373     }
374
375   /* Ignore adds of X/32 with next hop of X. */
376   if (! is_del
377       && dst_address_length == 32
378       && dst_address->data_u32 == next_hop->data_u32 
379       && adj_index != (u32)~0)
380     {
381       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
382       error = clib_error_return (0, "prefix matches next hop %U/%d",
383                                  format_ip4_address, dst_address,
384                                  dst_address_length);
385       goto done;
386     }
387
388   /* Destination is not known and default weight is set so add route
389      to existing non-multipath adjacency */
390   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
391     {
392       /* create new adjacency */
393       ip4_add_del_route_args_t a;
394       a.table_index_or_table_id = fib_index;
395       a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
396                  | IP4_ROUTE_FLAG_FIB_INDEX
397                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
398                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
399                              | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
400       a.dst_address = dst_address[0];
401       a.dst_address_length = dst_address_length;
402       a.adj_index = nh_adj_index;
403       a.add_adj = 0;
404       a.n_add_adj = 0;
405
406       ip4_add_del_route (im, &a);
407
408       goto done;
409     }
410
411   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
412
413   if (! ip_multipath_adjacency_add_del_next_hop
414       (lm, is_del,
415        old_mp_adj_index,
416        nh_adj_index,
417        next_hop_weight,
418        &new_mp_adj_index))
419     {
420       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
421       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
422                                  format_ip4_address, next_hop);
423       goto done;
424     }
425   
426   old_mp = new_mp = 0;
427   if (old_mp_adj_index != ~0)
428     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
429   if (new_mp_adj_index != ~0)
430     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
431
432   if (old_mp != new_mp)
433     {
434       ip4_add_del_route_args_t a;
435       a.table_index_or_table_id = fib_index;
436       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
437                  | IP4_ROUTE_FLAG_FIB_INDEX
438                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
439                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
440       a.dst_address = dst_address[0];
441       a.dst_address_length = dst_address_length;
442       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
443       a.add_adj = 0;
444       a.n_add_adj = 0;
445
446       ip4_add_del_route (im, &a);
447     }
448
449  done:
450   if (error)
451     clib_error_report (error);
452 }
453
454 void *
455 ip4_get_route (ip4_main_t * im,
456                u32 table_index_or_table_id,
457                u32 flags,
458                u8 * address,
459                u32 address_length)
460 {
461   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
462   u32 dst_address = * (u32 *) address;
463   uword * hash, * p;
464
465   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
466   dst_address &= im->fib_masks[address_length];
467
468   hash = fib->adj_index_by_dst_address[address_length];
469   p = hash_get (hash, dst_address);
470   return (void *) p;
471 }
472
473 void
474 ip4_foreach_matching_route (ip4_main_t * im,
475                             u32 table_index_or_table_id,
476                             u32 flags,
477                             ip4_address_t * address,
478                             u32 address_length,
479                             ip4_address_t ** results,
480                             u8 ** result_lengths)
481 {
482   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
483   u32 dst_address = address->data_u32;
484   u32 this_length = address_length;
485   
486   if (*results)
487     _vec_len (*results) = 0;
488   if (*result_lengths)
489     _vec_len (*result_lengths) = 0;
490
491   while (this_length <= 32 && vec_len (results) == 0)
492     {
493       uword k, v;
494       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
495         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
496           {
497             ip4_address_t a;
498             a.data_u32 = k;
499             vec_add1 (*results, a);
500             vec_add1 (*result_lengths, this_length);
501           }
502       }));
503
504       this_length++;
505     }
506 }
507
508 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
509                                   u32 table_index_or_table_id,
510                                   u32 flags)
511 {
512   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
513   ip_lookup_main_t * lm = &im->lookup_main;
514   u32 i, l;
515   ip4_address_t a;
516   ip4_add_del_route_callback_t * cb;
517   static ip4_address_t * to_delete;
518
519   if (lm->n_adjacency_remaps == 0)
520     return;
521
522   for (l = 0; l <= 32; l++)
523     {
524       hash_pair_t * p;
525       uword * hash = fib->adj_index_by_dst_address[l];
526
527       if (hash_elts (hash) == 0)
528         continue;
529
530       if (to_delete)
531         _vec_len (to_delete) = 0;
532
533       hash_foreach_pair (p, hash, ({
534         u32 adj_index = p->value[0];
535         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
536
537         if (m)
538           {
539             /* Record destination address from hash key. */
540             a.data_u32 = p->key;
541
542             /* New adjacency points to nothing: so delete prefix. */
543             if (m == ~0)
544               vec_add1 (to_delete, a);
545             else
546               {
547                 /* Remap to new adjacency. */
548                 memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
549
550                 /* Set new adjacency value. */
551                 fib->new_hash_values[0] = p->value[0] = m - 1;
552
553                 vec_foreach (cb, im->add_del_route_callbacks)
554                   if ((flags & cb->required_flags) == cb->required_flags)
555                     cb->function (im, cb->function_opaque,
556                                   fib, flags | IP4_ROUTE_FLAG_ADD,
557                                   &a, l,
558                                   fib->old_hash_values,
559                                   fib->new_hash_values);
560               }
561           }
562       }));
563
564       fib->new_hash_values[0] = ~0;
565       for (i = 0; i < vec_len (to_delete); i++)
566         {
567           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
568           vec_foreach (cb, im->add_del_route_callbacks)
569             if ((flags & cb->required_flags) == cb->required_flags)
570               cb->function (im, cb->function_opaque,
571                             fib, flags | IP4_ROUTE_FLAG_DEL,
572                             &a, l,
573                             fib->old_hash_values,
574                             fib->new_hash_values);
575         }
576     }
577
578   /* Also remap adjacencies in mtrie. */
579   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
580
581   /* Reset mapping table. */
582   vec_zero (lm->adjacency_remap_table);
583
584   /* All remaps have been performed. */
585   lm->n_adjacency_remaps = 0;
586 }
587
588 void ip4_delete_matching_routes (ip4_main_t * im,
589                                  u32 table_index_or_table_id,
590                                  u32 flags,
591                                  ip4_address_t * address,
592                                  u32 address_length)
593 {
594   static ip4_address_t * matching_addresses;
595   static u8 * matching_address_lengths;
596   u32 l, i;
597   ip4_add_del_route_args_t a;
598
599   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
600   a.table_index_or_table_id = table_index_or_table_id;
601   a.adj_index = ~0;
602   a.add_adj = 0;
603   a.n_add_adj = 0;
604
605   for (l = address_length + 1; l <= 32; l++)
606     {
607       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
608                                   address,
609                                   l,
610                                   &matching_addresses,
611                                   &matching_address_lengths);
612       for (i = 0; i < vec_len (matching_addresses); i++)
613         {
614           a.dst_address = matching_addresses[i];
615           a.dst_address_length = matching_address_lengths[i];
616           ip4_add_del_route (im, &a);
617         }
618     }
619
620   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
621 }
622
623 always_inline uword
624 ip4_lookup_inline (vlib_main_t * vm,
625                    vlib_node_runtime_t * node,
626                    vlib_frame_t * frame,
627                    int lookup_for_responses_to_locally_received_packets)
628 {
629   ip4_main_t * im = &ip4_main;
630   ip_lookup_main_t * lm = &im->lookup_main;
631   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
632   u32 n_left_from, n_left_to_next, * from, * to_next;
633   ip_lookup_next_t next;
634   u32 cpu_index = os_get_cpu_number();
635
636   from = vlib_frame_vector_args (frame);
637   n_left_from = frame->n_vectors;
638   next = node->cached_next_index;
639
640   while (n_left_from > 0)
641     {
642       vlib_get_next_frame (vm, node, next,
643                            to_next, n_left_to_next);
644
645       while (n_left_from >= 4 && n_left_to_next >= 2)
646         {
647           vlib_buffer_t * p0, * p1;
648           ip4_header_t * ip0, * ip1;
649           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
650           ip_lookup_next_t next0, next1;
651           ip_adjacency_t * adj0, * adj1;
652           ip4_fib_mtrie_t * mtrie0, * mtrie1;
653           ip4_fib_mtrie_leaf_t leaf0, leaf1;
654           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
655           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
656           u32 flow_hash_config0, flow_hash_config1;
657           u32 hash_c0, hash_c1;
658           u32 wrong_next;
659
660           /* Prefetch next iteration. */
661           {
662             vlib_buffer_t * p2, * p3;
663
664             p2 = vlib_get_buffer (vm, from[2]);
665             p3 = vlib_get_buffer (vm, from[3]);
666
667             vlib_prefetch_buffer_header (p2, LOAD);
668             vlib_prefetch_buffer_header (p3, LOAD);
669
670             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
671             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
672           }
673
674           pi0 = to_next[0] = from[0];
675           pi1 = to_next[1] = from[1];
676
677           p0 = vlib_get_buffer (vm, pi0);
678           p1 = vlib_get_buffer (vm, pi1);
679
680           ip0 = vlib_buffer_get_current (p0);
681           ip1 = vlib_buffer_get_current (p1);
682
683           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
684           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
685           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
686             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
687           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
688             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
689
690
691           if (! lookup_for_responses_to_locally_received_packets)
692             {
693               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
694               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
695
696               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
697
698               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0);
699               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 0);
700             }
701
702           tcp0 = (void *) (ip0 + 1);
703           tcp1 = (void *) (ip1 + 1);
704
705           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
706                          || ip0->protocol == IP_PROTOCOL_UDP);
707           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
708                          || ip1->protocol == IP_PROTOCOL_UDP);
709
710           if (! lookup_for_responses_to_locally_received_packets)
711             {
712               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1);
713               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 1);
714             }
715
716           if (! lookup_for_responses_to_locally_received_packets)
717             {
718               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2);
719               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 2);
720             }
721
722           if (! lookup_for_responses_to_locally_received_packets)
723             {
724               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3);
725               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 3);
726             }
727
728           if (lookup_for_responses_to_locally_received_packets)
729             {
730               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
731               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
732             }
733           else
734             {
735               /* Handle default route. */
736               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
737               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
738
739               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
740               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
741             }
742
743           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
744                                                            &ip0->dst_address,
745                                                            /* no_default_route */ 0));
746           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
747                                                            &ip1->dst_address,
748                                                            /* no_default_route */ 0));
749           adj0 = ip_get_adjacency (lm, adj_index0);
750           adj1 = ip_get_adjacency (lm, adj_index1);
751
752           next0 = adj0->lookup_next_index;
753           next1 = adj1->lookup_next_index;
754
755           /* Use flow hash to compute multipath adjacency. */
756           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
757           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
758           if (PREDICT_FALSE (adj0->n_adj > 1))
759             {
760               flow_hash_config0 = 
761                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
762               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
763                 ip4_compute_flow_hash (ip0, flow_hash_config0);
764             }
765           if (PREDICT_FALSE(adj1->n_adj > 1))
766             {
767               flow_hash_config1 = 
768                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
769               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
770                 ip4_compute_flow_hash (ip1, flow_hash_config1);
771             }
772
773           ASSERT (adj0->n_adj > 0);
774           ASSERT (adj1->n_adj > 0);
775           ASSERT (is_pow2 (adj0->n_adj));
776           ASSERT (is_pow2 (adj1->n_adj));
777           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
778           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
779
780           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
781           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
782
783           vlib_increment_combined_counter 
784               (cm, cpu_index, adj_index0, 1,
785                vlib_buffer_length_in_chain (vm, p0) 
786                + sizeof(ethernet_header_t));
787           vlib_increment_combined_counter 
788               (cm, cpu_index, adj_index1, 1,
789                vlib_buffer_length_in_chain (vm, p1)
790                + sizeof(ethernet_header_t));
791
792           from += 2;
793           to_next += 2;
794           n_left_to_next -= 2;
795           n_left_from -= 2;
796
797           wrong_next = (next0 != next) + 2*(next1 != next);
798           if (PREDICT_FALSE (wrong_next != 0))
799             {
800               switch (wrong_next)
801                 {
802                 case 1:
803                   /* A B A */
804                   to_next[-2] = pi1;
805                   to_next -= 1;
806                   n_left_to_next += 1;
807                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
808                   break;
809
810                 case 2:
811                   /* A A B */
812                   to_next -= 1;
813                   n_left_to_next += 1;
814                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
815                   break;
816
817                 case 3:
818                   /* A B C */
819                   to_next -= 2;
820                   n_left_to_next += 2;
821                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
822                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
823                   if (next0 == next1)
824                     {
825                       /* A B B */
826                       vlib_put_next_frame (vm, node, next, n_left_to_next);
827                       next = next1;
828                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
829                     }
830                 }
831             }
832         }
833     
834       while (n_left_from > 0 && n_left_to_next > 0)
835         {
836           vlib_buffer_t * p0;
837           ip4_header_t * ip0;
838           __attribute__((unused)) tcp_header_t * tcp0;
839           ip_lookup_next_t next0;
840           ip_adjacency_t * adj0;
841           ip4_fib_mtrie_t * mtrie0;
842           ip4_fib_mtrie_leaf_t leaf0;
843           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
844           u32 flow_hash_config0, hash_c0;
845
846           pi0 = from[0];
847           to_next[0] = pi0;
848
849           p0 = vlib_get_buffer (vm, pi0);
850
851           ip0 = vlib_buffer_get_current (p0);
852
853           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
854           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
855             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
856
857           if (! lookup_for_responses_to_locally_received_packets)
858             {
859               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
860
861               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
862
863               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0);
864             }
865
866           tcp0 = (void *) (ip0 + 1);
867
868           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
869                          || ip0->protocol == IP_PROTOCOL_UDP);
870
871           if (! lookup_for_responses_to_locally_received_packets)
872             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1);
873
874           if (! lookup_for_responses_to_locally_received_packets)
875             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2);
876
877           if (! lookup_for_responses_to_locally_received_packets)
878             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3);
879
880           if (lookup_for_responses_to_locally_received_packets)
881             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
882           else
883             {
884               /* Handle default route. */
885               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
886               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
887             }
888
889           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
890                                                            &ip0->dst_address,
891                                                            /* no_default_route */ 0));
892
893           adj0 = ip_get_adjacency (lm, adj_index0);
894
895           next0 = adj0->lookup_next_index;
896
897           /* Use flow hash to compute multipath adjacency. */
898           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
899           if (PREDICT_FALSE(adj0->n_adj > 1))
900             {
901               flow_hash_config0 = 
902                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
903
904               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
905                 ip4_compute_flow_hash (ip0, flow_hash_config0);
906             }
907
908           ASSERT (adj0->n_adj > 0);
909           ASSERT (is_pow2 (adj0->n_adj));
910           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
911
912           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
913
914           vlib_increment_combined_counter 
915               (cm, cpu_index, adj_index0, 1,
916                vlib_buffer_length_in_chain (vm, p0)
917                + sizeof(ethernet_header_t));
918
919           from += 1;
920           to_next += 1;
921           n_left_to_next -= 1;
922           n_left_from -= 1;
923
924           if (PREDICT_FALSE (next0 != next))
925             {
926               n_left_to_next += 1;
927               vlib_put_next_frame (vm, node, next, n_left_to_next);
928               next = next0;
929               vlib_get_next_frame (vm, node, next,
930                                    to_next, n_left_to_next);
931               to_next[0] = pi0;
932               to_next += 1;
933               n_left_to_next -= 1;
934             }
935         }
936
937       vlib_put_next_frame (vm, node, next, n_left_to_next);
938     }
939
940   return frame->n_vectors;
941 }
942
943 static uword
944 ip4_lookup (vlib_main_t * vm,
945             vlib_node_runtime_t * node,
946             vlib_frame_t * frame)
947 {
948   return ip4_lookup_inline (vm, node, frame, /* lookup_for_responses_to_locally_received_packets */ 0);
949
950 }
951
952 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
953                                         ip_adjacency_t * adj,
954                                         u32 sw_if_index,
955                                         u32 if_address_index)
956 {
957   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
958   ip_lookup_next_t n;
959   vnet_l3_packet_type_t packet_type;
960   u32 node_index;
961
962   if (hw->hw_class_index == ethernet_hw_interface_class.index
963       || hw->hw_class_index == srp_hw_interface_class.index)
964     {
965       /* 
966        * We have a bit of a problem in this case. ip4-arp uses
967        * the rewrite_header.next_index to hand pkts to the
968        * indicated inteface output node. We can end up in
969        * ip4_rewrite_local, too, which also pays attention to 
970        * rewrite_header.next index. Net result: a hack in
971        * ip4_rewrite_local...
972        */
973       n = IP_LOOKUP_NEXT_ARP;
974       node_index = ip4_arp_node.index;
975       adj->if_address_index = if_address_index;
976       adj->arp.next_hop.ip4.as_u32 = 0;
977       packet_type = VNET_L3_PACKET_TYPE_ARP;
978     }
979   else
980     {
981       n = IP_LOOKUP_NEXT_REWRITE;
982       node_index = ip4_rewrite_node.index;
983       packet_type = VNET_L3_PACKET_TYPE_IP4;
984     }
985
986   adj->lookup_next_index = n;
987   vnet_rewrite_for_sw_interface
988     (vnm,
989      packet_type,
990      sw_if_index,
991      node_index,
992      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
993      &adj->rewrite_header,
994      sizeof (adj->rewrite_data));
995 }
996
997 static void
998 ip4_add_interface_routes (u32 sw_if_index,
999                           ip4_main_t * im, u32 fib_index,
1000                           ip_interface_address_t * a)
1001 {
1002   vnet_main_t * vnm = vnet_get_main();
1003   ip_lookup_main_t * lm = &im->lookup_main;
1004   ip_adjacency_t * adj;
1005   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1006   ip4_add_del_route_args_t x;
1007   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1008   u32 classify_table_index;
1009
1010   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1011   x.table_index_or_table_id = fib_index;
1012   x.flags = (IP4_ROUTE_FLAG_ADD
1013              | IP4_ROUTE_FLAG_FIB_INDEX
1014              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1015   x.dst_address = address[0];
1016   x.dst_address_length = a->address_length;
1017   x.n_add_adj = 0;
1018   x.add_adj = 0;
1019
1020   a->neighbor_probe_adj_index = ~0;
1021   if (a->address_length < 32)
1022     {
1023       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1024                               &x.adj_index);
1025       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1026       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1027       ip4_add_del_route (im, &x);
1028       a->neighbor_probe_adj_index = x.adj_index;
1029     }
1030   
1031   /* Add e.g. 1.1.1.1/32 as local to this host. */
1032   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1033                           &x.adj_index);
1034   
1035   classify_table_index = ~0;
1036   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1037     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1038   if (classify_table_index != (u32) ~0)
1039     {
1040       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1041       adj->classify.table_index = classify_table_index;
1042     }
1043   else
1044     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1045   
1046   adj->if_address_index = a - lm->if_address_pool;
1047   adj->rewrite_header.sw_if_index = sw_if_index;
1048   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1049   /* 
1050    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1051    * fail an RPF-ish check, but still go thru the rewrite code...
1052    */
1053   adj->rewrite_header.data_bytes = 0;
1054
1055   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1056   x.dst_address_length = 32;
1057   ip4_add_del_route (im, &x);
1058 }
1059
1060 static void
1061 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1062 {
1063   ip4_add_del_route_args_t x;
1064
1065   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1066   x.table_index_or_table_id = fib_index;
1067   x.flags = (IP4_ROUTE_FLAG_DEL
1068              | IP4_ROUTE_FLAG_FIB_INDEX
1069              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1070   x.dst_address = address[0];
1071   x.dst_address_length = address_length;
1072   x.adj_index = ~0;
1073   x.n_add_adj = 0;
1074   x.add_adj = 0;
1075
1076   if (address_length < 32)
1077     ip4_add_del_route (im, &x);
1078
1079   x.dst_address_length = 32;
1080   ip4_add_del_route (im, &x);
1081
1082   ip4_delete_matching_routes (im,
1083                               fib_index,
1084                               IP4_ROUTE_FLAG_FIB_INDEX,
1085                               address,
1086                               address_length);
1087 }
1088
1089 typedef struct {
1090     u32 sw_if_index;
1091     ip4_address_t address;
1092     u32 length;
1093 } ip4_interface_address_t;
1094
1095 static clib_error_t *
1096 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1097                                         u32 sw_if_index,
1098                                         ip4_address_t * new_address,
1099                                         u32 new_length,
1100                                         u32 redistribute,
1101                                         u32 insert_routes,
1102                                         u32 is_del);
1103
1104 static clib_error_t *
1105 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1106                                         u32 sw_if_index,
1107                                         ip4_address_t * address,
1108                                         u32 address_length,
1109                                         u32 redistribute,
1110                                         u32 insert_routes,
1111                                         u32 is_del)
1112 {
1113   vnet_main_t * vnm = vnet_get_main();
1114   ip4_main_t * im = &ip4_main;
1115   ip_lookup_main_t * lm = &im->lookup_main;
1116   clib_error_t * error = 0;
1117   u32 if_address_index, elts_before;
1118   ip4_address_fib_t ip4_af, * addr_fib = 0;
1119
1120   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1121   ip4_addr_fib_init (&ip4_af, address,
1122                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1123   vec_add1 (addr_fib, ip4_af);
1124
1125   /* When adding an address check that it does not conflict with an existing address. */
1126   if (! is_del)
1127     {
1128       ip_interface_address_t * ia;
1129       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1130                                     0 /* honor unnumbered */,
1131       ({
1132         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1133
1134         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1135             || ip4_destination_matches_route (im, x, address, address_length))
1136           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1137                                     format_ip4_address_and_length, address, address_length,
1138                                     format_ip4_address_and_length, x, ia->address_length,
1139                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1140       }));
1141     }
1142
1143   elts_before = pool_elts (lm->if_address_pool);
1144
1145   error = ip_interface_address_add_del
1146     (lm,
1147      sw_if_index,
1148      addr_fib,
1149      address_length,
1150      is_del,
1151      &if_address_index);
1152   if (error)
1153     goto done;
1154   
1155   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1156     {
1157       if (is_del)
1158         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1159                                   address_length);
1160       
1161       else
1162           ip4_add_interface_routes (sw_if_index,
1163                                     im, ip4_af.fib_index,
1164                                     pool_elt_at_index 
1165                                     (lm->if_address_pool, if_address_index));
1166     }
1167
1168   /* If pool did not grow/shrink: add duplicate address. */
1169   if (elts_before != pool_elts (lm->if_address_pool))
1170     {
1171       ip4_add_del_interface_address_callback_t * cb;
1172       vec_foreach (cb, im->add_del_interface_address_callbacks)
1173         cb->function (im, cb->function_opaque, sw_if_index,
1174                       address, address_length,
1175                       if_address_index,
1176                       is_del);
1177     }
1178
1179  done:
1180   vec_free (addr_fib);
1181   return error;
1182 }
1183
1184 clib_error_t *
1185 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1186                                ip4_address_t * address, u32 address_length,
1187                                u32 is_del)
1188 {
1189   return ip4_add_del_interface_address_internal
1190     (vm, sw_if_index, address, address_length,
1191      /* redistribute */ 1,
1192      /* insert_routes */ 1,
1193      is_del);
1194 }
1195
1196 static clib_error_t *
1197 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1198                                 u32 sw_if_index,
1199                                 u32 flags)
1200 {
1201   ip4_main_t * im = &ip4_main;
1202   ip_interface_address_t * ia;
1203   ip4_address_t * a;
1204   u32 is_admin_up, fib_index;
1205   
1206   /* Fill in lookup tables with default table (0). */
1207   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1208   
1209   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1210   
1211   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1212   
1213   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1214
1215   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1216                                 0 /* honor unnumbered */,
1217   ({
1218     a = ip_interface_address_get_address (&im->lookup_main, ia);
1219     if (is_admin_up)
1220       ip4_add_interface_routes (sw_if_index,
1221                                 im, fib_index,
1222                                 ia);
1223     else
1224       ip4_del_interface_routes (im, fib_index,
1225                                 a, ia->address_length);
1226   }));
1227
1228   return 0;
1229 }
1230  
1231 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1232
1233 static clib_error_t *
1234 ip4_sw_interface_add_del (vnet_main_t * vnm,
1235                           u32 sw_if_index,
1236                           u32 is_add)
1237 {
1238   vlib_main_t * vm = vnm->vlib_main;
1239   ip4_main_t * im = &ip4_main;
1240   ip_lookup_main_t * lm = &im->lookup_main;
1241   u32 ci, cast;
1242
1243   for (cast = 0; cast < VNET_N_CAST; cast++)
1244     {
1245       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1246       vnet_config_main_t * vcm = &cm->config_main;
1247
1248       if (! vcm->node_index_by_feature_index)
1249         {
1250           if (cast == VNET_UNICAST)
1251             {
1252               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1253               static char * feature_nodes[] = {
1254                 [IP4_RX_FEATURE_CHECK_ACCESS] = "ip4-inacl",
1255                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX] = "ip4-source-check-via-rx",
1256                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY] = "ip4-source-check-via-any",
1257                 [IP4_RX_FEATURE_IPSEC] = "ipsec-input-ip4",
1258                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1259                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup",
1260               };
1261
1262               vnet_config_init (vm, vcm,
1263                                 start_nodes, ARRAY_LEN (start_nodes),
1264                                 feature_nodes, ARRAY_LEN (feature_nodes));
1265             }
1266           else
1267             {
1268               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1269               static char * feature_nodes[] = {
1270                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1271                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup-multicast",
1272               };
1273
1274               vnet_config_init (vm, vcm,
1275                                 start_nodes, ARRAY_LEN (start_nodes),
1276                                 feature_nodes, ARRAY_LEN (feature_nodes));
1277             }
1278         }
1279
1280       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1281       ci = cm->config_index_by_sw_if_index[sw_if_index];
1282
1283       if (is_add)
1284         ci = vnet_config_add_feature (vm, vcm,
1285                                       ci,
1286                                       IP4_RX_FEATURE_LOOKUP,
1287                                       /* config data */ 0,
1288                                       /* # bytes of config data */ 0);
1289       else
1290         ci = vnet_config_del_feature (vm, vcm,
1291                                       ci,
1292                                       IP4_RX_FEATURE_LOOKUP,
1293                                       /* config data */ 0,
1294                                       /* # bytes of config data */ 0);
1295
1296       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1297     }
1298
1299   return /* no error */ 0;
1300 }
1301
1302 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1303
1304
1305 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1306   .function = ip4_lookup,
1307   .name = "ip4-lookup",
1308   .vector_size = sizeof (u32),
1309
1310   .n_next_nodes = IP_LOOKUP_N_NEXT,
1311   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1312 };
1313
1314 /* Global IP4 main. */
1315 ip4_main_t ip4_main;
1316
1317 clib_error_t *
1318 ip4_lookup_init (vlib_main_t * vm)
1319 {
1320   ip4_main_t * im = &ip4_main;
1321   uword i;
1322
1323   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1324     {
1325       u32 m;
1326
1327       if (i < 32)
1328         m = pow2_mask (i) << (32 - i);
1329       else 
1330         m = ~0;
1331       im->fib_masks[i] = clib_host_to_net_u32 (m);
1332     }
1333
1334   /* Create FIB with index 0 and table id of 0. */
1335   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1336
1337   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1338
1339   {
1340     pg_node_t * pn;
1341     pn = pg_get_node (ip4_lookup_node.index);
1342     pn->unformat_edit = unformat_pg_ip4_header;
1343   }
1344
1345   {
1346     ethernet_arp_header_t h;
1347
1348     memset (&h, 0, sizeof (h));
1349
1350     /* Set target ethernet address to all zeros. */
1351     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1352
1353 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1354 #define _8(f,v) h.f = v;
1355     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1356     _16 (l3_type, ETHERNET_TYPE_IP4);
1357     _8 (n_l2_address_bytes, 6);
1358     _8 (n_l3_address_bytes, 4);
1359     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1360 #undef _16
1361 #undef _8
1362
1363     vlib_packet_template_init (vm,
1364                                &im->ip4_arp_request_packet_template,
1365                                /* data */ &h,
1366                                sizeof (h),
1367                                /* alloc chunk size */ 8,
1368                                "ip4 arp");
1369   }
1370
1371   return 0;
1372 }
1373
1374 VLIB_INIT_FUNCTION (ip4_lookup_init);
1375
1376 typedef struct {
1377   /* Adjacency taken. */
1378   u32 adj_index;
1379   u32 flow_hash;
1380   u32 fib_index;
1381
1382   /* Packet data, possibly *after* rewrite. */
1383   u8 packet_data[64 - 1*sizeof(u32)];
1384 } ip4_forward_next_trace_t;
1385
1386 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1387 {
1388   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1389   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1390   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1391   vnet_main_t * vnm = vnet_get_main();
1392   ip4_main_t * im = &ip4_main;
1393   ip_adjacency_t * adj;
1394   uword indent = format_get_indent (s);
1395
1396   adj = ip_get_adjacency (&im->lookup_main, t->adj_index);
1397   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1398               t->fib_index, t->adj_index, format_ip_adjacency,
1399               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1400   switch (adj->lookup_next_index)
1401     {
1402     case IP_LOOKUP_NEXT_REWRITE:
1403       s = format (s, "\n%U%U",
1404                   format_white_space, indent,
1405                   format_ip_adjacency_packet_data,
1406                   vnm, &im->lookup_main, t->adj_index,
1407                   t->packet_data, sizeof (t->packet_data));
1408       break;
1409
1410     default:
1411       break;
1412     }
1413
1414   return s;
1415 }
1416
1417 /* Common trace function for all ip4-forward next nodes. */
1418 void
1419 ip4_forward_next_trace (vlib_main_t * vm,
1420                         vlib_node_runtime_t * node,
1421                         vlib_frame_t * frame,
1422                         vlib_rx_or_tx_t which_adj_index)
1423 {
1424   u32 * from, n_left;
1425   ip4_main_t * im = &ip4_main;
1426
1427   n_left = frame->n_vectors;
1428   from = vlib_frame_vector_args (frame);
1429   
1430   while (n_left >= 4)
1431     {
1432       u32 bi0, bi1;
1433       vlib_buffer_t * b0, * b1;
1434       ip4_forward_next_trace_t * t0, * t1;
1435
1436       /* Prefetch next iteration. */
1437       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1438       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1439
1440       bi0 = from[0];
1441       bi1 = from[1];
1442
1443       b0 = vlib_get_buffer (vm, bi0);
1444       b1 = vlib_get_buffer (vm, bi1);
1445
1446       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1447         {
1448           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1449           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1450           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1451           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1452                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1453           memcpy (t0->packet_data,
1454                   vlib_buffer_get_current (b0),
1455                   sizeof (t0->packet_data));
1456         }
1457       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1458         {
1459           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1460           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1461           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1462           t1->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1463                              vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1464           memcpy (t1->packet_data,
1465                   vlib_buffer_get_current (b1),
1466                   sizeof (t1->packet_data));
1467         }
1468       from += 2;
1469       n_left -= 2;
1470     }
1471
1472   while (n_left >= 1)
1473     {
1474       u32 bi0;
1475       vlib_buffer_t * b0;
1476       ip4_forward_next_trace_t * t0;
1477
1478       bi0 = from[0];
1479
1480       b0 = vlib_get_buffer (vm, bi0);
1481
1482       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1483         {
1484           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1485           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1486           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1487           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1488                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1489           memcpy (t0->packet_data,
1490                   vlib_buffer_get_current (b0),
1491                   sizeof (t0->packet_data));
1492         }
1493       from += 1;
1494       n_left -= 1;
1495     }
1496 }
1497
1498 static uword
1499 ip4_drop_or_punt (vlib_main_t * vm,
1500                   vlib_node_runtime_t * node,
1501                   vlib_frame_t * frame,
1502                   ip4_error_t error_code)
1503 {
1504   u32 * buffers = vlib_frame_vector_args (frame);
1505   uword n_packets = frame->n_vectors;
1506
1507   vlib_error_drop_buffers (vm, node,
1508                            buffers,
1509                            /* stride */ 1,
1510                            n_packets,
1511                            /* next */ 0,
1512                            ip4_input_node.index,
1513                            error_code);
1514
1515   if (node->flags & VLIB_NODE_FLAG_TRACE)
1516     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1517
1518   return n_packets;
1519 }
1520
1521 static uword
1522 ip4_drop (vlib_main_t * vm,
1523           vlib_node_runtime_t * node,
1524           vlib_frame_t * frame)
1525 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1526
1527 static uword
1528 ip4_punt (vlib_main_t * vm,
1529           vlib_node_runtime_t * node,
1530           vlib_frame_t * frame)
1531 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1532
1533 static uword
1534 ip4_miss (vlib_main_t * vm,
1535           vlib_node_runtime_t * node,
1536           vlib_frame_t * frame)
1537 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1538
1539 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1540   .function = ip4_drop,
1541   .name = "ip4-drop",
1542   .vector_size = sizeof (u32),
1543
1544   .format_trace = format_ip4_forward_next_trace,
1545
1546   .n_next_nodes = 1,
1547   .next_nodes = {
1548     [0] = "error-drop",
1549   },
1550 };
1551
1552 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1553   .function = ip4_punt,
1554   .name = "ip4-punt",
1555   .vector_size = sizeof (u32),
1556
1557   .format_trace = format_ip4_forward_next_trace,
1558
1559   .n_next_nodes = 1,
1560   .next_nodes = {
1561     [0] = "error-punt",
1562   },
1563 };
1564
1565 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1566   .function = ip4_miss,
1567   .name = "ip4-miss",
1568   .vector_size = sizeof (u32),
1569
1570   .format_trace = format_ip4_forward_next_trace,
1571
1572   .n_next_nodes = 1,
1573   .next_nodes = {
1574     [0] = "error-drop",
1575   },
1576 };
1577
1578 /* Compute TCP/UDP/ICMP4 checksum in software. */
1579 u16
1580 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1581                               ip4_header_t * ip0)
1582 {
1583   ip_csum_t sum0;
1584   u32 ip_header_length, payload_length_host_byte_order;
1585   u32 n_this_buffer, n_bytes_left;
1586   u16 sum16;
1587   void * data_this_buffer;
1588   
1589   /* Initialize checksum with ip header. */
1590   ip_header_length = ip4_header_bytes (ip0);
1591   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1592   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1593
1594   if (BITS (uword) == 32)
1595     {
1596       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1597       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1598     }
1599   else
1600     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1601
1602   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1603   data_this_buffer = (void *) ip0 + ip_header_length;
1604   if (n_this_buffer + ip_header_length > p0->current_length)
1605     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1606   while (1)
1607     {
1608       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1609       n_bytes_left -= n_this_buffer;
1610       if (n_bytes_left == 0)
1611         break;
1612
1613       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1614       p0 = vlib_get_buffer (vm, p0->next_buffer);
1615       data_this_buffer = vlib_buffer_get_current (p0);
1616       n_this_buffer = p0->current_length;
1617     }
1618
1619   sum16 = ~ ip_csum_fold (sum0);
1620
1621   return sum16;
1622 }
1623
1624 static u32
1625 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1626 {
1627   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1628   udp_header_t * udp0;
1629   u16 sum16;
1630
1631   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1632           || ip0->protocol == IP_PROTOCOL_UDP);
1633
1634   udp0 = (void *) (ip0 + 1);
1635   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1636     {
1637       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1638                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1639       return p0->flags;
1640     }
1641
1642   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1643
1644   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1645                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1646
1647   return p0->flags;
1648 }
1649
1650 static uword
1651 ip4_local (vlib_main_t * vm,
1652            vlib_node_runtime_t * node,
1653            vlib_frame_t * frame)
1654 {
1655   ip4_main_t * im = &ip4_main;
1656   ip_lookup_main_t * lm = &im->lookup_main;
1657   ip_local_next_t next_index;
1658   u32 * from, * to_next, n_left_from, n_left_to_next;
1659   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1660
1661   from = vlib_frame_vector_args (frame);
1662   n_left_from = frame->n_vectors;
1663   next_index = node->cached_next_index;
1664   
1665   if (node->flags & VLIB_NODE_FLAG_TRACE)
1666     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1667
1668   while (n_left_from > 0)
1669     {
1670       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1671
1672       while (n_left_from >= 4 && n_left_to_next >= 2)
1673         {
1674           vlib_buffer_t * p0, * p1;
1675           ip4_header_t * ip0, * ip1;
1676           udp_header_t * udp0, * udp1;
1677           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1678           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1679           ip_adjacency_t * adj0, * adj1;
1680           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
1681           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
1682           i32 len_diff0, len_diff1;
1683           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1684           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1685           u8 enqueue_code;
1686       
1687           pi0 = to_next[0] = from[0];
1688           pi1 = to_next[1] = from[1];
1689           from += 2;
1690           n_left_from -= 2;
1691           to_next += 2;
1692           n_left_to_next -= 2;
1693       
1694           p0 = vlib_get_buffer (vm, pi0);
1695           p1 = vlib_get_buffer (vm, pi1);
1696
1697           ip0 = vlib_buffer_get_current (p0);
1698           ip1 = vlib_buffer_get_current (p1);
1699
1700           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1701                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1702           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1703                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1704
1705           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1706           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
1707
1708           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1709
1710           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1711           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1712
1713           proto0 = ip0->protocol;
1714           proto1 = ip1->protocol;
1715           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1716           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1717           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1718           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1719
1720           flags0 = p0->flags;
1721           flags1 = p1->flags;
1722
1723           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1724           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1725
1726           udp0 = ip4_next_header (ip0);
1727           udp1 = ip4_next_header (ip1);
1728
1729           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1730           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1731           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1732
1733           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1734           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1735
1736           /* Verify UDP length. */
1737           ip_len0 = clib_net_to_host_u16 (ip0->length);
1738           ip_len1 = clib_net_to_host_u16 (ip1->length);
1739           udp_len0 = clib_net_to_host_u16 (udp0->length);
1740           udp_len1 = clib_net_to_host_u16 (udp1->length);
1741
1742           len_diff0 = ip_len0 - udp_len0;
1743           len_diff1 = ip_len1 - udp_len1;
1744
1745           len_diff0 = is_udp0 ? len_diff0 : 0;
1746           len_diff1 = is_udp1 ? len_diff1 : 0;
1747
1748           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1749                                 & good_tcp_udp0 & good_tcp_udp1)))
1750             {
1751               if (is_tcp_udp0)
1752                 {
1753                   if (is_tcp_udp0
1754                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1755                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1756                   good_tcp_udp0 =
1757                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1758                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1759                 }
1760               if (is_tcp_udp1)
1761                 {
1762                   if (is_tcp_udp1
1763                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1764                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1765                   good_tcp_udp1 =
1766                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1767                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1768                 }
1769             }
1770
1771           good_tcp_udp0 &= len_diff0 >= 0;
1772           good_tcp_udp1 &= len_diff1 >= 0;
1773
1774           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1775           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1776
1777           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1778
1779           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1780           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1781
1782           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1783           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1784                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1785                     : error0);
1786           error1 = (is_tcp_udp1 && ! good_tcp_udp1
1787                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1788                     : error1);
1789
1790           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1791           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1792
1793           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1794           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1795
1796           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1797           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
1798
1799           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1800                                                            &ip0->src_address,
1801                                                            /* no_default_route */ 1));
1802           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
1803                                                            &ip1->src_address,
1804                                                            /* no_default_route */ 1));
1805
1806           adj0 = ip_get_adjacency (lm, adj_index0);
1807           adj1 = ip_get_adjacency (lm, adj_index1);
1808
1809           /* 
1810            * Must have a route to source otherwise we drop the packet.
1811            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1812            */
1813           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1814                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1815                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
1816                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1817                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1818                     ? IP4_ERROR_SRC_LOOKUP_MISS
1819                     : error0);
1820           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
1821                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1822                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
1823                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1824                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1825                     ? IP4_ERROR_SRC_LOOKUP_MISS
1826                     : error1);
1827
1828           next0 = lm->local_next_by_ip_protocol[proto0];
1829           next1 = lm->local_next_by_ip_protocol[proto1];
1830
1831           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1832           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1833
1834           p0->error = error0 ? error_node->errors[error0] : 0;
1835           p1->error = error1 ? error_node->errors[error1] : 0;
1836
1837           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1838
1839           if (PREDICT_FALSE (enqueue_code != 0))
1840             {
1841               switch (enqueue_code)
1842                 {
1843                 case 1:
1844                   /* A B A */
1845                   to_next[-2] = pi1;
1846                   to_next -= 1;
1847                   n_left_to_next += 1;
1848                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1849                   break;
1850
1851                 case 2:
1852                   /* A A B */
1853                   to_next -= 1;
1854                   n_left_to_next += 1;
1855                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1856                   break;
1857
1858                 case 3:
1859                   /* A B B or A B C */
1860                   to_next -= 2;
1861                   n_left_to_next += 2;
1862                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1863                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1864                   if (next0 == next1)
1865                     {
1866                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1867                       next_index = next1;
1868                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1869                     }
1870                   break;
1871                 }
1872             }
1873         }
1874
1875       while (n_left_from > 0 && n_left_to_next > 0)
1876         {
1877           vlib_buffer_t * p0;
1878           ip4_header_t * ip0;
1879           udp_header_t * udp0;
1880           ip4_fib_mtrie_t * mtrie0;
1881           ip4_fib_mtrie_leaf_t leaf0;
1882           ip_adjacency_t * adj0;
1883           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
1884           i32 len_diff0;
1885           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1886       
1887           pi0 = to_next[0] = from[0];
1888           from += 1;
1889           n_left_from -= 1;
1890           to_next += 1;
1891           n_left_to_next -= 1;
1892       
1893           p0 = vlib_get_buffer (vm, pi0);
1894
1895           ip0 = vlib_buffer_get_current (p0);
1896
1897           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1898                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1899
1900           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1901
1902           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1903
1904           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1905
1906           proto0 = ip0->protocol;
1907           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1908           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1909
1910           flags0 = p0->flags;
1911
1912           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1913
1914           udp0 = ip4_next_header (ip0);
1915
1916           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1917           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1918
1919           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1920
1921           /* Verify UDP length. */
1922           ip_len0 = clib_net_to_host_u16 (ip0->length);
1923           udp_len0 = clib_net_to_host_u16 (udp0->length);
1924
1925           len_diff0 = ip_len0 - udp_len0;
1926
1927           len_diff0 = is_udp0 ? len_diff0 : 0;
1928
1929           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1930             {
1931               if (is_tcp_udp0)
1932                 {
1933                   if (is_tcp_udp0
1934                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1935                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1936                   good_tcp_udp0 =
1937                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1938                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1939                 }
1940             }
1941
1942           good_tcp_udp0 &= len_diff0 >= 0;
1943
1944           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1945
1946           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1947
1948           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1949
1950           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1951           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1952                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1953                     : error0);
1954
1955           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1956
1957           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1958           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1959
1960           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1961                                                            &ip0->src_address,
1962                                                            /* no_default_route */ 1));
1963
1964           adj0 = ip_get_adjacency (lm, adj_index0);
1965
1966           /* Must have a route to source otherwise we drop the packet. */
1967           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1968                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1969                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
1970                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1971                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1972                     ? IP4_ERROR_SRC_LOOKUP_MISS
1973                     : error0);
1974
1975           next0 = lm->local_next_by_ip_protocol[proto0];
1976
1977           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1978
1979           p0->error = error0? error_node->errors[error0] : 0;
1980
1981           if (PREDICT_FALSE (next0 != next_index))
1982             {
1983               n_left_to_next += 1;
1984               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1985
1986               next_index = next0;
1987               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1988               to_next[0] = pi0;
1989               to_next += 1;
1990               n_left_to_next -= 1;
1991             }
1992         }
1993   
1994       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1995     }
1996
1997   return frame->n_vectors;
1998 }
1999
2000 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2001   .function = ip4_local,
2002   .name = "ip4-local",
2003   .vector_size = sizeof (u32),
2004
2005   .format_trace = format_ip4_forward_next_trace,
2006
2007   .n_next_nodes = IP_LOCAL_N_NEXT,
2008   .next_nodes = {
2009     [IP_LOCAL_NEXT_DROP] = "error-drop",
2010     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2011     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2012     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2013   },
2014 };
2015
2016 void ip4_register_protocol (u32 protocol, u32 node_index)
2017 {
2018   vlib_main_t * vm = vlib_get_main();
2019   ip4_main_t * im = &ip4_main;
2020   ip_lookup_main_t * lm = &im->lookup_main;
2021
2022   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2023   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2024 }
2025
2026 static clib_error_t *
2027 show_ip_local_command_fn (vlib_main_t * vm,
2028                           unformat_input_t * input,
2029                          vlib_cli_command_t * cmd)
2030 {
2031   ip4_main_t * im = &ip4_main;
2032   ip_lookup_main_t * lm = &im->lookup_main;
2033   int i;
2034
2035   vlib_cli_output (vm, "Protocols handled by ip4_local");
2036   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2037     {
2038       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2039         vlib_cli_output (vm, "%d", i);
2040     }
2041   return 0;
2042 }
2043
2044
2045
2046 VLIB_CLI_COMMAND (show_ip_local, static) = {
2047   .path = "show ip local",
2048   .function = show_ip_local_command_fn,
2049   .short_help = "Show ip local protocol table",
2050 };
2051
2052 static uword
2053 ip4_arp (vlib_main_t * vm,
2054          vlib_node_runtime_t * node,
2055          vlib_frame_t * frame)
2056 {
2057   vnet_main_t * vnm = vnet_get_main();
2058   ip4_main_t * im = &ip4_main;
2059   ip_lookup_main_t * lm = &im->lookup_main;
2060   u32 * from, * to_next_drop;
2061   uword n_left_from, n_left_to_next_drop, next_index;
2062   static f64 time_last_seed_change = -1e100;
2063   static u32 hash_seeds[3];
2064   static uword hash_bitmap[256 / BITS (uword)]; 
2065   f64 time_now;
2066
2067   if (node->flags & VLIB_NODE_FLAG_TRACE)
2068     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2069
2070   time_now = vlib_time_now (vm);
2071   if (time_now - time_last_seed_change > 1e-3)
2072     {
2073       uword i;
2074       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2075                                              sizeof (hash_seeds));
2076       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2077         hash_seeds[i] = r[i];
2078
2079       /* Mark all hash keys as been no-seen before. */
2080       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2081         hash_bitmap[i] = 0;
2082
2083       time_last_seed_change = time_now;
2084     }
2085
2086   from = vlib_frame_vector_args (frame);
2087   n_left_from = frame->n_vectors;
2088   next_index = node->cached_next_index;
2089   if (next_index == IP4_ARP_NEXT_DROP)
2090     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2091
2092   while (n_left_from > 0)
2093     {
2094       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2095                            to_next_drop, n_left_to_next_drop);
2096
2097       while (n_left_from > 0 && n_left_to_next_drop > 0)
2098         {
2099           vlib_buffer_t * p0;
2100           ip4_header_t * ip0;
2101           ethernet_header_t * eh0;
2102           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2103           uword bm0;
2104           ip_adjacency_t * adj0;
2105
2106           pi0 = from[0];
2107
2108           p0 = vlib_get_buffer (vm, pi0);
2109
2110           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2111           adj0 = ip_get_adjacency (lm, adj_index0);
2112           ip0 = vlib_buffer_get_current (p0);
2113
2114           /* If packet destination is not local, send ARP to next hop */
2115           if (adj0->arp.next_hop.ip4.as_u32)
2116             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2117
2118           /* 
2119            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2120            * rewrite to this packet, we need to skip it here.
2121            * Note, to distinguish from src IP addr *.8.6.*, we
2122            * check for a bcast eth dest instead of IPv4 version.
2123            */
2124           eh0 = (ethernet_header_t*)ip0;
2125           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2126             {
2127               u32 vlan_num = 0;
2128               u16 * etype = &eh0->type;
2129               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2130                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2131                 {
2132                   vlan_num += 1;
2133                   etype += 2; //vlan tag also 16 bits, same as etype
2134                 }
2135               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2136                 {
2137                   vlib_buffer_advance (
2138                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2139                   ip0 = vlib_buffer_get_current (p0);
2140                 }
2141             }
2142
2143           a0 = hash_seeds[0];
2144           b0 = hash_seeds[1];
2145           c0 = hash_seeds[2];
2146
2147           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2148           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2149
2150           a0 ^= ip0->dst_address.data_u32;
2151           b0 ^= sw_if_index0;
2152
2153           hash_v3_finalize32 (a0, b0, c0);
2154
2155           c0 &= BITS (hash_bitmap) - 1;
2156           c0 = c0 / BITS (uword);
2157           m0 = (uword) 1 << (c0 % BITS (uword));
2158
2159           bm0 = hash_bitmap[c0];
2160           drop0 = (bm0 & m0) != 0;
2161
2162           /* Mark it as seen. */
2163           hash_bitmap[c0] = bm0 | m0;
2164
2165           from += 1;
2166           n_left_from -= 1;
2167           to_next_drop[0] = pi0;
2168           to_next_drop += 1;
2169           n_left_to_next_drop -= 1;
2170
2171           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2172
2173           if (drop0)
2174             continue;
2175
2176           /* 
2177            * Can happen if the control-plane is programming tables
2178            * with traffic flowing; at least that's today's lame excuse.
2179            */
2180           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2181             {
2182               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2183             }
2184           else
2185           /* Send ARP request. */
2186           {
2187             u32 bi0 = 0;
2188             vlib_buffer_t * b0;
2189             ethernet_arp_header_t * h0;
2190             vnet_hw_interface_t * hw_if0;
2191
2192             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2193
2194             /* Add rewrite/encap string for ARP packet. */
2195             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2196
2197             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2198
2199             /* Src ethernet address in ARP header. */
2200             memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2201                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2202
2203             ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0);
2204
2205             /* Copy in destination address we are requesting. */
2206             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2207
2208             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2209             b0 = vlib_get_buffer (vm, bi0);
2210             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2211
2212             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2213
2214             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2215           }
2216         }
2217
2218       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2219     }
2220
2221   return frame->n_vectors;
2222 }
2223
2224 static char * ip4_arp_error_strings[] = {
2225   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2226   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2227   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2228   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2229   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2230 };
2231
2232 VLIB_REGISTER_NODE (ip4_arp_node) = {
2233   .function = ip4_arp,
2234   .name = "ip4-arp",
2235   .vector_size = sizeof (u32),
2236
2237   .format_trace = format_ip4_forward_next_trace,
2238
2239   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2240   .error_strings = ip4_arp_error_strings,
2241
2242   .n_next_nodes = IP4_ARP_N_NEXT,
2243   .next_nodes = {
2244     [IP4_ARP_NEXT_DROP] = "error-drop",
2245   },
2246 };
2247
2248 #define foreach_notrace_ip4_arp_error           \
2249 _(DROP)                                         \
2250 _(REQUEST_SENT)                                 \
2251 _(REPLICATE_DROP)                               \
2252 _(REPLICATE_FAIL)
2253
2254 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2255 {
2256   vlib_node_runtime_t *rt = 
2257     vlib_node_get_runtime (vm, ip4_arp_node.index);
2258
2259   /* don't trace ARP request packets */
2260 #define _(a)                                    \
2261     vnet_pcap_drop_trace_filter_add_del         \
2262         (rt->errors[IP4_ARP_ERROR_##a],         \
2263          1 /* is_add */);
2264     foreach_notrace_ip4_arp_error;
2265 #undef _
2266   return 0;
2267 }
2268
2269 VLIB_INIT_FUNCTION(arp_notrace_init);
2270
2271
2272 /* Send an ARP request to see if given destination is reachable on given interface. */
2273 clib_error_t *
2274 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2275 {
2276   vnet_main_t * vnm = vnet_get_main();
2277   ip4_main_t * im = &ip4_main;
2278   ethernet_arp_header_t * h;
2279   ip4_address_t * src;
2280   ip_interface_address_t * ia;
2281   ip_adjacency_t * adj;
2282   vnet_hw_interface_t * hi;
2283   vnet_sw_interface_t * si;
2284   vlib_buffer_t * b;
2285   u32 bi = 0;
2286
2287   si = vnet_get_sw_interface (vnm, sw_if_index);
2288
2289   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2290     {
2291       return clib_error_return (0, "%U: interface %U down",
2292                                 format_ip4_address, dst, 
2293                                 format_vnet_sw_if_index_name, vnm, 
2294                                 sw_if_index);
2295     }
2296
2297   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2298   if (! src)
2299     {
2300       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2301       return clib_error_return 
2302         (0, "no matching interface address for destination %U (interface %U)",
2303          format_ip4_address, dst,
2304          format_vnet_sw_if_index_name, vnm, sw_if_index);
2305     }
2306
2307   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2308
2309   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2310
2311   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2312
2313   memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2314
2315   h->ip4_over_ethernet[0].ip4 = src[0];
2316   h->ip4_over_ethernet[1].ip4 = dst[0];
2317
2318   b = vlib_get_buffer (vm, bi);
2319   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2320
2321   /* Add encapsulation string for software interface (e.g. ethernet header). */
2322   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2323   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2324
2325   {
2326     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2327     u32 * to_next = vlib_frame_vector_args (f);
2328     to_next[0] = bi;
2329     f->n_vectors = 1;
2330     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2331   }
2332
2333   return /* no error */ 0;
2334 }
2335
/* Next-node indices of the ip4 rewrite nodes. */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ARP = 1,
} ip4_rewrite_next_t;
2340
2341 always_inline uword
2342 ip4_rewrite_inline (vlib_main_t * vm,
2343                     vlib_node_runtime_t * node,
2344                     vlib_frame_t * frame,
2345                     int rewrite_for_locally_received_packets)
2346 {
2347   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2348   u32 * from = vlib_frame_vector_args (frame);
2349   u32 n_left_from, n_left_to_next, * to_next, next_index;
2350   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2351   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2352
2353   n_left_from = frame->n_vectors;
2354   next_index = node->cached_next_index;
2355   u32 cpu_index = os_get_cpu_number();
2356   
2357   while (n_left_from > 0)
2358     {
2359       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2360
2361       while (n_left_from >= 4 && n_left_to_next >= 2)
2362         {
2363           ip_adjacency_t * adj0, * adj1;
2364           vlib_buffer_t * p0, * p1;
2365           ip4_header_t * ip0, * ip1;
2366           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2367           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2368           u32 next0_override, next1_override;
2369       
2370           if (rewrite_for_locally_received_packets)
2371               next0_override = next1_override = 0;
2372
2373           /* Prefetch next iteration. */
2374           {
2375             vlib_buffer_t * p2, * p3;
2376
2377             p2 = vlib_get_buffer (vm, from[2]);
2378             p3 = vlib_get_buffer (vm, from[3]);
2379
2380             vlib_prefetch_buffer_header (p2, STORE);
2381             vlib_prefetch_buffer_header (p3, STORE);
2382
2383             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2384             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2385           }
2386
2387           pi0 = to_next[0] = from[0];
2388           pi1 = to_next[1] = from[1];
2389
2390           from += 2;
2391           n_left_from -= 2;
2392           to_next += 2;
2393           n_left_to_next -= 2;
2394       
2395           p0 = vlib_get_buffer (vm, pi0);
2396           p1 = vlib_get_buffer (vm, pi1);
2397
2398           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2399           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2400
2401           /* We should never rewrite a pkt using the MISS adjacency */
2402           ASSERT(adj_index0 && adj_index1);
2403
2404           ip0 = vlib_buffer_get_current (p0);
2405           ip1 = vlib_buffer_get_current (p1);
2406
2407           error0 = error1 = IP4_ERROR_NONE;
2408
2409           /* Decrement TTL & update checksum.
2410              Works either endian, so no need for byte swap. */
2411           if (! rewrite_for_locally_received_packets)
2412             {
2413               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2414
2415               /* Input node should have reject packets with ttl 0. */
2416               ASSERT (ip0->ttl > 0);
2417               ASSERT (ip1->ttl > 0);
2418
2419               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2420               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2421
2422               checksum0 += checksum0 >= 0xffff;
2423               checksum1 += checksum1 >= 0xffff;
2424
2425               ip0->checksum = checksum0;
2426               ip1->checksum = checksum1;
2427
2428               ttl0 -= 1;
2429               ttl1 -= 1;
2430
2431               ip0->ttl = ttl0;
2432               ip1->ttl = ttl1;
2433
2434               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2435               error1 = ttl1 <= 0 ? IP4_ERROR_TIME_EXPIRED : error1;
2436
2437               /* Verify checksum. */
2438               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2439               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2440             }
2441
2442           /* Rewrite packet header and updates lengths. */
2443           adj0 = ip_get_adjacency (lm, adj_index0);
2444           adj1 = ip_get_adjacency (lm, adj_index1);
2445       
2446           if (rewrite_for_locally_received_packets)
2447             {
2448               /*
2449                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2450                * we end up here with a local adjacency in hand
2451                * The local adj rewrite data is 0xfefe on purpose.
2452                * Bad engineer, no donut for you.
2453                */
2454               if (PREDICT_FALSE(adj0->lookup_next_index 
2455                                 == IP_LOOKUP_NEXT_LOCAL))
2456                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2457               if (PREDICT_FALSE(adj0->lookup_next_index
2458                                 == IP_LOOKUP_NEXT_ARP))
2459                 next0_override = IP4_REWRITE_NEXT_ARP;
2460               if (PREDICT_FALSE(adj1->lookup_next_index 
2461                                 == IP_LOOKUP_NEXT_LOCAL))
2462                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2463               if (PREDICT_FALSE(adj1->lookup_next_index
2464                                 == IP_LOOKUP_NEXT_ARP))
2465                 next1_override = IP4_REWRITE_NEXT_ARP;
2466             }
2467
2468           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2469           rw_len0 = adj0[0].rewrite_header.data_bytes;
2470           rw_len1 = adj1[0].rewrite_header.data_bytes;
2471           next0 = (error0 == IP4_ERROR_NONE) 
2472             ? adj0[0].rewrite_header.next_index : 0;
2473
2474           if (rewrite_for_locally_received_packets)
2475               next0 = next0 && next0_override ? next0_override : next0;
2476
2477           next1 = (error1 == IP4_ERROR_NONE)
2478             ? adj1[0].rewrite_header.next_index : 0;
2479
2480           if (rewrite_for_locally_received_packets)
2481               next1 = next1 && next1_override ? next1_override : next1;
2482
2483           /* 
2484            * We've already accounted for an ethernet_header_t elsewhere
2485            */
2486           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2487               vlib_increment_combined_counter 
2488                   (&lm->adjacency_counters,
2489                    cpu_index, adj_index0, 
2490                    /* packet increment */ 0,
2491                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2492
2493           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2494               vlib_increment_combined_counter 
2495                   (&lm->adjacency_counters,
2496                    cpu_index, adj_index1, 
2497                    /* packet increment */ 0,
2498                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2499
2500           /* Check MTU of outgoing interface. */
2501           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2502                     ? IP4_ERROR_MTU_EXCEEDED
2503                     : error0);
2504           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2505                     ? IP4_ERROR_MTU_EXCEEDED
2506                     : error1);
2507
2508           p0->current_data -= rw_len0;
2509           p1->current_data -= rw_len1;
2510
2511           p0->current_length += rw_len0;
2512           p1->current_length += rw_len1;
2513
2514           vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
2515           vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index;
2516       
2517           p0->error = error_node->errors[error0];
2518           p1->error = error_node->errors[error1];
2519
2520           /* Guess we are only writing on simple Ethernet header. */
2521           vnet_rewrite_two_headers (adj0[0], adj1[0],
2522                                     ip0, ip1,
2523                                     sizeof (ethernet_header_t));
2524       
2525           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2526                                            to_next, n_left_to_next,
2527                                            pi0, pi1, next0, next1);
2528         }
2529
2530       while (n_left_from > 0 && n_left_to_next > 0)
2531         {
2532           ip_adjacency_t * adj0;
2533           vlib_buffer_t * p0;
2534           ip4_header_t * ip0;
2535           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2536           u32 next0_override;
2537       
2538           if (rewrite_for_locally_received_packets)
2539               next0_override = 0;
2540
2541           pi0 = to_next[0] = from[0];
2542
2543           p0 = vlib_get_buffer (vm, pi0);
2544
2545           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2546
2547           /* We should never rewrite a pkt using the MISS adjacency */
2548           ASSERT(adj_index0);
2549
2550           adj0 = ip_get_adjacency (lm, adj_index0);
2551       
2552           ip0 = vlib_buffer_get_current (p0);
2553
2554           error0 = IP4_ERROR_NONE;
2555           next0 = 0;            /* drop on error */
2556
2557           /* Decrement TTL & update checksum. */
2558           if (! rewrite_for_locally_received_packets)
2559             {
2560               i32 ttl0 = ip0->ttl;
2561
2562               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2563
2564               checksum0 += checksum0 >= 0xffff;
2565
2566               ip0->checksum = checksum0;
2567
2568               ASSERT (ip0->ttl > 0);
2569
2570               ttl0 -= 1;
2571
2572               ip0->ttl = ttl0;
2573
2574               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2575
2576               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2577             }
2578
2579           if (rewrite_for_locally_received_packets)
2580             {
2581               /*
2582                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2583                * we end up here with a local adjacency in hand
2584                * The local adj rewrite data is 0xfefe on purpose.
2585                * Bad engineer, no donut for you.
2586                */
2587               if (PREDICT_FALSE(adj0->lookup_next_index 
2588                                 == IP_LOOKUP_NEXT_LOCAL))
2589                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2590               /* 
2591                * We have to override the next_index in ARP adjacencies,
2592                * because they're set up for ip4-arp, not this node...
2593                */
2594               if (PREDICT_FALSE(adj0->lookup_next_index
2595                                 == IP_LOOKUP_NEXT_ARP))
2596                 next0_override = IP4_REWRITE_NEXT_ARP;
2597             }
2598
2599           /* Guess we are only writing on simple Ethernet header. */
2600           vnet_rewrite_one_header (adj0[0], ip0, 
2601                                    sizeof (ethernet_header_t));
2602           
2603           /* Update packet buffer attributes/set output interface. */
2604           rw_len0 = adj0[0].rewrite_header.data_bytes;
2605           
2606           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2607               vlib_increment_combined_counter 
2608                   (&lm->adjacency_counters,
2609                    cpu_index, adj_index0, 
2610                    /* packet increment */ 0,
2611                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2612           
2613           /* Check MTU of outgoing interface. */
2614           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2615                     > adj0[0].rewrite_header.max_l3_packet_bytes
2616                     ? IP4_ERROR_MTU_EXCEEDED
2617                     : error0);
2618           
2619           p0->error = error_node->errors[error0];
2620           p0->current_data -= rw_len0;
2621           p0->current_length += rw_len0;
2622           vnet_buffer (p0)->sw_if_index[VLIB_TX] = 
2623             adj0[0].rewrite_header.sw_if_index;
2624           
2625           next0 = (error0 == IP4_ERROR_NONE)
2626             ? adj0[0].rewrite_header.next_index : 0;
2627
2628           if (rewrite_for_locally_received_packets)
2629               next0 = next0 && next0_override ? next0_override : next0;
2630
2631           from += 1;
2632           n_left_from -= 1;
2633           to_next += 1;
2634           n_left_to_next -= 1;
2635       
2636           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2637                                            to_next, n_left_to_next,
2638                                            pi0, next0);
2639         }
2640   
2641       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2642     }
2643
2644   /* Need to do trace after rewrites to pick up new packet data. */
2645   if (node->flags & VLIB_NODE_FLAG_TRACE)
2646     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2647
2648   return frame->n_vectors;
2649 }
2650
2651 static uword
2652 ip4_rewrite_transit (vlib_main_t * vm,
2653                      vlib_node_runtime_t * node,
2654                      vlib_frame_t * frame)
2655 {
2656   return ip4_rewrite_inline (vm, node, frame,
2657                              /* rewrite_for_locally_received_packets */ 0);
2658 }
2659
2660 static uword
2661 ip4_rewrite_local (vlib_main_t * vm,
2662                    vlib_node_runtime_t * node,
2663                    vlib_frame_t * frame)
2664 {
2665   return ip4_rewrite_inline (vm, node, frame,
2666                              /* rewrite_for_locally_received_packets */ 1);
2667 }
2668
2669 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2670   .function = ip4_rewrite_transit,
2671   .name = "ip4-rewrite-transit",
2672   .vector_size = sizeof (u32),
2673
2674   .format_trace = format_ip4_forward_next_trace,
2675
2676   .n_next_nodes = 2,
2677   .next_nodes = {
2678     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2679     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2680   },
2681 };
2682
2683 VLIB_REGISTER_NODE (ip4_rewrite_local_node,static) = {
2684   .function = ip4_rewrite_local,
2685   .name = "ip4-rewrite-local",
2686   .vector_size = sizeof (u32),
2687
2688   .sibling_of = "ip4-rewrite-transit",
2689
2690   .format_trace = format_ip4_forward_next_trace,
2691
2692   .n_next_nodes = 2,
2693   .next_nodes = {
2694     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2695     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2696   },
2697 };
2698
2699 static clib_error_t *
2700 add_del_interface_table (vlib_main_t * vm,
2701                          unformat_input_t * input,
2702                          vlib_cli_command_t * cmd)
2703 {
2704   vnet_main_t * vnm = vnet_get_main();
2705   clib_error_t * error = 0;
2706   u32 sw_if_index, table_id;
2707
2708   sw_if_index = ~0;
2709
2710   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2711     {
2712       error = clib_error_return (0, "unknown interface `%U'",
2713                                  format_unformat_error, input);
2714       goto done;
2715     }
2716
2717   if (unformat (input, "%d", &table_id))
2718     ;
2719   else
2720     {
2721       error = clib_error_return (0, "expected table id `%U'",
2722                                  format_unformat_error, input);
2723       goto done;
2724     }
2725
2726   {
2727     ip4_main_t * im = &ip4_main;
2728     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
2729
2730     if (fib) 
2731       {
2732         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2733         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
2734     }
2735   }
2736
2737  done:
2738   return error;
2739 }
2740
2741 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2742   .path = "set interface ip table",
2743   .function = add_del_interface_table,
2744   .short_help = "Add/delete FIB table id for interface",
2745 };
2746
2747
/*
 * Node function registered as "ip4-lookup-multicast".
 *
 * For every buffer in the frame:
 *   - pick a FIB index: the one bound to the RX interface, unless
 *     sw_if_index[VLIB_TX] is not ~0, in which case that value is used
 *     directly as the FIB index;
 *   - look up the IPv4 destination address to get an adjacency index;
 *   - take the next node from that adjacency's lookup_next_index;
 *   - compute the flow hash, store it in the buffer, and add
 *     (hash & (n_adj - 1)) to the adjacency index;
 *   - store the resulting index in ip.adj_index[VLIB_TX] and bump the
 *     combined counter for it by one packet and the buffer chain length;
 *   - enqueue the buffer to the chosen next node.
 *
 * Returns the number of vectors in the frame.
 */
2748 static uword
2749 ip4_lookup_multicast (vlib_main_t * vm,
2750                       vlib_node_runtime_t * node,
2751                       vlib_frame_t * frame)
2752 {
2753   ip4_main_t * im = &ip4_main;
2754   ip_lookup_main_t * lm = &im->lookup_main;
2755   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
2756   u32 n_left_from, n_left_to_next, * from, * to_next;
2757   ip_lookup_next_t next;
2758   u32 cpu_index = os_get_cpu_number();
2759
2760   from = vlib_frame_vector_args (frame);
2761   n_left_from = frame->n_vectors;
       /* Start by speculating that buffers go where the last frame went. */
2762   next = node->cached_next_index;
2763
2764   while (n_left_from > 0)
2765     {
2766       vlib_get_next_frame (vm, node, next,
2767                            to_next, n_left_to_next);
2768
           /*
            * Dual loop: two buffers per iteration.  Four inputs must remain
            * because from[2] and from[3] are prefetched below.
            */
2769       while (n_left_from >= 4 && n_left_to_next >= 2)
2770         {
2771           vlib_buffer_t * p0, * p1;
2772           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
2773           ip_lookup_next_t next0, next1;
2774           ip4_header_t * ip0, * ip1;
2775           ip_adjacency_t * adj0, * adj1;
2776           u32 fib_index0, fib_index1;
2777           u32 flow_hash_config0, flow_hash_config1;
2778
2779           /* Prefetch next iteration. */
2780           {
2781             vlib_buffer_t * p2, * p3;
2782
2783             p2 = vlib_get_buffer (vm, from[2]);
2784             p3 = vlib_get_buffer (vm, from[3]);
2785
2786             vlib_prefetch_buffer_header (p2, LOAD);
2787             vlib_prefetch_buffer_header (p3, LOAD);
2788
2789             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2790             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2791           }
2792
               /* Speculatively place both buffers in the current next frame. */
2793           pi0 = to_next[0] = from[0];
2794           pi1 = to_next[1] = from[1];
2795
2796           p0 = vlib_get_buffer (vm, pi0);
2797           p1 = vlib_get_buffer (vm, pi1);
2798
2799           ip0 = vlib_buffer_get_current (p0);
2800           ip1 = vlib_buffer_get_current (p1);
2801
               /*
                * Default to the FIB bound to the RX interface; a TX
                * sw_if_index other than ~0 overrides it and is used as the
                * FIB index itself.
                */
2802           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2803           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2804           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2805             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2806           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2807             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2808
2809           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
2810                                               &ip0->dst_address, p0);
2811           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
2812                                               &ip1->dst_address, p1);
2813
2814           adj0 = ip_get_adjacency (lm, adj_index0);
2815           adj1 = ip_get_adjacency (lm, adj_index1);
2816
               /* Next node is read from the adjacency the lookup returned,
                  before the flow-hash offset is applied below. */
2817           next0 = adj0->lookup_next_index;
2818           next1 = adj1->lookup_next_index;
2819
2820           flow_hash_config0 = 
2821               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
2822
2823           flow_hash_config1 = 
2824               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
2825
2826           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
2827               (ip0, flow_hash_config0);
2828                                                                   
2829           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
2830               (ip1, flow_hash_config1);
2831
               /* n_adj is asserted to be a non-zero power of two, so the
                  mask yields an offset in [0, n_adj - 1]. */
2832           ASSERT (adj0->n_adj > 0);
2833           ASSERT (adj1->n_adj > 0);
2834           ASSERT (is_pow2 (adj0->n_adj));
2835           ASSERT (is_pow2 (adj1->n_adj));
2836           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
2837           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
2838
2839           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2840           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2841
               /* Count one packet and its chain length against the selected
                  adjacency index; the always-true guard is a marked hack. */
2842           if (1) /* $$$$$$ HACK FIXME */
2843           vlib_increment_combined_counter 
2844               (cm, cpu_index, adj_index0, 1,
2845                vlib_buffer_length_in_chain (vm, p0));
2846           if (1) /* $$$$$$ HACK FIXME */
2847           vlib_increment_combined_counter 
2848               (cm, cpu_index, adj_index1, 1,
2849                vlib_buffer_length_in_chain (vm, p1));
2850
2851           from += 2;
2852           to_next += 2;
2853           n_left_to_next -= 2;
2854           n_left_from -= 2;
2855
               /*
                * Verify the speculation.  Bit 0 is set when buffer 0 does not
                * belong in the current next frame, bit 1 when buffer 1 does
                * not.  to_next has already been advanced past both slots, so
                * the fix-ups below index backwards from it.
                */
2856           wrong_next = (next0 != next) + 2*(next1 != next);
2857           if (PREDICT_FALSE (wrong_next != 0))
2858             {
2859               switch (wrong_next)
2860                 {
2861                 case 1:
2862                   /* A B A */
                       /* Only buffer 0 is misplaced: move buffer 1 into its
                          slot, give one slot back, send buffer 0 to next0. */
2863                   to_next[-2] = pi1;
2864                   to_next -= 1;
2865                   n_left_to_next += 1;
2866                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2867                   break;
2868
2869                 case 2:
2870                   /* A A B */
                       /* Only buffer 1 is misplaced: give its slot back and
                          send it to next1. */
2871                   to_next -= 1;
2872                   n_left_to_next += 1;
2873                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2874                   break;
2875
2876                 case 3:
2877                   /* A B C */
                       /* Both are misplaced: give both slots back and send
                          each buffer to its own next node. */
2878                   to_next -= 2;
2879                   n_left_to_next += 2;
2880                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2881                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2882                   if (next0 == next1)
2883                     {
2884                       /* A B B */
                           /* Both agree on a different next node: close the
                              current frame and speculate on that one. */
2885                       vlib_put_next_frame (vm, node, next, n_left_to_next);
2886                       next = next1;
2887                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2888                     }
2889                 }
2890             }
2891         }
2892     
           /* Single loop: same processing, one buffer at a time. */
2893       while (n_left_from > 0 && n_left_to_next > 0)
2894         {
2895           vlib_buffer_t * p0;
2896           ip4_header_t * ip0;
2897           u32 pi0, adj_index0;
2898           ip_lookup_next_t next0;
2899           ip_adjacency_t * adj0;
2900           u32 fib_index0;
2901           u32 flow_hash_config0;
2902
2903           pi0 = from[0];
2904           to_next[0] = pi0;
2905
2906           p0 = vlib_get_buffer (vm, pi0);
2907
2908           ip0 = vlib_buffer_get_current (p0);
2909
               /* RX interface's FIB unless sw_if_index[VLIB_TX] is set. */
2910           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2911                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2912           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2913               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2914           
2915           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
2916                                               &ip0->dst_address, p0);
2917
2918           adj0 = ip_get_adjacency (lm, adj_index0);
2919
2920           next0 = adj0->lookup_next_index;
2921
2922           flow_hash_config0 = 
2923               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
2924
2925           vnet_buffer (p0)->ip.flow_hash = 
2926             ip4_compute_flow_hash (ip0, flow_hash_config0);
2927
2928           ASSERT (adj0->n_adj > 0);
2929           ASSERT (is_pow2 (adj0->n_adj));
2930           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
2931
2932           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2933
2934           if (1) /* $$$$$$ HACK FIXME */
2935               vlib_increment_combined_counter 
2936                   (cm, cpu_index, adj_index0, 1,
2937                    vlib_buffer_length_in_chain (vm, p0));
2938
2939           from += 1;
2940           to_next += 1;
2941           n_left_to_next -= 1;
2942           n_left_from -= 1;
2943
2944           if (PREDICT_FALSE (next0 != next))
2945             {
                   /* Speculation failed: take the slot back, close the
                      current frame, switch to next0 and enqueue there. */
2946               n_left_to_next += 1;
2947               vlib_put_next_frame (vm, node, next, n_left_to_next);
2948               next = next0;
2949               vlib_get_next_frame (vm, node, next,
2950                                    to_next, n_left_to_next);
2951               to_next[0] = pi0;
2952               to_next += 1;
2953               n_left_to_next -= 1;
2954             }
2955         }
2956
2957       vlib_put_next_frame (vm, node, next, n_left_to_next);
2958     }
2959
2960   return frame->n_vectors;
2961 }
2962
2963 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2964   .function = ip4_lookup_multicast,
2965   .name = "ip4-lookup-multicast",
2966   .vector_size = sizeof (u32),
2967
2968   .n_next_nodes = IP_LOOKUP_N_NEXT,
2969   .next_nodes = IP4_LOOKUP_NEXT_NODES,
2970 };
2971
2972 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
2973   .function = ip4_drop,
2974   .name = "ip4-multicast",
2975   .vector_size = sizeof (u32),
2976
2977   .format_trace = format_ip4_forward_next_trace,
2978
2979   .n_next_nodes = 1,
2980   .next_nodes = {
2981     [0] = "error-drop",
2982   },
2983 };
2984
2985 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
2986 {
2987   ip4_main_t * im = &ip4_main;
2988   ip4_fib_mtrie_t * mtrie0;
2989   ip4_fib_mtrie_leaf_t leaf0;
2990   u32 adj_index0;
2991     
2992   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2993
2994   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2995   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2996   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2997   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2998   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2999   
3000   /* Handle default route. */
3001   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3002   
3003   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3004   
3005   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3006                                                   a, 
3007                                                   /* no_default_route */ 0);
3008 }
3009  
3010 static clib_error_t *
3011 test_lookup_command_fn (vlib_main_t * vm,
3012                         unformat_input_t * input,
3013                         vlib_cli_command_t * cmd)
3014 {
3015   u32 table_id = 0;
3016   f64 count = 1;
3017   u32 n;
3018   int i;
3019   ip4_address_t ip4_base_address;
3020   u64 errors = 0;
3021
3022   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3023       if (unformat (input, "table %d", &table_id))
3024         ;
3025       else if (unformat (input, "count %f", &count))
3026         ;
3027
3028       else if (unformat (input, "%U",
3029                          unformat_ip4_address, &ip4_base_address))
3030         ;
3031       else
3032         return clib_error_return (0, "unknown input `%U'",
3033                                   format_unformat_error, input);
3034   }
3035
3036   n = count;
3037
3038   for (i = 0; i < n; i++)
3039     {
3040       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3041         errors++;
3042
3043       ip4_base_address.as_u32 = 
3044         clib_host_to_net_u32 (1 + 
3045                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3046     }
3047
3048   if (errors) 
3049     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3050   else
3051     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3052
3053   return 0;
3054 }
3055
3056 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3057     .path = "test lookup",
3058     .short_help = "test lookup",
3059     .function = test_lookup_command_fn,
3060 };
3061
3062 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3063 {
3064   ip4_main_t * im4 = &ip4_main;
3065   ip4_fib_t * fib;
3066   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3067
3068   if (p == 0)
3069     return VNET_API_ERROR_NO_SUCH_FIB;
3070
3071   fib = vec_elt_at_index (im4->fibs, p[0]);
3072
3073   fib->flow_hash_config = flow_hash_config;
3074   return 0;
3075 }
3076  
3077 static clib_error_t *
3078 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3079                              unformat_input_t * input,
3080                              vlib_cli_command_t * cmd)
3081 {
3082   int matched = 0;
3083   u32 table_id = 0;
3084   u32 flow_hash_config = 0;
3085   int rv;
3086
3087   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3088     if (unformat (input, "table %d", &table_id))
3089       matched = 1;
3090 #define _(a,v) \
3091     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3092     foreach_flow_hash_bit
3093 #undef _
3094     else break;
3095   }
3096   
3097   if (matched == 0)
3098     return clib_error_return (0, "unknown input `%U'",
3099                               format_unformat_error, input);
3100   
3101   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3102   switch (rv)
3103     {
3104     case 0:
3105       break;
3106       
3107     case VNET_API_ERROR_NO_SUCH_FIB:
3108       return clib_error_return (0, "no such FIB table %d", table_id);
3109       
3110     default:
3111       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3112       break;
3113     }
3114   
3115   return 0;
3116 }
3117  
3118 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3119   .path = "set ip flow-hash",
3120   .short_help = 
3121   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3122   .function = set_ip_flow_hash_command_fn,
3123 };
3124  
3125 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3126                                  u32 table_index)
3127 {
3128   vnet_main_t * vnm = vnet_get_main();
3129   vnet_interface_main_t * im = &vnm->interface_main;
3130   ip4_main_t * ipm = &ip4_main;
3131   ip_lookup_main_t * lm = &ipm->lookup_main;
3132   vnet_classify_main_t * cm = &vnet_classify_main;
3133
3134   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3135     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3136
3137   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3138     return VNET_API_ERROR_NO_SUCH_ENTRY;
3139
3140   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3141   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3142
3143   return 0;
3144 }
3145
3146 static clib_error_t *
3147 set_ip_classify_command_fn (vlib_main_t * vm,
3148                             unformat_input_t * input,
3149                             vlib_cli_command_t * cmd)
3150 {
3151   u32 table_index = ~0;
3152   int table_index_set = 0;
3153   u32 sw_if_index = ~0;
3154   int rv;
3155   
3156   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3157     if (unformat (input, "table-index %d", &table_index))
3158       table_index_set = 1;
3159     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3160                        vnet_get_main(), &sw_if_index))
3161       ;
3162     else
3163       break;
3164   }
3165       
3166   if (table_index_set == 0)
3167     return clib_error_return (0, "classify table-index must be specified");
3168
3169   if (sw_if_index == ~0)
3170     return clib_error_return (0, "interface / subif must be specified");
3171
3172   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3173
3174   switch (rv)
3175     {
3176     case 0:
3177       break;
3178
3179     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3180       return clib_error_return (0, "No such interface");
3181
3182     case VNET_API_ERROR_NO_SUCH_ENTRY:
3183       return clib_error_return (0, "No such classifier table");
3184     }
3185   return 0;
3186 }
3187
3188 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3189     .path = "set ip classify",
3190     .short_help = 
3191     "set ip classify intfc <int> table-index <index>",
3192     .function = set_ip_classify_command_fn,
3193 };
3194