Move classify_table_index under the union
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47
48 /* This is really, really simple but stupid fib. */
49 u32
50 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
51                            ip4_address_t * dst,
52                            u32 disable_default_route)
53 {
54   ip_lookup_main_t * lm = &im->lookup_main;
55   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
56   uword * p, * hash, key;
57   i32 i, i_min, dst_address, ai;
58
59   i_min = disable_default_route ? 1 : 0;
60   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
61   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
62     {
63       hash = fib->adj_index_by_dst_address[i];
64       if (! hash)
65         continue;
66
67       key = dst_address & im->fib_masks[i];
68       if ((p = hash_get (hash, key)) != 0)
69         {
70           ai = p[0];
71           goto done;
72         }
73     }
74     
75   /* Nothing matches in table. */
76   ai = lm->miss_adj_index;
77
78  done:
79   return ai;
80 }
81
82 static ip4_fib_t *
83 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
84 {
85   ip4_fib_t * fib;
86   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
87   vec_add2 (im->fibs, fib, 1);
88   fib->table_id = table_id;
89   fib->index = fib - im->fibs;
90   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
91   fib->fwd_classify_table_index = ~0;
92   fib->rev_classify_table_index = ~0;
93   ip4_mtrie_init (&fib->mtrie);
94   return fib;
95 }
96
97 ip4_fib_t *
98 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
99                                    u32 table_index_or_id, u32 flags)
100 {
101   uword * p, fib_index;
102
103   fib_index = table_index_or_id;
104   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
105     {
106       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
107       if (! p)
108         return create_fib_with_table_id (im, table_index_or_id);
109       fib_index = p[0];
110     }
111   return vec_elt_at_index (im->fibs, fib_index);
112 }
113
114 static void
115 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
116                                        ip4_fib_t * fib,
117                                        u32 address_length)
118 {
119   hash_t * h;
120   uword max_index;
121
122   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
123   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
124
125   fib->adj_index_by_dst_address[address_length] =
126     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
127
128   hash_set_flags (fib->adj_index_by_dst_address[address_length],
129                   HASH_FLAG_NO_AUTO_SHRINK);
130
131   h = hash_header (fib->adj_index_by_dst_address[address_length]);
132   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
133
134   /* Initialize new/old hash value vectors. */
135   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
136   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
137 }
138
139 static void
140 ip4_fib_set_adj_index (ip4_main_t * im,
141                        ip4_fib_t * fib,
142                        u32 flags,
143                        u32 dst_address_u32,
144                        u32 dst_address_length,
145                        u32 adj_index)
146 {
147   ip_lookup_main_t * lm = &im->lookup_main;
148   uword * hash;
149
150   if (vec_bytes(fib->old_hash_values))
151     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
152   if (vec_bytes(fib->new_hash_values))
153     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
154   fib->new_hash_values[0] = adj_index;
155
156   /* Make sure adj index is valid. */
157   if (CLIB_DEBUG > 0)
158     (void) ip_get_adjacency (lm, adj_index);
159
160   hash = fib->adj_index_by_dst_address[dst_address_length];
161
162   hash = _hash_set3 (hash, dst_address_u32,
163                      fib->new_hash_values,
164                      fib->old_hash_values);
165
166   fib->adj_index_by_dst_address[dst_address_length] = hash;
167
168   if (vec_len (im->add_del_route_callbacks) > 0)
169     {
170       ip4_add_del_route_callback_t * cb;
171       ip4_address_t d;
172       uword * p;
173
174       d.data_u32 = dst_address_u32;
175       vec_foreach (cb, im->add_del_route_callbacks)
176         if ((flags & cb->required_flags) == cb->required_flags)
177           cb->function (im, cb->function_opaque,
178                         fib, flags,
179                         &d, dst_address_length,
180                         fib->old_hash_values,
181                         fib->new_hash_values);
182
183       p = hash_get (hash, dst_address_u32);
184       memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
185     }
186 }
187
188 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
189 {
190   ip_lookup_main_t * lm = &im->lookup_main;
191   ip4_fib_t * fib;
192   u32 dst_address, dst_address_length, adj_index, old_adj_index;
193   uword * hash, is_del;
194   ip4_add_del_route_callback_t * cb;
195
196   /* Either create new adjacency or use given one depending on arguments. */
197   if (a->n_add_adj > 0)
198     {
199       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
200       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
201     }
202   else
203     adj_index = a->adj_index;
204
205   dst_address = a->dst_address.data_u32;
206   dst_address_length = a->dst_address_length;
207   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
208
209   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
210   dst_address &= im->fib_masks[dst_address_length];
211
212   if (! fib->adj_index_by_dst_address[dst_address_length])
213     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
214
215   hash = fib->adj_index_by_dst_address[dst_address_length];
216
217   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
218
219   if (is_del)
220     {
221       fib->old_hash_values[0] = ~0;
222       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
223       fib->adj_index_by_dst_address[dst_address_length] = hash;
224
225       if (vec_len (im->add_del_route_callbacks) > 0
226           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
227         {
228           fib->new_hash_values[0] = ~0;
229           vec_foreach (cb, im->add_del_route_callbacks)
230             if ((a->flags & cb->required_flags) == cb->required_flags)
231               cb->function (im, cb->function_opaque,
232                             fib, a->flags,
233                             &a->dst_address, dst_address_length,
234                             fib->old_hash_values,
235                             fib->new_hash_values);
236         }
237     }
238   else
239     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
240                            adj_index);
241
242   old_adj_index = fib->old_hash_values[0];
243
244   /* Avoid spurious reference count increments */
245   if (old_adj_index == adj_index)
246     {
247       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
248       if (adj->share_count > 0)
249         adj->share_count --;
250     }
251
252   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
253                                is_del ? old_adj_index : adj_index,
254                                is_del);
255
256   /* Delete old adjacency index if present and changed. */
257   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
258       && old_adj_index != ~0
259       && old_adj_index != adj_index)
260     ip_del_adjacency (lm, old_adj_index);
261 }
262
263 void
264 ip4_add_del_route_next_hop (ip4_main_t * im,
265                             u32 flags,
266                             ip4_address_t * dst_address,
267                             u32 dst_address_length,
268                             ip4_address_t * next_hop,
269                             u32 next_hop_sw_if_index,
270                             u32 next_hop_weight, u32 adj_index, 
271                             u32 explicit_fib_index)
272 {
273   vnet_main_t * vnm = vnet_get_main();
274   ip_lookup_main_t * lm = &im->lookup_main;
275   u32 fib_index;
276   ip4_fib_t * fib;
277   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
278   u32 dst_adj_index, nh_adj_index;
279   uword * dst_hash, * dst_result;
280   uword * nh_hash, * nh_result;
281   ip_adjacency_t * dst_adj;
282   ip_multipath_adjacency_t * old_mp, * new_mp;
283   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
284   int is_interface_next_hop;
285   clib_error_t * error = 0;
286
287   if (explicit_fib_index == (u32)~0)
288       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
289   else
290       fib_index = explicit_fib_index;
291
292   fib = vec_elt_at_index (im->fibs, fib_index);
293   
294   /* Lookup next hop to be added or deleted. */
295   is_interface_next_hop = next_hop->data_u32 == 0;
296   if (adj_index == (u32)~0)
297     {
298       if (is_interface_next_hop)
299         {
300           nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
301           if (nh_result)
302             nh_adj_index = *nh_result;
303           else
304             {
305               ip_adjacency_t * adj;
306               adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
307                                       &nh_adj_index);
308               ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
309               ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
310               hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
311             }
312         }
313       else
314         {
315           nh_hash = fib->adj_index_by_dst_address[32];
316           nh_result = hash_get (nh_hash, next_hop->data_u32);
317           
318           /* Next hop must be known. */
319           if (! nh_result)
320             {
321               vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_IN_FIB;
322               error = clib_error_return (0, "next-hop %U/32 not in FIB",
323                                          format_ip4_address, next_hop);
324               goto done;
325             }
326           nh_adj_index = *nh_result;
327         }
328     }
329   else
330     {
331       nh_adj_index = adj_index;
332     }
333   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
334   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
335
336   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
337   dst_result = hash_get (dst_hash, dst_address_u32);
338   if (dst_result)
339     {
340       dst_adj_index = dst_result[0];
341       dst_adj = ip_get_adjacency (lm, dst_adj_index);
342     }
343   else
344     {
345       /* For deletes destination must be known. */
346       if (is_del)
347         {
348           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
349           error = clib_error_return (0, "unknown destination %U/%d",
350                                      format_ip4_address, dst_address,
351                                      dst_address_length);
352           goto done;
353         }
354
355       dst_adj_index = ~0;
356       dst_adj = 0;
357     }
358
359   /* Ignore adds of X/32 with next hop of X. */
360   if (! is_del
361       && dst_address_length == 32
362       && dst_address->data_u32 == next_hop->data_u32 
363       && adj_index != (u32)~0)
364     {
365       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
366       error = clib_error_return (0, "prefix matches next hop %U/%d",
367                                  format_ip4_address, dst_address,
368                                  dst_address_length);
369       goto done;
370     }
371
372   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
373
374   if (! ip_multipath_adjacency_add_del_next_hop
375       (lm, is_del,
376        old_mp_adj_index,
377        nh_adj_index,
378        next_hop_weight,
379        &new_mp_adj_index))
380     {
381       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
382       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
383                                  format_ip4_address, next_hop);
384       goto done;
385     }
386   
387   old_mp = new_mp = 0;
388   if (old_mp_adj_index != ~0)
389     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
390   if (new_mp_adj_index != ~0)
391     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
392
393   if (old_mp != new_mp)
394     {
395       ip4_add_del_route_args_t a;
396       a.table_index_or_table_id = fib_index;
397       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
398                  | IP4_ROUTE_FLAG_FIB_INDEX
399                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
400                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
401       a.dst_address = dst_address[0];
402       a.dst_address_length = dst_address_length;
403       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
404       a.add_adj = 0;
405       a.n_add_adj = 0;
406
407       ip4_add_del_route (im, &a);
408     }
409
410  done:
411   if (error)
412     clib_error_report (error);
413 }
414
415 void *
416 ip4_get_route (ip4_main_t * im,
417                u32 table_index_or_table_id,
418                u32 flags,
419                u8 * address,
420                u32 address_length)
421 {
422   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
423   u32 dst_address = * (u32 *) address;
424   uword * hash, * p;
425
426   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
427   dst_address &= im->fib_masks[address_length];
428
429   hash = fib->adj_index_by_dst_address[address_length];
430   p = hash_get (hash, dst_address);
431   return (void *) p;
432 }
433
434 void
435 ip4_foreach_matching_route (ip4_main_t * im,
436                             u32 table_index_or_table_id,
437                             u32 flags,
438                             ip4_address_t * address,
439                             u32 address_length,
440                             ip4_address_t ** results,
441                             u8 ** result_lengths)
442 {
443   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
444   u32 dst_address = address->data_u32;
445   u32 this_length = address_length;
446   
447   if (*results)
448     _vec_len (*results) = 0;
449   if (*result_lengths)
450     _vec_len (*result_lengths) = 0;
451
452   while (this_length <= 32 && vec_len (results) == 0)
453     {
454       uword k, v;
455       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
456         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
457           {
458             ip4_address_t a;
459             a.data_u32 = k;
460             vec_add1 (*results, a);
461             vec_add1 (*result_lengths, this_length);
462           }
463       }));
464
465       this_length++;
466     }
467 }
468
469 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
470                                   u32 table_index_or_table_id,
471                                   u32 flags)
472 {
473   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
474   ip_lookup_main_t * lm = &im->lookup_main;
475   u32 i, l;
476   ip4_address_t a;
477   ip4_add_del_route_callback_t * cb;
478   static ip4_address_t * to_delete;
479
480   if (lm->n_adjacency_remaps == 0)
481     return;
482
483   for (l = 0; l <= 32; l++)
484     {
485       hash_pair_t * p;
486       uword * hash = fib->adj_index_by_dst_address[l];
487
488       if (hash_elts (hash) == 0)
489         continue;
490
491       if (to_delete)
492         _vec_len (to_delete) = 0;
493
494       hash_foreach_pair (p, hash, ({
495         u32 adj_index = p->value[0];
496         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
497
498         if (m)
499           {
500             /* Record destination address from hash key. */
501             a.data_u32 = p->key;
502
503             /* New adjacency points to nothing: so delete prefix. */
504             if (m == ~0)
505               vec_add1 (to_delete, a);
506             else
507               {
508                 /* Remap to new adjacency. */
509                 memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
510
511                 /* Set new adjacency value. */
512                 fib->new_hash_values[0] = p->value[0] = m - 1;
513
514                 vec_foreach (cb, im->add_del_route_callbacks)
515                   if ((flags & cb->required_flags) == cb->required_flags)
516                     cb->function (im, cb->function_opaque,
517                                   fib, flags | IP4_ROUTE_FLAG_ADD,
518                                   &a, l,
519                                   fib->old_hash_values,
520                                   fib->new_hash_values);
521               }
522           }
523       }));
524
525       fib->new_hash_values[0] = ~0;
526       for (i = 0; i < vec_len (to_delete); i++)
527         {
528           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
529           vec_foreach (cb, im->add_del_route_callbacks)
530             if ((flags & cb->required_flags) == cb->required_flags)
531               cb->function (im, cb->function_opaque,
532                             fib, flags | IP4_ROUTE_FLAG_DEL,
533                             &a, l,
534                             fib->old_hash_values,
535                             fib->new_hash_values);
536         }
537     }
538
539   /* Also remap adjacencies in mtrie. */
540   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
541
542   /* Reset mapping table. */
543   vec_zero (lm->adjacency_remap_table);
544
545   /* All remaps have been performed. */
546   lm->n_adjacency_remaps = 0;
547 }
548
549 void ip4_delete_matching_routes (ip4_main_t * im,
550                                  u32 table_index_or_table_id,
551                                  u32 flags,
552                                  ip4_address_t * address,
553                                  u32 address_length)
554 {
555   static ip4_address_t * matching_addresses;
556   static u8 * matching_address_lengths;
557   u32 l, i;
558   ip4_add_del_route_args_t a;
559
560   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
561   a.table_index_or_table_id = table_index_or_table_id;
562   a.adj_index = ~0;
563   a.add_adj = 0;
564   a.n_add_adj = 0;
565
566   for (l = address_length + 1; l <= 32; l++)
567     {
568       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
569                                   address,
570                                   l,
571                                   &matching_addresses,
572                                   &matching_address_lengths);
573       for (i = 0; i < vec_len (matching_addresses); i++)
574         {
575           a.dst_address = matching_addresses[i];
576           a.dst_address_length = matching_address_lengths[i];
577           ip4_add_del_route (im, &a);
578         }
579     }
580
581   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
582 }
583
584 always_inline uword
585 ip4_lookup_inline (vlib_main_t * vm,
586                    vlib_node_runtime_t * node,
587                    vlib_frame_t * frame,
588                    int lookup_for_responses_to_locally_received_packets)
589 {
590   ip4_main_t * im = &ip4_main;
591   ip_lookup_main_t * lm = &im->lookup_main;
592   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
593   u32 n_left_from, n_left_to_next, * from, * to_next;
594   ip_lookup_next_t next;
595   u32 cpu_index = os_get_cpu_number();
596
597   from = vlib_frame_vector_args (frame);
598   n_left_from = frame->n_vectors;
599   next = node->cached_next_index;
600
601   while (n_left_from > 0)
602     {
603       vlib_get_next_frame (vm, node, next,
604                            to_next, n_left_to_next);
605
606       while (n_left_from >= 4 && n_left_to_next >= 2)
607         {
608           vlib_buffer_t * p0, * p1;
609           ip4_header_t * ip0, * ip1;
610           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
611           ip_lookup_next_t next0, next1;
612           ip_adjacency_t * adj0, * adj1;
613           ip4_fib_mtrie_t * mtrie0, * mtrie1;
614           ip4_fib_mtrie_leaf_t leaf0, leaf1;
615           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
616           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
617           u32 flow_hash_config0, flow_hash_config1;
618           u32 hash_c0, hash_c1;
619           u32 wrong_next;
620
621           /* Prefetch next iteration. */
622           {
623             vlib_buffer_t * p2, * p3;
624
625             p2 = vlib_get_buffer (vm, from[2]);
626             p3 = vlib_get_buffer (vm, from[3]);
627
628             vlib_prefetch_buffer_header (p2, LOAD);
629             vlib_prefetch_buffer_header (p3, LOAD);
630
631             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
632             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
633           }
634
635           pi0 = to_next[0] = from[0];
636           pi1 = to_next[1] = from[1];
637
638           p0 = vlib_get_buffer (vm, pi0);
639           p1 = vlib_get_buffer (vm, pi1);
640
641           ip0 = vlib_buffer_get_current (p0);
642           ip1 = vlib_buffer_get_current (p1);
643
644           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
645           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
646           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
647             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
648           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
649             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
650
651
652           if (! lookup_for_responses_to_locally_received_packets)
653             {
654               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
655               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
656
657               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
658
659               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0);
660               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 0);
661             }
662
663           tcp0 = (void *) (ip0 + 1);
664           tcp1 = (void *) (ip1 + 1);
665
666           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
667                          || ip0->protocol == IP_PROTOCOL_UDP);
668           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
669                          || ip1->protocol == IP_PROTOCOL_UDP);
670
671           if (! lookup_for_responses_to_locally_received_packets)
672             {
673               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1);
674               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 1);
675             }
676
677           if (! lookup_for_responses_to_locally_received_packets)
678             {
679               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2);
680               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 2);
681             }
682
683           if (! lookup_for_responses_to_locally_received_packets)
684             {
685               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3);
686               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 3);
687             }
688
689           if (lookup_for_responses_to_locally_received_packets)
690             {
691               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
692               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
693             }
694           else
695             {
696               /* Handle default route. */
697               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
698               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
699
700               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
701               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
702             }
703
704           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
705                                                            &ip0->dst_address,
706                                                            /* no_default_route */ 0));
707           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
708                                                            &ip1->dst_address,
709                                                            /* no_default_route */ 0));
710           adj0 = ip_get_adjacency (lm, adj_index0);
711           adj1 = ip_get_adjacency (lm, adj_index1);
712
713           next0 = adj0->lookup_next_index;
714           next1 = adj1->lookup_next_index;
715
716           /* Use flow hash to compute multipath adjacency. */
717           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
718           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
719           if (PREDICT_FALSE (adj0->n_adj > 1))
720             {
721               flow_hash_config0 = 
722                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
723               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
724                 ip4_compute_flow_hash (ip0, flow_hash_config0);
725             }
726           if (PREDICT_FALSE(adj1->n_adj > 1))
727             {
728               flow_hash_config1 = 
729                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
730               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
731                 ip4_compute_flow_hash (ip1, flow_hash_config1);
732             }
733
734           ASSERT (adj0->n_adj > 0);
735           ASSERT (adj1->n_adj > 0);
736           ASSERT (is_pow2 (adj0->n_adj));
737           ASSERT (is_pow2 (adj1->n_adj));
738           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
739           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
740
741           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
742           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
743
744           vlib_increment_combined_counter 
745               (cm, cpu_index, adj_index0, 1,
746                vlib_buffer_length_in_chain (vm, p0) 
747                + sizeof(ethernet_header_t));
748           vlib_increment_combined_counter 
749               (cm, cpu_index, adj_index1, 1,
750                vlib_buffer_length_in_chain (vm, p1)
751                + sizeof(ethernet_header_t));
752
753           from += 2;
754           to_next += 2;
755           n_left_to_next -= 2;
756           n_left_from -= 2;
757
758           wrong_next = (next0 != next) + 2*(next1 != next);
759           if (PREDICT_FALSE (wrong_next != 0))
760             {
761               switch (wrong_next)
762                 {
763                 case 1:
764                   /* A B A */
765                   to_next[-2] = pi1;
766                   to_next -= 1;
767                   n_left_to_next += 1;
768                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
769                   break;
770
771                 case 2:
772                   /* A A B */
773                   to_next -= 1;
774                   n_left_to_next += 1;
775                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
776                   break;
777
778                 case 3:
779                   /* A B C */
780                   to_next -= 2;
781                   n_left_to_next += 2;
782                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
783                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
784                   if (next0 == next1)
785                     {
786                       /* A B B */
787                       vlib_put_next_frame (vm, node, next, n_left_to_next);
788                       next = next1;
789                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
790                     }
791                 }
792             }
793         }
794     
795       while (n_left_from > 0 && n_left_to_next > 0)
796         {
797           vlib_buffer_t * p0;
798           ip4_header_t * ip0;
799           __attribute__((unused)) tcp_header_t * tcp0;
800           ip_lookup_next_t next0;
801           ip_adjacency_t * adj0;
802           ip4_fib_mtrie_t * mtrie0;
803           ip4_fib_mtrie_leaf_t leaf0;
804           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
805           u32 flow_hash_config0, hash_c0;
806
807           pi0 = from[0];
808           to_next[0] = pi0;
809
810           p0 = vlib_get_buffer (vm, pi0);
811
812           ip0 = vlib_buffer_get_current (p0);
813
814           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
815           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
816             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
817
818           if (! lookup_for_responses_to_locally_received_packets)
819             {
820               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
821
822               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
823
824               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0);
825             }
826
827           tcp0 = (void *) (ip0 + 1);
828
829           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
830                          || ip0->protocol == IP_PROTOCOL_UDP);
831
832           if (! lookup_for_responses_to_locally_received_packets)
833             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1);
834
835           if (! lookup_for_responses_to_locally_received_packets)
836             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2);
837
838           if (! lookup_for_responses_to_locally_received_packets)
839             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3);
840
841           if (lookup_for_responses_to_locally_received_packets)
842             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
843           else
844             {
845               /* Handle default route. */
846               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
847               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
848             }
849
850           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
851                                                            &ip0->dst_address,
852                                                            /* no_default_route */ 0));
853
854           adj0 = ip_get_adjacency (lm, adj_index0);
855
856           next0 = adj0->lookup_next_index;
857
858           /* Use flow hash to compute multipath adjacency. */
859           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
860           if (PREDICT_FALSE(adj0->n_adj > 1))
861             {
862               flow_hash_config0 = 
863                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
864
865               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
866                 ip4_compute_flow_hash (ip0, flow_hash_config0);
867             }
868
869           ASSERT (adj0->n_adj > 0);
870           ASSERT (is_pow2 (adj0->n_adj));
871           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
872
873           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
874
875           vlib_increment_combined_counter 
876               (cm, cpu_index, adj_index0, 1,
877                vlib_buffer_length_in_chain (vm, p0)
878                + sizeof(ethernet_header_t));
879
880           from += 1;
881           to_next += 1;
882           n_left_to_next -= 1;
883           n_left_from -= 1;
884
885           if (PREDICT_FALSE (next0 != next))
886             {
887               n_left_to_next += 1;
888               vlib_put_next_frame (vm, node, next, n_left_to_next);
889               next = next0;
890               vlib_get_next_frame (vm, node, next,
891                                    to_next, n_left_to_next);
892               to_next[0] = pi0;
893               to_next += 1;
894               n_left_to_next -= 1;
895             }
896         }
897
898       vlib_put_next_frame (vm, node, next, n_left_to_next);
899     }
900
901   return frame->n_vectors;
902 }
903
904 static uword
905 ip4_lookup (vlib_main_t * vm,
906             vlib_node_runtime_t * node,
907             vlib_frame_t * frame)
908 {
909   return ip4_lookup_inline (vm, node, frame, /* lookup_for_responses_to_locally_received_packets */ 0);
910
911 }
912
913 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
914                                         ip_adjacency_t * adj,
915                                         u32 sw_if_index,
916                                         u32 if_address_index)
917 {
918   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
919   ip_lookup_next_t n;
920   vnet_l3_packet_type_t packet_type;
921   u32 node_index;
922
923   if (hw->hw_class_index == ethernet_hw_interface_class.index
924       || hw->hw_class_index == srp_hw_interface_class.index)
925     {
926       /* 
927        * We have a bit of a problem in this case. ip4-arp uses
928        * the rewrite_header.next_index to hand pkts to the
929        * indicated inteface output node. We can end up in
930        * ip4_rewrite_local, too, which also pays attention to 
931        * rewrite_header.next index. Net result: a hack in
932        * ip4_rewrite_local...
933        */
934       n = IP_LOOKUP_NEXT_ARP;
935       node_index = ip4_arp_node.index;
936       adj->if_address_index = if_address_index;
937       packet_type = VNET_L3_PACKET_TYPE_ARP;
938     }
939   else
940     {
941       n = IP_LOOKUP_NEXT_REWRITE;
942       node_index = ip4_rewrite_node.index;
943       packet_type = VNET_L3_PACKET_TYPE_IP4;
944     }
945
946   adj->lookup_next_index = n;
947   vnet_rewrite_for_sw_interface
948     (vnm,
949      packet_type,
950      sw_if_index,
951      node_index,
952      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
953      &adj->rewrite_header,
954      sizeof (adj->rewrite_data));
955 }
956
957 static void
958 ip4_add_interface_routes (u32 sw_if_index,
959                           ip4_main_t * im, u32 fib_index,
960                           ip_interface_address_t * a)
961 {
962   vnet_main_t * vnm = vnet_get_main();
963   ip_lookup_main_t * lm = &im->lookup_main;
964   ip_adjacency_t * adj;
965   ip4_address_t * address = ip_interface_address_get_address (lm, a);
966   ip4_add_del_route_args_t x;
967   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
968   u32 classify_table_index;
969
970   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
971   x.table_index_or_table_id = fib_index;
972   x.flags = (IP4_ROUTE_FLAG_ADD
973              | IP4_ROUTE_FLAG_FIB_INDEX
974              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
975   x.dst_address = address[0];
976   x.dst_address_length = a->address_length;
977   x.n_add_adj = 0;
978   x.add_adj = 0;
979
980   a->neighbor_probe_adj_index = ~0;
981   if (a->address_length < 32)
982     {
983       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
984                               &x.adj_index);
985       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
986       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
987       ip4_add_del_route (im, &x);
988       a->neighbor_probe_adj_index = x.adj_index;
989     }
990   
991   /* Add e.g. 1.1.1.1/32 as local to this host. */
992   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
993                           &x.adj_index);
994   
995   classify_table_index = ~0;
996   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
997     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
998   if (classify_table_index != (u32) ~0)
999     {
1000       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1001       adj->classify.table_index = classify_table_index;
1002     }
1003   else
1004     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1005   
1006   adj->if_address_index = a - lm->if_address_pool;
1007   adj->rewrite_header.sw_if_index = sw_if_index;
1008   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1009   /* 
1010    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1011    * fail an RPF-ish check, but still go thru the rewrite code...
1012    */
1013   adj->rewrite_header.data_bytes = 0;
1014
1015   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1016   x.dst_address_length = 32;
1017   ip4_add_del_route (im, &x);
1018 }
1019
1020 static void
1021 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1022 {
1023   ip4_add_del_route_args_t x;
1024
1025   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1026   x.table_index_or_table_id = fib_index;
1027   x.flags = (IP4_ROUTE_FLAG_DEL
1028              | IP4_ROUTE_FLAG_FIB_INDEX
1029              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1030   x.dst_address = address[0];
1031   x.dst_address_length = address_length;
1032   x.adj_index = ~0;
1033   x.n_add_adj = 0;
1034   x.add_adj = 0;
1035
1036   if (address_length < 32)
1037     ip4_add_del_route (im, &x);
1038
1039   x.dst_address_length = 32;
1040   ip4_add_del_route (im, &x);
1041
1042   ip4_delete_matching_routes (im,
1043                               fib_index,
1044                               IP4_ROUTE_FLAG_FIB_INDEX,
1045                               address,
1046                               address_length);
1047 }
1048
/*
 * An IPv4 address paired with a software interface index.
 * NOTE(review): `length` is presumably the prefix length in bits, and no
 * use of this type is visible in this part of the file -- confirm both
 * against its users.
 */
1049 typedef struct {
1050     u32 sw_if_index;
1051     ip4_address_t address;
1052     u32 length;
1053 } ip4_interface_address_t;
1054
1055 static clib_error_t *
1056 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1057                                         u32 sw_if_index,
1058                                         ip4_address_t * new_address,
1059                                         u32 new_length,
1060                                         u32 redistribute,
1061                                         u32 insert_routes,
1062                                         u32 is_del);
1063
1064 static clib_error_t *
1065 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1066                                         u32 sw_if_index,
1067                                         ip4_address_t * address,
1068                                         u32 address_length,
1069                                         u32 redistribute,
1070                                         u32 insert_routes,
1071                                         u32 is_del)
1072 {
1073   vnet_main_t * vnm = vnet_get_main();
1074   ip4_main_t * im = &ip4_main;
1075   ip_lookup_main_t * lm = &im->lookup_main;
1076   clib_error_t * error = 0;
1077   u32 if_address_index, elts_before;
1078   ip4_address_fib_t ip4_af, * addr_fib = 0;
1079
1080   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1081   ip4_addr_fib_init (&ip4_af, address,
1082                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1083   vec_add1 (addr_fib, ip4_af);
1084
1085   /* When adding an address check that it does not conflict with an existing address. */
1086   if (! is_del)
1087     {
1088       ip_interface_address_t * ia;
1089       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1090                                     0 /* honor unnumbered */,
1091       ({
1092         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1093
1094         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1095             || ip4_destination_matches_route (im, x, address, address_length))
1096           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1097                                     format_ip4_address_and_length, address, address_length,
1098                                     format_ip4_address_and_length, x, ia->address_length,
1099                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1100       }));
1101     }
1102
1103   elts_before = pool_elts (lm->if_address_pool);
1104
1105   error = ip_interface_address_add_del
1106     (lm,
1107      sw_if_index,
1108      addr_fib,
1109      address_length,
1110      is_del,
1111      &if_address_index);
1112   if (error)
1113     goto done;
1114   
1115   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1116     {
1117       if (is_del)
1118         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1119                                   address_length);
1120       
1121       else
1122           ip4_add_interface_routes (sw_if_index,
1123                                     im, ip4_af.fib_index,
1124                                     pool_elt_at_index 
1125                                     (lm->if_address_pool, if_address_index));
1126     }
1127
1128   /* If pool did not grow/shrink: add duplicate address. */
1129   if (elts_before != pool_elts (lm->if_address_pool))
1130     {
1131       ip4_add_del_interface_address_callback_t * cb;
1132       vec_foreach (cb, im->add_del_interface_address_callbacks)
1133         cb->function (im, cb->function_opaque, sw_if_index,
1134                       address, address_length,
1135                       if_address_index,
1136                       is_del);
1137     }
1138
1139  done:
1140   vec_free (addr_fib);
1141   return error;
1142 }
1143
1144 clib_error_t *
1145 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1146                                ip4_address_t * address, u32 address_length,
1147                                u32 is_del)
1148 {
1149   return ip4_add_del_interface_address_internal
1150     (vm, sw_if_index, address, address_length,
1151      /* redistribute */ 1,
1152      /* insert_routes */ 1,
1153      is_del);
1154 }
1155
1156 static clib_error_t *
1157 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1158                                 u32 sw_if_index,
1159                                 u32 flags)
1160 {
1161   ip4_main_t * im = &ip4_main;
1162   ip_interface_address_t * ia;
1163   ip4_address_t * a;
1164   u32 is_admin_up, fib_index;
1165   
1166   /* Fill in lookup tables with default table (0). */
1167   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1168   
1169   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1170   
1171   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1172   
1173   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1174
1175   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1176                                 0 /* honor unnumbered */,
1177   ({
1178     a = ip_interface_address_get_address (&im->lookup_main, ia);
1179     if (is_admin_up)
1180       ip4_add_interface_routes (sw_if_index,
1181                                 im, fib_index,
1182                                 ia);
1183     else
1184       ip4_del_interface_routes (im, fib_index,
1185                                 a, ia->address_length);
1186   }));
1187
1188   return 0;
1189 }
1190  
1191 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1192
1193 static clib_error_t *
1194 ip4_sw_interface_add_del (vnet_main_t * vnm,
1195                           u32 sw_if_index,
1196                           u32 is_add)
1197 {
1198   vlib_main_t * vm = vnm->vlib_main;
1199   ip4_main_t * im = &ip4_main;
1200   ip_lookup_main_t * lm = &im->lookup_main;
1201   u32 ci, cast;
1202
1203   for (cast = 0; cast < VNET_N_CAST; cast++)
1204     {
1205       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1206       vnet_config_main_t * vcm = &cm->config_main;
1207
1208       if (! vcm->node_index_by_feature_index)
1209         {
1210           if (cast == VNET_UNICAST)
1211             {
1212               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1213               static char * feature_nodes[] = {
1214                 [IP4_RX_FEATURE_CHECK_ACCESS] = "ip4-inacl",
1215                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX] = "ip4-source-check-via-rx",
1216                 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY] = "ip4-source-check-via-any",
1217                 [IP4_RX_FEATURE_IPSEC] = "ipsec-input-ip4",
1218                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1219                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup",
1220               };
1221
1222               vnet_config_init (vm, vcm,
1223                                 start_nodes, ARRAY_LEN (start_nodes),
1224                                 feature_nodes, ARRAY_LEN (feature_nodes));
1225             }
1226           else
1227             {
1228               static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1229               static char * feature_nodes[] = {
1230                 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1231                 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup-multicast",
1232               };
1233
1234               vnet_config_init (vm, vcm,
1235                                 start_nodes, ARRAY_LEN (start_nodes),
1236                                 feature_nodes, ARRAY_LEN (feature_nodes));
1237             }
1238         }
1239
1240       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1241       ci = cm->config_index_by_sw_if_index[sw_if_index];
1242
1243       if (is_add)
1244         ci = vnet_config_add_feature (vm, vcm,
1245                                       ci,
1246                                       IP4_RX_FEATURE_LOOKUP,
1247                                       /* config data */ 0,
1248                                       /* # bytes of config data */ 0);
1249       else
1250         ci = vnet_config_del_feature (vm, vcm,
1251                                       ci,
1252                                       IP4_RX_FEATURE_LOOKUP,
1253                                       /* config data */ 0,
1254                                       /* # bytes of config data */ 0);
1255
1256       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1257     }
1258
1259   return /* no error */ 0;
1260 }
1261
1262 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1263
/*
 * Registration of the "ip4-lookup" graph node, run by ip4_lookup ().
 * The next-node table is indexed by ip_lookup_next_t, which is the value
 * the lookup loop reads from the matched adjacency's lookup_next_index;
 * each entry names the node that handles that kind of adjacency.
 */
1264 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1265   .function = ip4_lookup,
1266   .name = "ip4-lookup",
1267   .vector_size = sizeof (u32),
1268
1269   .n_next_nodes = IP_LOOKUP_N_NEXT,
1270   .next_nodes = {
1271     [IP_LOOKUP_NEXT_MISS] = "ip4-miss",
1272     [IP_LOOKUP_NEXT_DROP] = "ip4-drop",
1273     [IP_LOOKUP_NEXT_PUNT] = "ip4-punt",
1274     [IP_LOOKUP_NEXT_LOCAL] = "ip4-local",
1275     [IP_LOOKUP_NEXT_ARP] = "ip4-arp",
1276     [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit",
1277     [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify",
1278     [IP_LOOKUP_NEXT_MAP] = "ip4-map",
1279     [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t",
1280     [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd",
1281     [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop",
1282     [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop", 
1283     [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop", 
1284   },
1285 };
1286
1287 /* Global IPv4 state: the single ip4_main_t instance that the functions in this file reach through &ip4_main. */
1288 ip4_main_t ip4_main;
1289
1290 clib_error_t *
1291 ip4_lookup_init (vlib_main_t * vm)
1292 {
1293   ip4_main_t * im = &ip4_main;
1294   uword i;
1295
1296   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1297     {
1298       u32 m;
1299
1300       if (i < 32)
1301         m = pow2_mask (i) << (32 - i);
1302       else 
1303         m = ~0;
1304       im->fib_masks[i] = clib_host_to_net_u32 (m);
1305     }
1306
1307   /* Create FIB with index 0 and table id of 0. */
1308   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1309
1310   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1311
1312   {
1313     pg_node_t * pn;
1314     pn = pg_get_node (ip4_lookup_node.index);
1315     pn->unformat_edit = unformat_pg_ip4_header;
1316   }
1317
1318   {
1319     ethernet_arp_header_t h;
1320
1321     memset (&h, 0, sizeof (h));
1322
1323     /* Set target ethernet address to all zeros. */
1324     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1325
1326 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1327 #define _8(f,v) h.f = v;
1328     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1329     _16 (l3_type, ETHERNET_TYPE_IP4);
1330     _8 (n_l2_address_bytes, 6);
1331     _8 (n_l3_address_bytes, 4);
1332     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1333 #undef _16
1334 #undef _8
1335
1336     vlib_packet_template_init (vm,
1337                                &im->ip4_arp_request_packet_template,
1338                                /* data */ &h,
1339                                sizeof (h),
1340                                /* alloc chunk size */ 8,
1341                                "ip4 arp");
1342   }
1343
1344   return 0;
1345 }
1346
1347 VLIB_INIT_FUNCTION (ip4_lookup_init);
1348
/*
 * Per-packet trace record written by ip4_forward_next_trace () and
 * printed by format_ip4_forward_next_trace ().
 * NOTE(review): packet_data is sized as 64 minus one u32 although the
 * record carries three u32 fields ahead of it; if a 64-byte record was
 * intended the size expression needs revisiting -- confirm.
 */
1349 typedef struct {
1350   /* Adjacency index read from the buffer's ip.adj_index[] slot chosen by the caller. */
1351   u32 adj_index;
1352   u32 flow_hash; /* Copy of the buffer's ip.flow_hash. */
1353   u32 fib_index; /* FIB index of the buffer's RX software interface. */
1354
1355   /* Packet data, possibly *after* rewrite; copied from the buffer's current data pointer. */
1356   u8 packet_data[64 - 1*sizeof(u32)];
1357 } ip4_forward_next_trace_t;
1358
1359 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1360 {
1361   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1362   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1363   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1364   vnet_main_t * vnm = vnet_get_main();
1365   ip4_main_t * im = &ip4_main;
1366   ip_adjacency_t * adj;
1367   uword indent = format_get_indent (s);
1368
1369   adj = ip_get_adjacency (&im->lookup_main, t->adj_index);
1370   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1371               t->fib_index, t->adj_index, format_ip_adjacency,
1372               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1373   switch (adj->lookup_next_index)
1374     {
1375     case IP_LOOKUP_NEXT_REWRITE:
1376       s = format (s, "\n%U%U",
1377                   format_white_space, indent,
1378                   format_ip_adjacency_packet_data,
1379                   vnm, &im->lookup_main, t->adj_index,
1380                   t->packet_data, sizeof (t->packet_data));
1381       break;
1382
1383     default:
1384       break;
1385     }
1386
1387   return s;
1388 }
1389
1390 /* Common trace function for all ip4-forward next nodes. */
1391 void
1392 ip4_forward_next_trace (vlib_main_t * vm,
1393                         vlib_node_runtime_t * node,
1394                         vlib_frame_t * frame,
1395                         vlib_rx_or_tx_t which_adj_index)
1396 {
1397   u32 * from, n_left;
1398   ip4_main_t * im = &ip4_main;
1399
1400   n_left = frame->n_vectors;
1401   from = vlib_frame_vector_args (frame);
1402   
1403   while (n_left >= 4)
1404     {
1405       u32 bi0, bi1;
1406       vlib_buffer_t * b0, * b1;
1407       ip4_forward_next_trace_t * t0, * t1;
1408
1409       /* Prefetch next iteration. */
1410       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1411       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1412
1413       bi0 = from[0];
1414       bi1 = from[1];
1415
1416       b0 = vlib_get_buffer (vm, bi0);
1417       b1 = vlib_get_buffer (vm, bi1);
1418
1419       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1420         {
1421           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1422           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1423           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1424           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1425                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1426           memcpy (t0->packet_data,
1427                   vlib_buffer_get_current (b0),
1428                   sizeof (t0->packet_data));
1429         }
1430       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1431         {
1432           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1433           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1434           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1435           t1->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1436                              vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1437           memcpy (t1->packet_data,
1438                   vlib_buffer_get_current (b1),
1439                   sizeof (t1->packet_data));
1440         }
1441       from += 2;
1442       n_left -= 2;
1443     }
1444
1445   while (n_left >= 1)
1446     {
1447       u32 bi0;
1448       vlib_buffer_t * b0;
1449       ip4_forward_next_trace_t * t0;
1450
1451       bi0 = from[0];
1452
1453       b0 = vlib_get_buffer (vm, bi0);
1454
1455       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1456         {
1457           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1458           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1459           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1460           t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
1461                              vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1462           memcpy (t0->packet_data,
1463                   vlib_buffer_get_current (b0),
1464                   sizeof (t0->packet_data));
1465         }
1466       from += 1;
1467       n_left -= 1;
1468     }
1469 }
1470
1471 static uword
1472 ip4_drop_or_punt (vlib_main_t * vm,
1473                   vlib_node_runtime_t * node,
1474                   vlib_frame_t * frame,
1475                   ip4_error_t error_code)
1476 {
1477   u32 * buffers = vlib_frame_vector_args (frame);
1478   uword n_packets = frame->n_vectors;
1479
1480   vlib_error_drop_buffers (vm, node,
1481                            buffers,
1482                            /* stride */ 1,
1483                            n_packets,
1484                            /* next */ 0,
1485                            ip4_input_node.index,
1486                            error_code);
1487
1488   if (node->flags & VLIB_NODE_FLAG_TRACE)
1489     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1490
1491   return n_packets;
1492 }
1493
1494 static uword
1495 ip4_drop (vlib_main_t * vm,
1496           vlib_node_runtime_t * node,
1497           vlib_frame_t * frame)
1498 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1499
1500 static uword
1501 ip4_punt (vlib_main_t * vm,
1502           vlib_node_runtime_t * node,
1503           vlib_frame_t * frame)
1504 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1505
1506 static uword
1507 ip4_miss (vlib_main_t * vm,
1508           vlib_node_runtime_t * node,
1509           vlib_frame_t * frame)
1510 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1511
/*
 * Registration of the "ip4-drop" graph node, run by ip4_drop ().
 * Its single next node (index 0, the index ip4_drop_or_punt () passes to
 * vlib_error_drop_buffers ()) is "error-drop"; traces are printed with
 * format_ip4_forward_next_trace ().
 */
1512 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1513   .function = ip4_drop,
1514   .name = "ip4-drop",
1515   .vector_size = sizeof (u32),
1516
1517   .format_trace = format_ip4_forward_next_trace,
1518
1519   .n_next_nodes = 1,
1520   .next_nodes = {
1521     [0] = "error-drop",
1522   },
1523 };
1524
/*
 * Registration of the "ip4-punt" graph node, run by ip4_punt ().
 * Its single next node (index 0, the index ip4_drop_or_punt () passes to
 * vlib_error_drop_buffers ()) is "error-punt"; traces are printed with
 * format_ip4_forward_next_trace ().
 */
1525 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1526   .function = ip4_punt,
1527   .name = "ip4-punt",
1528   .vector_size = sizeof (u32),
1529
1530   .format_trace = format_ip4_forward_next_trace,
1531
1532   .n_next_nodes = 1,
1533   .next_nodes = {
1534     [0] = "error-punt",
1535   },
1536 };
1537
/*
 * Registration of the "ip4-miss" graph node, run by ip4_miss (), which
 * accounts packets as IP4_ERROR_DST_LOOKUP_MISS.  Its single next node
 * (index 0) is "error-drop"; traces are printed with
 * format_ip4_forward_next_trace ().
 */
1538 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1539   .function = ip4_miss,
1540   .name = "ip4-miss",
1541   .vector_size = sizeof (u32),
1542
1543   .format_trace = format_ip4_forward_next_trace,
1544
1545   .n_next_nodes = 1,
1546   .next_nodes = {
1547     [0] = "error-drop",
1548   },
1549 };
1550
1551 /* Compute TCP/UDP/ICMP4 checksum in software. */
1552 u16
1553 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1554                               ip4_header_t * ip0)
1555 {
1556   ip_csum_t sum0;
1557   u32 ip_header_length, payload_length_host_byte_order;
1558   u32 n_this_buffer, n_bytes_left;
1559   u16 sum16;
1560   void * data_this_buffer;
1561   
1562   /* Initialize checksum with ip header. */
1563   ip_header_length = ip4_header_bytes (ip0);
1564   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1565   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1566
1567   if (BITS (uword) == 32)
1568     {
1569       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1570       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1571     }
1572   else
1573     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1574
1575   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1576   data_this_buffer = (void *) ip0 + ip_header_length;
1577   if (n_this_buffer + ip_header_length > p0->current_length)
1578     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1579   while (1)
1580     {
1581       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1582       n_bytes_left -= n_this_buffer;
1583       if (n_bytes_left == 0)
1584         break;
1585
1586       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1587       p0 = vlib_get_buffer (vm, p0->next_buffer);
1588       data_this_buffer = vlib_buffer_get_current (p0);
1589       n_this_buffer = p0->current_length;
1590     }
1591
1592   sum16 = ~ ip_csum_fold (sum0);
1593
1594   return sum16;
1595 }
1596
1597 static u32
1598 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1599 {
1600   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1601   udp_header_t * udp0;
1602   u16 sum16;
1603
1604   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1605           || ip0->protocol == IP_PROTOCOL_UDP);
1606
1607   udp0 = (void *) (ip0 + 1);
1608   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1609     {
1610       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1611                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1612       return p0->flags;
1613     }
1614
1615   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1616
1617   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1618                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1619
1620   return p0->flags;
1621 }
1622
1623 static uword
1624 ip4_local (vlib_main_t * vm,
1625            vlib_node_runtime_t * node,
1626            vlib_frame_t * frame)
1627 {
1628   ip4_main_t * im = &ip4_main;
1629   ip_lookup_main_t * lm = &im->lookup_main;
1630   ip_local_next_t next_index;
1631   u32 * from, * to_next, n_left_from, n_left_to_next;
1632   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1633
1634   from = vlib_frame_vector_args (frame);
1635   n_left_from = frame->n_vectors;
1636   next_index = node->cached_next_index;
1637   
1638   if (node->flags & VLIB_NODE_FLAG_TRACE)
1639     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1640
1641   while (n_left_from > 0)
1642     {
1643       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1644
1645       while (n_left_from >= 4 && n_left_to_next >= 2)
1646         {
1647           vlib_buffer_t * p0, * p1;
1648           ip4_header_t * ip0, * ip1;
1649           udp_header_t * udp0, * udp1;
1650           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1651           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1652           ip_adjacency_t * adj0, * adj1;
1653           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
1654           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
1655           i32 len_diff0, len_diff1;
1656           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1657           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1658           u8 enqueue_code;
1659       
1660           pi0 = to_next[0] = from[0];
1661           pi1 = to_next[1] = from[1];
1662           from += 2;
1663           n_left_from -= 2;
1664           to_next += 2;
1665           n_left_to_next -= 2;
1666       
1667           p0 = vlib_get_buffer (vm, pi0);
1668           p1 = vlib_get_buffer (vm, pi1);
1669
1670           ip0 = vlib_buffer_get_current (p0);
1671           ip1 = vlib_buffer_get_current (p1);
1672
1673           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1674                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1675           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1676                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1677
1678           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1679           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
1680
1681           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1682
1683           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1684           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1685
1686           proto0 = ip0->protocol;
1687           proto1 = ip1->protocol;
1688           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1689           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1690           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1691           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1692
1693           flags0 = p0->flags;
1694           flags1 = p1->flags;
1695
1696           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1697           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1698
1699           udp0 = ip4_next_header (ip0);
1700           udp1 = ip4_next_header (ip1);
1701
1702           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1703           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1704           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1705
1706           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1707           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1708
1709           /* Verify UDP length. */
1710           ip_len0 = clib_net_to_host_u16 (ip0->length);
1711           ip_len1 = clib_net_to_host_u16 (ip1->length);
1712           udp_len0 = clib_net_to_host_u16 (udp0->length);
1713           udp_len1 = clib_net_to_host_u16 (udp1->length);
1714
1715           len_diff0 = ip_len0 - udp_len0;
1716           len_diff1 = ip_len1 - udp_len1;
1717
1718           len_diff0 = is_udp0 ? len_diff0 : 0;
1719           len_diff1 = is_udp1 ? len_diff1 : 0;
1720
1721           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1722                                 & good_tcp_udp0 & good_tcp_udp1)))
1723             {
1724               if (is_tcp_udp0)
1725                 {
1726                   if (is_tcp_udp0
1727                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1728                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1729                   good_tcp_udp0 =
1730                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1731                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1732                 }
1733               if (is_tcp_udp1)
1734                 {
1735                   if (is_tcp_udp1
1736                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1737                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1738                   good_tcp_udp1 =
1739                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1740                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1741                 }
1742             }
1743
1744           good_tcp_udp0 &= len_diff0 >= 0;
1745           good_tcp_udp1 &= len_diff1 >= 0;
1746
1747           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1748           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1749
1750           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1751
1752           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1753           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1754
1755           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1756           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1757                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1758                     : error0);
1759           error1 = (is_tcp_udp1 && ! good_tcp_udp1
1760                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1761                     : error1);
1762
1763           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1764           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1765
1766           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1767           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1768
1769           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1770           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
1771
1772           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1773                                                            &ip0->src_address,
1774                                                            /* no_default_route */ 1));
1775           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
1776                                                            &ip1->src_address,
1777                                                            /* no_default_route */ 1));
1778
1779           adj0 = ip_get_adjacency (lm, adj_index0);
1780           adj1 = ip_get_adjacency (lm, adj_index1);
1781
1782           /* 
1783            * Must have a route to source otherwise we drop the packet.
1784            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1785            */
1786           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1787                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1788                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
1789                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1790                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1791                     ? IP4_ERROR_SRC_LOOKUP_MISS
1792                     : error0);
1793           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
1794                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1795                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
1796                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1797                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1798                     ? IP4_ERROR_SRC_LOOKUP_MISS
1799                     : error1);
1800
1801           next0 = lm->local_next_by_ip_protocol[proto0];
1802           next1 = lm->local_next_by_ip_protocol[proto1];
1803
1804           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1805           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1806
1807           p0->error = error0 ? error_node->errors[error0] : 0;
1808           p1->error = error1 ? error_node->errors[error1] : 0;
1809
1810           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1811
1812           if (PREDICT_FALSE (enqueue_code != 0))
1813             {
1814               switch (enqueue_code)
1815                 {
1816                 case 1:
1817                   /* A B A */
1818                   to_next[-2] = pi1;
1819                   to_next -= 1;
1820                   n_left_to_next += 1;
1821                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1822                   break;
1823
1824                 case 2:
1825                   /* A A B */
1826                   to_next -= 1;
1827                   n_left_to_next += 1;
1828                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1829                   break;
1830
1831                 case 3:
1832                   /* A B B or A B C */
1833                   to_next -= 2;
1834                   n_left_to_next += 2;
1835                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1836                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1837                   if (next0 == next1)
1838                     {
1839                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1840                       next_index = next1;
1841                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1842                     }
1843                   break;
1844                 }
1845             }
1846         }
1847
1848       while (n_left_from > 0 && n_left_to_next > 0)
1849         {
1850           vlib_buffer_t * p0;
1851           ip4_header_t * ip0;
1852           udp_header_t * udp0;
1853           ip4_fib_mtrie_t * mtrie0;
1854           ip4_fib_mtrie_leaf_t leaf0;
1855           ip_adjacency_t * adj0;
1856           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
1857           i32 len_diff0;
1858           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1859       
1860           pi0 = to_next[0] = from[0];
1861           from += 1;
1862           n_left_from -= 1;
1863           to_next += 1;
1864           n_left_to_next -= 1;
1865       
1866           p0 = vlib_get_buffer (vm, pi0);
1867
1868           ip0 = vlib_buffer_get_current (p0);
1869
1870           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1871                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1872
1873           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1874
1875           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1876
1877           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1878
1879           proto0 = ip0->protocol;
1880           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1881           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1882
1883           flags0 = p0->flags;
1884
1885           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1886
1887           udp0 = ip4_next_header (ip0);
1888
1889           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1890           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1891
1892           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1893
1894           /* Verify UDP length. */
1895           ip_len0 = clib_net_to_host_u16 (ip0->length);
1896           udp_len0 = clib_net_to_host_u16 (udp0->length);
1897
1898           len_diff0 = ip_len0 - udp_len0;
1899
1900           len_diff0 = is_udp0 ? len_diff0 : 0;
1901
1902           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1903             {
1904               if (is_tcp_udp0)
1905                 {
1906                   if (is_tcp_udp0
1907                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1908                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1909                   good_tcp_udp0 =
1910                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1911                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1912                 }
1913             }
1914
1915           good_tcp_udp0 &= len_diff0 >= 0;
1916
1917           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1918
1919           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1920
1921           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1922
1923           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1924           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1925                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1926                     : error0);
1927
1928           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1929
1930           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1931           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1932
1933           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1934                                                            &ip0->src_address,
1935                                                            /* no_default_route */ 1));
1936
1937           adj0 = ip_get_adjacency (lm, adj_index0);
1938
1939           /* Must have a route to source otherwise we drop the packet. */
1940           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1941                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1942                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
1943                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1944                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1945                     ? IP4_ERROR_SRC_LOOKUP_MISS
1946                     : error0);
1947
1948           next0 = lm->local_next_by_ip_protocol[proto0];
1949
1950           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1951
1952           p0->error = error0? error_node->errors[error0] : 0;
1953
1954           if (PREDICT_FALSE (next0 != next_index))
1955             {
1956               n_left_to_next += 1;
1957               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1958
1959               next_index = next0;
1960               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1961               to_next[0] = pi0;
1962               to_next += 1;
1963               n_left_to_next -= 1;
1964             }
1965         }
1966   
1967       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1968     }
1969
1970   return frame->n_vectors;
1971 }
1972
1973 /* Graph node registration for "ip4-local"; node function is ip4_local(). */
1974 VLIB_REGISTER_NODE (ip4_local_node,static) = {
1975   .name = "ip4-local",
1976   .function = ip4_local,
1977   .format_trace = format_ip4_forward_next_trace,
1978   .vector_size = sizeof (u32),
1979   /* Next node names, indexed by the IP_LOCAL_NEXT_* constants. */
1980   .next_nodes = {
1981     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1982     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1983     [IP_LOCAL_NEXT_PUNT] = "error-punt",
1984     [IP_LOCAL_NEXT_DROP] = "error-drop",
1985   },
1986   .n_next_nodes = IP_LOCAL_N_NEXT,
1987 };
1988
1989 /* Route ip4-local traffic for IP protocol 'protocol' to graph node 'node_index'. */
1990 void ip4_register_protocol (u32 protocol, u32 node_index)
1991 {
1992   vlib_main_t * vm = vlib_get_main();
1993   ip_lookup_main_t * lookup = &ip4_main.lookup_main;
1994   ASSERT (protocol < ARRAY_LEN (lookup->local_next_by_ip_protocol));
1995   lookup->local_next_by_ip_protocol[protocol] =
1996     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1997 }
1998
1999 /* CLI handler: list IP protocols whose ip4-local next is not IP_LOCAL_NEXT_PUNT. */
2000 static clib_error_t *
2001 show_ip_local_command_fn (vlib_main_t * vm, unformat_input_t * input,
2002                           vlib_cli_command_t * cmd)
2003 {
2004   ip_lookup_main_t * lookup = &ip4_main.lookup_main;
2005   int proto = 0;
2006
2007   vlib_cli_output (vm, "Protocols handled by ip4_local");
2008   for (; proto < ARRAY_LEN (lookup->local_next_by_ip_protocol); proto++)
2009     {
2010       if (lookup->local_next_by_ip_protocol[proto] == IP_LOCAL_NEXT_PUNT)
2011         continue;
2012       vlib_cli_output (vm, "%d", proto);
2013     }
2014   return 0;
2015 }
2016
2017
2018
2019 VLIB_CLI_COMMAND (show_ip_local, static) = {  /* CLI command registration */
2020   .function = show_ip_local_command_fn,
2021   .path = "show ip local",
2022   .short_help = "Show ip local protocol table",
2023 };
2024
2025 /*
2026  * ip4-arp node function.  Every packet of the frame is enqueued to the
2027  * IP4_ARP_NEXT_DROP next.  Unless throttled by the hash bitmap, an ARP
2028  * request for its destination is also sent to the adjacency's next node.
2029  */
2030 static uword
2031 ip4_arp (vlib_main_t * vm,
2032          vlib_node_runtime_t * node,
2033          vlib_frame_t * frame)
2034 {
2035   vnet_main_t * vnm = vnet_get_main();
2036   ip4_main_t * im = &ip4_main;
2037   ip_lookup_main_t * lm = &im->lookup_main;
2038   u32 * from, * to_next_drop;
2039   uword n_left_from, n_left_to_next_drop;
2040   static f64 time_last_seed_change = -1e100;
2041   static u32 hash_seeds[3];
2042   static uword hash_bitmap[256 / BITS (uword)];
2043   f64 time_now;
2044
2045   if (node->flags & VLIB_NODE_FLAG_TRACE)
2046     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2047
2048   time_now = vlib_time_now (vm);
2049   if (time_now - time_last_seed_change > 1e-3)
2050     {
2051       uword i;
2052       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2053                                              sizeof (hash_seeds));
2054       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2055         hash_seeds[i] = r[i];
2056
2057       /* Mark all hash keys as not seen before. */
2058       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2059         hash_bitmap[i] = 0;
2060
2061       time_last_seed_change = time_now;
2062     }
2063
2064   from = vlib_frame_vector_args (frame);
2065   n_left_from = frame->n_vectors;
2066
2067   while (n_left_from > 0)
2068     {
2069       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2070                            to_next_drop, n_left_to_next_drop);
2071
2072       while (n_left_from > 0 && n_left_to_next_drop > 0)
2073         {
2074           vlib_buffer_t * p0;
2075           ip4_header_t * ip0;
2076           ethernet_header_t * eh0;
2077           u32 pi0, adj_index0, a0, b0, c0, sw_if_index0, drop0;
2078           uword bm0, m0;
2079           ip_adjacency_t * adj0;
2080
2081           pi0 = from[0];
2082           p0 = vlib_get_buffer (vm, pi0);
2083           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2084           adj0 = ip_get_adjacency (lm, adj_index0);
2085           ip0 = vlib_buffer_get_current (p0);
2086
2087           /*
2088            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2089            * rewrite to this packet, we need to skip it here.
2090            * NOTE(review): the test below looks at the IPv4 version
2091            * nibble, then walks any VLAN tags to find the ethertype.
2092            */
2093           eh0 = (ethernet_header_t*)ip0;
2094           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2095             {
2096               u32 vlan_num = 0;
2097               u16 * etype = &eh0->type;
2098               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q
2099                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad
2100                 {
2101                   vlan_num += 1;
2102                   etype += 2; //vlan tag also 16 bits, same as etype
2103                 }
2104               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2105                 {
2106                   vlib_buffer_advance (
2107                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2108                   ip0 = vlib_buffer_get_current (p0);
2109                 }
2110             }
2111
2112           a0 = hash_seeds[0];
2113           b0 = hash_seeds[1];
2114           c0 = hash_seeds[2];
2115
2116           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2117           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2118
2119           a0 ^= ip0->dst_address.data_u32;
2120           b0 ^= sw_if_index0;
2121           hash_v3_finalize32 (a0, b0, c0);
2122
2123           /* Bit mask first: it needs c0 before c0 becomes a word index. */
2124           c0 &= BITS (hash_bitmap) - 1;
2125           m0 = (uword) 1 << (c0 % BITS (uword));
2126           c0 = c0 / BITS (uword);
2127
2128           bm0 = hash_bitmap[c0];
2129           drop0 = (bm0 & m0) != 0;
2130
2131           /* Mark it as seen. */
2132           hash_bitmap[c0] = bm0 | m0;
2133
2134           from += 1;
2135           n_left_from -= 1;
2136           to_next_drop[0] = pi0;
2137           to_next_drop += 1;
2138           n_left_to_next_drop -= 1;
2139
2140           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2141
2142           if (drop0)
2143             continue;
2144
2145           /*
2146            * Can happen if the control-plane is programming tables
2147            * with traffic flowing; at least that's today's lame excuse.
2148            */
2149           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP)
2150             {
2151               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2152             }
2153           else
2154           /* Send ARP request. */
2155           {
2156             u32 bi0 = 0;
2157             vlib_buffer_t * arp_b0;
2158             ethernet_arp_header_t * h0;
2159             vnet_hw_interface_t * hw_if0;
2160
2161             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2162             if (PREDICT_FALSE (! h0))
2163               continue; /* no packet for the request; p0 is still dropped */
2164
2165             /* Add rewrite/encap string for ARP packet. */
2166             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2167
2168             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2169
2170             /* Src ethernet address in ARP header. */
2171             memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2172                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2173
2174             ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0);
2175
2176             /* Copy in destination address we are requesting. */
2177             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2178
2179             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2180             arp_b0 = vlib_get_buffer (vm, bi0);
2181             vnet_buffer (arp_b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2182             vlib_buffer_advance (arp_b0, -adj0->rewrite_header.data_bytes);
2183             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2184           }
2185         }
2186
2187       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2188     }
2189
2190   return frame->n_vectors;
2191 }
2192
2193 static char * ip4_arp_error_strings[] = {  /* indexed by IP4_ARP_ERROR_* */
2194   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2195   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2196   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2197   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2198   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2199 };
2200
2201 /* Graph node registration for "ip4-arp"; node function is ip4_arp(). */
2202 VLIB_REGISTER_NODE (ip4_arp_node) = {
2203   .name = "ip4-arp",
2204   .function = ip4_arp,
2205   .format_trace = format_ip4_forward_next_trace,
2206   .vector_size = sizeof (u32),
2207
2208   /* Error strings, one per IP4_ARP_ERROR_* index. */
2209   .error_strings = ip4_arp_error_strings,
2210   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2211
2212   /* Statically named next node(s). */
2213   .next_nodes = { [IP4_ARP_NEXT_DROP] = "error-drop", },
2214   .n_next_nodes = IP4_ARP_N_NEXT,
2215 };
2216
2217 /* IP4_ARP_ERROR_ suffixes that arp_notrace_init() passes to
2218    vnet_pcap_drop_trace_filter_add_del(), applied through a caller-defined _(). */
2219 #define foreach_notrace_ip4_arp_error           \
2220   _(DROP) _(REQUEST_SENT)                       \
2221   _(REPLICATE_DROP) _(REPLICATE_FAIL)
2222
2223 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2224 {
2225   vlib_node_runtime_t *rt = 
2226     vlib_node_get_runtime (vm, ip4_arp_node.index);
2227
2228   /* don't trace ARP request packets */
2229 #define _(a)                                    \
2230     vnet_pcap_drop_trace_filter_add_del         \
2231         (rt->errors[IP4_ARP_ERROR_##a],         \
2232          1 /* is_add */);
2233     foreach_notrace_ip4_arp_error;
2234 #undef _
2235   return 0;
2236 }
2237
2238 VLIB_INIT_FUNCTION(arp_notrace_init);
2239
2240
2241 /* Send an ARP request to see if given destination is reachable on given interface.
2242    Returns 0, or a clib error when the interface is admin-down, no interface
2243    address matches dst, or the request packet cannot be allocated. */
2244 clib_error_t *
2245 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2246 {
2247   vnet_main_t * vnm = vnet_get_main();
2248   ip4_main_t * im = &ip4_main;
2249   ethernet_arp_header_t * h;
2250   ip4_address_t * src;
2251   ip_interface_address_t * ia;
2252   ip_adjacency_t * adj;
2253   vnet_hw_interface_t * hi;
2254   vnet_sw_interface_t * si;
2255   vlib_buffer_t * b;
2256   u32 bi = 0;
2257
2258   si = vnet_get_sw_interface (vnm, sw_if_index);
2259   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2260     return clib_error_return (0, "%U: interface %U down",
2261                               format_ip4_address, dst,
2262                               format_vnet_sw_if_index_name, vnm,
2263                               sw_if_index);
2264
2265   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2266   if (! src)
2267     {
2268       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2269       return clib_error_return
2270         (0, "no matching interface address for destination %U (interface %U)",
2271          format_ip4_address, dst,
2272          format_vnet_sw_if_index_name, vnm, sw_if_index);
2273     }
2274
2275   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2276   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2277   /* NOTE(review): a NULL result presumably means no buffer was available. */
2278   if (! h)
2279     return clib_error_return (0, "ARP request packet allocation failed");
2280
2281   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2282
2283   memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2284   h->ip4_over_ethernet[0].ip4 = src[0];
2285   h->ip4_over_ethernet[1].ip4 = dst[0];
2286
2287   b = vlib_get_buffer (vm, bi);
2288   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2289
2290   /* Add encapsulation string for software interface (e.g. ethernet header). */
2291   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2292   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2293
2294   {
2295     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2296     u32 * to_next = vlib_frame_vector_args (f);
2297     to_next[0] = bi;
2298     f->n_vectors = 1;
2299     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2300   }
2301
2302   return /* no error */ 0;
2303 }
2304
2305 typedef enum {                  /* next indices used by ip4_rewrite_inline */
2306   IP4_REWRITE_NEXT_DROP = 0,
2307   IP4_REWRITE_NEXT_ARP = 1,
2308 } ip4_rewrite_next_t;
2309
2310 always_inline uword
2311 ip4_rewrite_inline (vlib_main_t * vm,
2312                     vlib_node_runtime_t * node,
2313                     vlib_frame_t * frame,
2314                     int rewrite_for_locally_received_packets)
2315 {
2316   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2317   u32 * from = vlib_frame_vector_args (frame);
2318   u32 n_left_from, n_left_to_next, * to_next, next_index;
2319   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2320   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2321
2322   n_left_from = frame->n_vectors;
2323   next_index = node->cached_next_index;
2324   u32 cpu_index = os_get_cpu_number();
2325   
2326   while (n_left_from > 0)
2327     {
2328       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2329
2330       while (n_left_from >= 4 && n_left_to_next >= 2)
2331         {
2332           ip_adjacency_t * adj0, * adj1;
2333           vlib_buffer_t * p0, * p1;
2334           ip4_header_t * ip0, * ip1;
2335           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2336           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2337           u32 next0_override, next1_override;
2338       
2339           if (rewrite_for_locally_received_packets)
2340               next0_override = next1_override = 0;
2341
2342           /* Prefetch next iteration. */
2343           {
2344             vlib_buffer_t * p2, * p3;
2345
2346             p2 = vlib_get_buffer (vm, from[2]);
2347             p3 = vlib_get_buffer (vm, from[3]);
2348
2349             vlib_prefetch_buffer_header (p2, STORE);
2350             vlib_prefetch_buffer_header (p3, STORE);
2351
2352             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2353             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2354           }
2355
2356           pi0 = to_next[0] = from[0];
2357           pi1 = to_next[1] = from[1];
2358
2359           from += 2;
2360           n_left_from -= 2;
2361           to_next += 2;
2362           n_left_to_next -= 2;
2363       
2364           p0 = vlib_get_buffer (vm, pi0);
2365           p1 = vlib_get_buffer (vm, pi1);
2366
2367           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2368           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2369
2370           /* We should never rewrite a pkt using the MISS adjacency */
2371           ASSERT(adj_index0 && adj_index1);
2372
2373           ip0 = vlib_buffer_get_current (p0);
2374           ip1 = vlib_buffer_get_current (p1);
2375
2376           error0 = error1 = IP4_ERROR_NONE;
2377
2378           /* Decrement TTL & update checksum.
2379              Works either endian, so no need for byte swap. */
2380           if (! rewrite_for_locally_received_packets)
2381             {
2382               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2383
2384               /* Input node should have reject packets with ttl 0. */
2385               ASSERT (ip0->ttl > 0);
2386               ASSERT (ip1->ttl > 0);
2387
2388               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2389               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2390
2391               checksum0 += checksum0 >= 0xffff;
2392               checksum1 += checksum1 >= 0xffff;
2393
2394               ip0->checksum = checksum0;
2395               ip1->checksum = checksum1;
2396
2397               ttl0 -= 1;
2398               ttl1 -= 1;
2399
2400               ip0->ttl = ttl0;
2401               ip1->ttl = ttl1;
2402
2403               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2404               error1 = ttl1 <= 0 ? IP4_ERROR_TIME_EXPIRED : error1;
2405
2406               /* Verify checksum. */
2407               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2408               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2409             }
2410
2411           /* Rewrite packet header and updates lengths. */
2412           adj0 = ip_get_adjacency (lm, adj_index0);
2413           adj1 = ip_get_adjacency (lm, adj_index1);
2414       
2415           if (rewrite_for_locally_received_packets)
2416             {
2417               /*
2418                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2419                * we end up here with a local adjacency in hand
2420                * The local adj rewrite data is 0xfefe on purpose.
2421                * Bad engineer, no donut for you.
2422                */
2423               if (PREDICT_FALSE(adj0->lookup_next_index 
2424                                 == IP_LOOKUP_NEXT_LOCAL))
2425                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2426               if (PREDICT_FALSE(adj0->lookup_next_index
2427                                 == IP_LOOKUP_NEXT_ARP))
2428                 next0_override = IP4_REWRITE_NEXT_ARP;
2429               if (PREDICT_FALSE(adj1->lookup_next_index 
2430                                 == IP_LOOKUP_NEXT_LOCAL))
2431                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2432               if (PREDICT_FALSE(adj1->lookup_next_index
2433                                 == IP_LOOKUP_NEXT_ARP))
2434                 next1_override = IP4_REWRITE_NEXT_ARP;
2435             }
2436
2437           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2438           rw_len0 = adj0[0].rewrite_header.data_bytes;
2439           rw_len1 = adj1[0].rewrite_header.data_bytes;
2440           next0 = (error0 == IP4_ERROR_NONE) 
2441             ? adj0[0].rewrite_header.next_index : 0;
2442
2443           if (rewrite_for_locally_received_packets)
2444               next0 = next0 && next0_override ? next0_override : next0;
2445
2446           next1 = (error1 == IP4_ERROR_NONE)
2447             ? adj1[0].rewrite_header.next_index : 0;
2448
2449           if (rewrite_for_locally_received_packets)
2450               next1 = next1 && next1_override ? next1_override : next1;
2451
2452           /* 
2453            * We've already accounted for an ethernet_header_t elsewhere
2454            */
2455           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2456               vlib_increment_combined_counter 
2457                   (&lm->adjacency_counters,
2458                    cpu_index, adj_index0, 
2459                    /* packet increment */ 0,
2460                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2461
2462           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2463               vlib_increment_combined_counter 
2464                   (&lm->adjacency_counters,
2465                    cpu_index, adj_index1, 
2466                    /* packet increment */ 0,
2467                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2468
2469           /* Check MTU of outgoing interface. */
2470           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2471                     ? IP4_ERROR_MTU_EXCEEDED
2472                     : error0);
2473           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2474                     ? IP4_ERROR_MTU_EXCEEDED
2475                     : error1);
2476
2477           p0->current_data -= rw_len0;
2478           p1->current_data -= rw_len1;
2479
2480           p0->current_length += rw_len0;
2481           p1->current_length += rw_len1;
2482
2483           vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
2484           vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index;
2485       
2486           p0->error = error_node->errors[error0];
2487           p1->error = error_node->errors[error1];
2488
2489           /* Guess we are only writing on simple Ethernet header. */
2490           vnet_rewrite_two_headers (adj0[0], adj1[0],
2491                                     ip0, ip1,
2492                                     sizeof (ethernet_header_t));
2493       
2494           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2495                                            to_next, n_left_to_next,
2496                                            pi0, pi1, next0, next1);
2497         }
2498
2499       while (n_left_from > 0 && n_left_to_next > 0)
2500         {
2501           ip_adjacency_t * adj0;
2502           vlib_buffer_t * p0;
2503           ip4_header_t * ip0;
2504           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2505           u32 next0_override;
2506       
2507           if (rewrite_for_locally_received_packets)
2508               next0_override = 0;
2509
2510           pi0 = to_next[0] = from[0];
2511
2512           p0 = vlib_get_buffer (vm, pi0);
2513
2514           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2515
2516           /* We should never rewrite a pkt using the MISS adjacency */
2517           ASSERT(adj_index0);
2518
2519           adj0 = ip_get_adjacency (lm, adj_index0);
2520       
2521           ip0 = vlib_buffer_get_current (p0);
2522
2523           error0 = IP4_ERROR_NONE;
2524           next0 = 0;            /* drop on error */
2525
2526           /* Decrement TTL & update checksum. */
2527           if (! rewrite_for_locally_received_packets)
2528             {
2529               i32 ttl0 = ip0->ttl;
2530
2531               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2532
2533               checksum0 += checksum0 >= 0xffff;
2534
2535               ip0->checksum = checksum0;
2536
2537               ASSERT (ip0->ttl > 0);
2538
2539               ttl0 -= 1;
2540
2541               ip0->ttl = ttl0;
2542
2543               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2544
2545               error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2546             }
2547
2548           if (rewrite_for_locally_received_packets)
2549             {
2550               /*
2551                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2552                * we end up here with a local adjacency in hand
2553                * The local adj rewrite data is 0xfefe on purpose.
2554                * Bad engineer, no donut for you.
2555                */
2556               if (PREDICT_FALSE(adj0->lookup_next_index 
2557                                 == IP_LOOKUP_NEXT_LOCAL))
2558                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2559               /* 
2560                * We have to override the next_index in ARP adjacencies,
2561                * because they're set up for ip4-arp, not this node...
2562                */
2563               if (PREDICT_FALSE(adj0->lookup_next_index
2564                                 == IP_LOOKUP_NEXT_ARP))
2565                 next0_override = IP4_REWRITE_NEXT_ARP;
2566             }
2567
2568           /* Guess we are only writing on simple Ethernet header. */
2569           vnet_rewrite_one_header (adj0[0], ip0, 
2570                                    sizeof (ethernet_header_t));
2571           
2572           /* Update packet buffer attributes/set output interface. */
2573           rw_len0 = adj0[0].rewrite_header.data_bytes;
2574           
2575           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2576               vlib_increment_combined_counter 
2577                   (&lm->adjacency_counters,
2578                    cpu_index, adj_index0, 
2579                    /* packet increment */ 0,
2580                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2581           
2582           /* Check MTU of outgoing interface. */
2583           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2584                     > adj0[0].rewrite_header.max_l3_packet_bytes
2585                     ? IP4_ERROR_MTU_EXCEEDED
2586                     : error0);
2587           
2588           p0->error = error_node->errors[error0];
2589           p0->current_data -= rw_len0;
2590           p0->current_length += rw_len0;
2591           vnet_buffer (p0)->sw_if_index[VLIB_TX] = 
2592             adj0[0].rewrite_header.sw_if_index;
2593           
2594           next0 = (error0 == IP4_ERROR_NONE)
2595             ? adj0[0].rewrite_header.next_index : 0;
2596
2597           if (rewrite_for_locally_received_packets)
2598               next0 = next0 && next0_override ? next0_override : next0;
2599
2600           from += 1;
2601           n_left_from -= 1;
2602           to_next += 1;
2603           n_left_to_next -= 1;
2604       
2605           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2606                                            to_next, n_left_to_next,
2607                                            pi0, next0);
2608         }
2609   
2610       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2611     }
2612
2613   /* Need to do trace after rewrites to pick up new packet data. */
2614   if (node->flags & VLIB_NODE_FLAG_TRACE)
2615     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2616
2617   return frame->n_vectors;
2618 }
2619
2620 static uword
2621 ip4_rewrite_transit (vlib_main_t * vm,
2622                      vlib_node_runtime_t * node,
2623                      vlib_frame_t * frame)
2624 {
2625   return ip4_rewrite_inline (vm, node, frame,
2626                              /* rewrite_for_locally_received_packets */ 0);
2627 }
2628
2629 static uword
2630 ip4_rewrite_local (vlib_main_t * vm,
2631                    vlib_node_runtime_t * node,
2632                    vlib_frame_t * frame)
2633 {
2634   return ip4_rewrite_inline (vm, node, frame,
2635                              /* rewrite_for_locally_received_packets */ 1);
2636 }
2637
2638 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2639   .function = ip4_rewrite_transit,
2640   .name = "ip4-rewrite-transit",
2641   .vector_size = sizeof (u32),
2642
2643   .format_trace = format_ip4_forward_next_trace,
2644
2645   .n_next_nodes = 2,
2646   .next_nodes = {
2647     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2648     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2649   },
2650 };
2651
2652 VLIB_REGISTER_NODE (ip4_rewrite_local_node,static) = {
2653   .function = ip4_rewrite_local,
2654   .name = "ip4-rewrite-local",
2655   .vector_size = sizeof (u32),
2656
2657   .sibling_of = "ip4-rewrite-transit",
2658
2659   .format_trace = format_ip4_forward_next_trace,
2660
2661   .n_next_nodes = 2,
2662   .next_nodes = {
2663     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2664     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2665   },
2666 };
2667
2668 static clib_error_t *
2669 add_del_interface_table (vlib_main_t * vm,
2670                          unformat_input_t * input,
2671                          vlib_cli_command_t * cmd)
2672 {
2673   vnet_main_t * vnm = vnet_get_main();
2674   clib_error_t * error = 0;
2675   u32 sw_if_index, table_id;
2676
2677   sw_if_index = ~0;
2678
2679   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2680     {
2681       error = clib_error_return (0, "unknown interface `%U'",
2682                                  format_unformat_error, input);
2683       goto done;
2684     }
2685
2686   if (unformat (input, "%d", &table_id))
2687     ;
2688   else
2689     {
2690       error = clib_error_return (0, "expected table id `%U'",
2691                                  format_unformat_error, input);
2692       goto done;
2693     }
2694
2695   {
2696     ip4_main_t * im = &ip4_main;
2697     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
2698
2699     if (fib) 
2700       {
2701         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2702         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
2703     }
2704   }
2705
2706  done:
2707   return error;
2708 }
2709
2710 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2711   .path = "set interface ip table",
2712   .function = add_del_interface_table,
2713   .short_help = "Add/delete FIB table id for interface",
2714 };
2715
2716
/*
 * Node function registered as "ip4-lookup-multicast".
 *
 * For every buffer in the frame:
 *   - pick a FIB index: the entry of fib_index_by_sw_if_index for the RX
 *     interface, unless sw_if_index[VLIB_TX] is something other than ~0,
 *     in which case that value is used as the FIB index;
 *   - look up the destination address with ip4_fib_lookup_buffer;
 *   - compute the flow hash with the FIB's flow_hash_config and store it
 *     in the buffer;
 *   - use the hash to pick one of the adjacency's n_adj entries and store
 *     the resulting index in ip.adj_index[VLIB_TX];
 *   - add one packet and the buffer chain length to that adjacency's
 *     combined counter;
 *   - enqueue the buffer to the looked-up adjacency's lookup_next_index.
 *
 * Buffers are enqueued speculatively to the current `next` frame and moved
 * afterwards if their next index turns out to differ.
 *
 * Returns frame->n_vectors.
 */
2717 static uword
2718 ip4_lookup_multicast (vlib_main_t * vm,
2719                       vlib_node_runtime_t * node,
2720                       vlib_frame_t * frame)
2721 {
2722   ip4_main_t * im = &ip4_main;
2723   ip_lookup_main_t * lm = &im->lookup_main;
2724   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
2725   u32 n_left_from, n_left_to_next, * from, * to_next;
2726   ip_lookup_next_t next;
2727   u32 cpu_index = os_get_cpu_number();
2728
2729   from = vlib_frame_vector_args (frame);
2730   n_left_from = frame->n_vectors;
2731   next = node->cached_next_index;
2732
2733   while (n_left_from > 0)
2734     {
2735       vlib_get_next_frame (vm, node, next,
2736                            to_next, n_left_to_next);
2737
/*
 * Dual loop: two buffers per iteration.  Four must remain so that
 * from[2] and from[3] can be prefetched for the following iteration.
 */
2738       while (n_left_from >= 4 && n_left_to_next >= 2)
2739         {
2740           vlib_buffer_t * p0, * p1;
2741           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
2742           ip_lookup_next_t next0, next1;
2743           ip4_header_t * ip0, * ip1;
2744           ip_adjacency_t * adj0, * adj1;
2745           u32 fib_index0, fib_index1;
2746           u32 flow_hash_config0, flow_hash_config1;
2747
2748           /* Prefetch next iteration. */
2749           {
2750             vlib_buffer_t * p2, * p3;
2751
2752             p2 = vlib_get_buffer (vm, from[2]);
2753             p3 = vlib_get_buffer (vm, from[3]);
2754
2755             vlib_prefetch_buffer_header (p2, LOAD);
2756             vlib_prefetch_buffer_header (p3, LOAD);
2757
2758             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2759             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2760           }
2761
/* Speculatively place both buffer indices in the current next frame. */
2762           pi0 = to_next[0] = from[0];
2763           pi1 = to_next[1] = from[1];
2764
2765           p0 = vlib_get_buffer (vm, pi0);
2766           p1 = vlib_get_buffer (vm, pi1);
2767
2768           ip0 = vlib_buffer_get_current (p0);
2769           ip1 = vlib_buffer_get_current (p1);
2770
/*
 * Default FIB comes from the RX interface; a sw_if_index[VLIB_TX] other
 * than ~0 replaces it and is used directly as the FIB index.
 */
2771           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2772           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2773           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2774             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2775           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2776             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2777
2778           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
2779                                               &ip0->dst_address, p0);
2780           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
2781                                               &ip1->dst_address, p1);
2782
2783           adj0 = ip_get_adjacency (lm, adj_index0);
2784           adj1 = ip_get_adjacency (lm, adj_index1);
2785
/* The next node is taken from the looked-up adjacency, before the
 * flow-hash offset is added to the adjacency index below. */
2786           next0 = adj0->lookup_next_index;
2787           next1 = adj1->lookup_next_index;
2788
2789           flow_hash_config0 = 
2790               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
2791
2792           flow_hash_config1 = 
2793               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
2794
2795           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
2796               (ip0, flow_hash_config0);
2797                                                                   
2798           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
2799               (ip1, flow_hash_config1);
2800
/*
 * n_adj is asserted to be a non-zero power of two, so masking the hash
 * with (n_adj - 1) yields an offset in [0, n_adj).
 */
2801           ASSERT (adj0->n_adj > 0);
2802           ASSERT (adj1->n_adj > 0);
2803           ASSERT (is_pow2 (adj0->n_adj));
2804           ASSERT (is_pow2 (adj1->n_adj));
2805           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
2806           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
2807
2808           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2809           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2810
/* Count one packet plus the chain length against the chosen adjacency. */
2811           if (1) /* $$$$$$ HACK FIXME */
2812           vlib_increment_combined_counter 
2813               (cm, cpu_index, adj_index0, 1,
2814                vlib_buffer_length_in_chain (vm, p0));
2815           if (1) /* $$$$$$ HACK FIXME */
2816           vlib_increment_combined_counter 
2817               (cm, cpu_index, adj_index1, 1,
2818                vlib_buffer_length_in_chain (vm, p1));
2819
/* Commit the speculative enqueue; the switch below undoes it as needed. */
2820           from += 2;
2821           to_next += 2;
2822           n_left_to_next -= 2;
2823           n_left_from -= 2;
2824
/* Bit 0 set: p0 does not belong in `next`.  Bit 1 set: p1 does not. */
2825           wrong_next = (next0 != next) + 2*(next1 != next);
2826           if (PREDICT_FALSE (wrong_next != 0))
2827             {
2828               switch (wrong_next)
2829                 {
2830                 case 1:
2831                   /* A B A */
/* Only p0 is misplaced: slide pi1 into p0's slot and hand back one slot. */
2832                   to_next[-2] = pi1;
2833                   to_next -= 1;
2834                   n_left_to_next += 1;
2835                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2836                   break;
2837
2838                 case 2:
2839                   /* A A B */
/* Only p1 is misplaced: drop the last slot and send pi1 to next1. */
2840                   to_next -= 1;
2841                   n_left_to_next += 1;
2842                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2843                   break;
2844
2845                 case 3:
2846                   /* A B C */
/* Both misplaced: give back both slots and enqueue each individually. */
2847                   to_next -= 2;
2848                   n_left_to_next += 2;
2849                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2850                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2851                   if (next0 == next1)
2852                     {
2853                       /* A B B */
/* Both agree on a different next: close the current frame and make
 * that next the speculative target from here on. */
2854                       vlib_put_next_frame (vm, node, next, n_left_to_next);
2855                       next = next1;
2856                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2857                     }
2858                 }
2859             }
2860         }
2861     
/* Single loop: same processing, one buffer at a time, for the remainder. */
2862       while (n_left_from > 0 && n_left_to_next > 0)
2863         {
2864           vlib_buffer_t * p0;
2865           ip4_header_t * ip0;
2866           u32 pi0, adj_index0;
2867           ip_lookup_next_t next0;
2868           ip_adjacency_t * adj0;
2869           u32 fib_index0;
2870           u32 flow_hash_config0;
2871
2872           pi0 = from[0];
2873           to_next[0] = pi0;
2874
2875           p0 = vlib_get_buffer (vm, pi0);
2876
2877           ip0 = vlib_buffer_get_current (p0);
2878
2879           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2880                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2881           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2882               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2883           
2884           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
2885                                               &ip0->dst_address, p0);
2886
2887           adj0 = ip_get_adjacency (lm, adj_index0);
2888
2889           next0 = adj0->lookup_next_index;
2890
2891           flow_hash_config0 = 
2892               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
2893
2894           vnet_buffer (p0)->ip.flow_hash = 
2895             ip4_compute_flow_hash (ip0, flow_hash_config0);
2896
2897           ASSERT (adj0->n_adj > 0);
2898           ASSERT (is_pow2 (adj0->n_adj));
2899           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
2900
2901           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2902
2903           if (1) /* $$$$$$ HACK FIXME */
2904               vlib_increment_combined_counter 
2905                   (cm, cpu_index, adj_index0, 1,
2906                    vlib_buffer_length_in_chain (vm, p0));
2907
2908           from += 1;
2909           to_next += 1;
2910           n_left_to_next -= 1;
2911           n_left_from -= 1;
2912
/*
 * Misprediction: take the buffer back out of the current frame, close
 * that frame, switch to next0's frame and enqueue the buffer there.
 */
2913           if (PREDICT_FALSE (next0 != next))
2914             {
2915               n_left_to_next += 1;
2916               vlib_put_next_frame (vm, node, next, n_left_to_next);
2917               next = next0;
2918               vlib_get_next_frame (vm, node, next,
2919                                    to_next, n_left_to_next);
2920               to_next[0] = pi0;
2921               to_next += 1;
2922               n_left_to_next -= 1;
2923             }
2924         }
2925
2926       vlib_put_next_frame (vm, node, next, n_left_to_next);
2927     }
2928
2929   return frame->n_vectors;
2930 }
2931
2932 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2933   .function = ip4_lookup_multicast,
2934   .name = "ip4-lookup-multicast",
2935   .vector_size = sizeof (u32),
2936
2937   .n_next_nodes = IP_LOOKUP_N_NEXT,
2938   .next_nodes = {
2939     [IP_LOOKUP_NEXT_MISS] = "ip4-miss",
2940     [IP_LOOKUP_NEXT_DROP] = "ip4-drop",
2941     [IP_LOOKUP_NEXT_PUNT] = "ip4-punt",
2942     [IP_LOOKUP_NEXT_LOCAL] = "ip4-local",
2943     [IP_LOOKUP_NEXT_ARP] = "ip4-arp",
2944     [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit",
2945     [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify",
2946     [IP_LOOKUP_NEXT_MAP] = "ip4-map",
2947     [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t",
2948     [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd",
2949     [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop",
2950     [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop", 
2951     [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop", 
2952   },
2953 };
2954
2955 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
2956   .function = ip4_drop,
2957   .name = "ip4-multicast",
2958   .vector_size = sizeof (u32),
2959
2960   .format_trace = format_ip4_forward_next_trace,
2961
2962   .n_next_nodes = 1,
2963   .next_nodes = {
2964     [0] = "error-drop",
2965   },
2966 };
2967
2968 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
2969 {
2970   ip4_main_t * im = &ip4_main;
2971   ip4_fib_mtrie_t * mtrie0;
2972   ip4_fib_mtrie_leaf_t leaf0;
2973   u32 adj_index0;
2974     
2975   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2976
2977   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2978   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2979   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2980   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2981   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2982   
2983   /* Handle default route. */
2984   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2985   
2986   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2987   
2988   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2989                                                   a, 
2990                                                   /* no_default_route */ 0);
2991 }
2992  
2993 static clib_error_t *
2994 test_lookup_command_fn (vlib_main_t * vm,
2995                         unformat_input_t * input,
2996                         vlib_cli_command_t * cmd)
2997 {
2998   u32 table_id = 0;
2999   f64 count = 1;
3000   u32 n;
3001   int i;
3002   ip4_address_t ip4_base_address;
3003   u64 errors = 0;
3004
3005   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3006       if (unformat (input, "table %d", &table_id))
3007         ;
3008       else if (unformat (input, "count %f", &count))
3009         ;
3010
3011       else if (unformat (input, "%U",
3012                          unformat_ip4_address, &ip4_base_address))
3013         ;
3014       else
3015         return clib_error_return (0, "unknown input `%U'",
3016                                   format_unformat_error, input);
3017   }
3018
3019   n = count;
3020
3021   for (i = 0; i < n; i++)
3022     {
3023       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3024         errors++;
3025
3026       ip4_base_address.as_u32 = 
3027         clib_host_to_net_u32 (1 + 
3028                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3029     }
3030
3031   if (errors) 
3032     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3033   else
3034     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3035
3036   return 0;
3037 }
3038
3039 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3040     .path = "test lookup",
3041     .short_help = "test lookup",
3042     .function = test_lookup_command_fn,
3043 };
3044
3045 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3046 {
3047   ip4_main_t * im4 = &ip4_main;
3048   ip4_fib_t * fib;
3049   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3050
3051   if (p == 0)
3052     return VNET_API_ERROR_NO_SUCH_FIB;
3053
3054   fib = vec_elt_at_index (im4->fibs, p[0]);
3055
3056   fib->flow_hash_config = flow_hash_config;
3057   return 0;
3058 }
3059  
3060 static clib_error_t *
3061 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3062                              unformat_input_t * input,
3063                              vlib_cli_command_t * cmd)
3064 {
3065   int matched = 0;
3066   u32 table_id = 0;
3067   u32 flow_hash_config = 0;
3068   int rv;
3069
3070   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3071     if (unformat (input, "table %d", &table_id))
3072       matched = 1;
3073 #define _(a,v) \
3074     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3075     foreach_flow_hash_bit
3076 #undef _
3077     else break;
3078   }
3079   
3080   if (matched == 0)
3081     return clib_error_return (0, "unknown input `%U'",
3082                               format_unformat_error, input);
3083   
3084   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3085   switch (rv)
3086     {
3087     case 0:
3088       break;
3089       
3090     case VNET_API_ERROR_NO_SUCH_FIB:
3091       return clib_error_return (0, "no such FIB table %d", table_id);
3092       
3093     default:
3094       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3095       break;
3096     }
3097   
3098   return 0;
3099 }
3100  
3101 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3102   .path = "set ip flow-hash",
3103   .short_help = 
3104   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3105   .function = set_ip_flow_hash_command_fn,
3106 };
3107  
3108 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3109                                  u32 table_index)
3110 {
3111   vnet_main_t * vnm = vnet_get_main();
3112   vnet_interface_main_t * im = &vnm->interface_main;
3113   ip4_main_t * ipm = &ip4_main;
3114   ip_lookup_main_t * lm = &ipm->lookup_main;
3115   vnet_classify_main_t * cm = &vnet_classify_main;
3116
3117   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3118     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3119
3120   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3121     return VNET_API_ERROR_NO_SUCH_ENTRY;
3122
3123   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3124   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3125
3126   return 0;
3127 }
3128
3129 static clib_error_t *
3130 set_ip_classify_command_fn (vlib_main_t * vm,
3131                             unformat_input_t * input,
3132                             vlib_cli_command_t * cmd)
3133 {
3134   u32 table_index = ~0;
3135   int table_index_set = 0;
3136   u32 sw_if_index = ~0;
3137   int rv;
3138   
3139   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3140     if (unformat (input, "table-index %d", &table_index))
3141       table_index_set = 1;
3142     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3143                        vnet_get_main(), &sw_if_index))
3144       ;
3145     else
3146       break;
3147   }
3148       
3149   if (table_index_set == 0)
3150     return clib_error_return (0, "classify table-index must be specified");
3151
3152   if (sw_if_index == ~0)
3153     return clib_error_return (0, "interface / subif must be specified");
3154
3155   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3156
3157   switch (rv)
3158     {
3159     case 0:
3160       break;
3161
3162     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3163       return clib_error_return (0, "No such interface");
3164
3165     case VNET_API_ERROR_NO_SUCH_ENTRY:
3166       return clib_error_return (0, "No such classifier table");
3167     }
3168   return 0;
3169 }
3170
3171 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3172     .path = "set ip classify",
3173     .short_help = 
3174     "set ip classify intfc <int> table-index <index>",
3175     .function = set_ip_classify_command_fn,
3176 };
3177