1862d89d7bc86718839cd8a16813cf64645d8232
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 /** for ethernet_header_t */
43 #include <vnet/ethernet/ethernet.h>
44 /** for ethernet_arp_header_t */
45 #include <vnet/ethernet/arp_packet.h>   
46 #include <vnet/ppp/ppp.h>
47 /** for srp_hw_interface_class */
48 #include <vnet/srp/srp.h>
49 /** for API error numbers */
50 #include <vnet/api_errno.h>     
51
52 /** @file
53     vnet ip4 forwarding
54 */
55
56 /* This is really, really simple but stupid fib. */
57 u32
58 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
59                            ip4_address_t * dst,
60                            u32 disable_default_route)
61 {
62   ip_lookup_main_t * lm = &im->lookup_main;
63   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
64   uword * p, * hash, key;
65   i32 i, i_min, dst_address, ai;
66
67   i_min = disable_default_route ? 1 : 0;
68   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
69   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
70     {
71       hash = fib->adj_index_by_dst_address[i];
72       if (! hash)
73         continue;
74
75       key = dst_address & im->fib_masks[i];
76       if ((p = hash_get (hash, key)) != 0)
77         {
78           ai = p[0];
79           goto done;
80         }
81     }
82     
83   /* Nothing matches in table. */
84   ai = lm->miss_adj_index;
85
86  done:
87   return ai;
88 }
89
90 static ip4_fib_t *
91 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
92 {
93   ip4_fib_t * fib;
94   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
95   vec_add2 (im->fibs, fib, 1);
96   fib->table_id = table_id;
97   fib->index = fib - im->fibs;
98   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
99   fib->fwd_classify_table_index = ~0;
100   fib->rev_classify_table_index = ~0;
101   ip4_mtrie_init (&fib->mtrie);
102   return fib;
103 }
104
105 ip4_fib_t *
106 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
107                                    u32 table_index_or_id, u32 flags)
108 {
109   uword * p, fib_index;
110
111   fib_index = table_index_or_id;
112   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
113     {
114       if (table_index_or_id == ~0) {
115         table_index_or_id = 0;
116         while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
117           table_index_or_id++;
118         }
119         return create_fib_with_table_id (im, table_index_or_id);
120       }
121
122       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
123       if (! p)
124         return create_fib_with_table_id (im, table_index_or_id);
125       fib_index = p[0];
126     }
127   return vec_elt_at_index (im->fibs, fib_index);
128 }
129
130 static void
131 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
132                                        ip4_fib_t * fib,
133                                        u32 address_length)
134 {
135   hash_t * h;
136   uword max_index;
137
138   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
139   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
140
141   fib->adj_index_by_dst_address[address_length] =
142     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
143
144   hash_set_flags (fib->adj_index_by_dst_address[address_length],
145                   HASH_FLAG_NO_AUTO_SHRINK);
146
147   h = hash_header (fib->adj_index_by_dst_address[address_length]);
148   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
149
150   /* Initialize new/old hash value vectors. */
151   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
152   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
153 }
154
155 static void
156 ip4_fib_set_adj_index (ip4_main_t * im,
157                        ip4_fib_t * fib,
158                        u32 flags,
159                        u32 dst_address_u32,
160                        u32 dst_address_length,
161                        u32 adj_index)
162 {
163   ip_lookup_main_t * lm = &im->lookup_main;
164   uword * hash;
165
166   if (vec_bytes(fib->old_hash_values))
167     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
168   if (vec_bytes(fib->new_hash_values))
169     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
170   fib->new_hash_values[0] = adj_index;
171
172   /* Make sure adj index is valid. */
173   if (CLIB_DEBUG > 0)
174     (void) ip_get_adjacency (lm, adj_index);
175
176   hash = fib->adj_index_by_dst_address[dst_address_length];
177
178   hash = _hash_set3 (hash, dst_address_u32,
179                      fib->new_hash_values,
180                      fib->old_hash_values);
181
182   fib->adj_index_by_dst_address[dst_address_length] = hash;
183
184   if (vec_len (im->add_del_route_callbacks) > 0)
185     {
186       ip4_add_del_route_callback_t * cb;
187       ip4_address_t d;
188       uword * p;
189
190       d.data_u32 = dst_address_u32;
191       vec_foreach (cb, im->add_del_route_callbacks)
192         if ((flags & cb->required_flags) == cb->required_flags)
193           cb->function (im, cb->function_opaque,
194                         fib, flags,
195                         &d, dst_address_length,
196                         fib->old_hash_values,
197                         fib->new_hash_values);
198
199       p = hash_get (hash, dst_address_u32);
200       /* hash_get should never return NULL here */
201       if (p)
202           clib_memcpy (p, fib->new_hash_values, 
203                        vec_bytes (fib->new_hash_values));
204       else
205           ASSERT(0);
206     }
207 }
208
209 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
210 {
211   ip_lookup_main_t * lm = &im->lookup_main;
212   ip4_fib_t * fib;
213   u32 dst_address, dst_address_length, adj_index, old_adj_index;
214   uword * hash, is_del;
215   ip4_add_del_route_callback_t * cb;
216
217   /* Either create new adjacency or use given one depending on arguments. */
218   if (a->n_add_adj > 0)
219     {
220       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
221       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
222     }
223   else
224     adj_index = a->adj_index;
225
226   dst_address = a->dst_address.data_u32;
227   dst_address_length = a->dst_address_length;
228   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
229
230   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
231   dst_address &= im->fib_masks[dst_address_length];
232
233   if (! fib->adj_index_by_dst_address[dst_address_length])
234     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
235
236   hash = fib->adj_index_by_dst_address[dst_address_length];
237
238   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
239
240   if (is_del)
241     {
242       fib->old_hash_values[0] = ~0;
243       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
244       fib->adj_index_by_dst_address[dst_address_length] = hash;
245
246       if (vec_len (im->add_del_route_callbacks) > 0
247           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
248         {
249           fib->new_hash_values[0] = ~0;
250           vec_foreach (cb, im->add_del_route_callbacks)
251             if ((a->flags & cb->required_flags) == cb->required_flags)
252               cb->function (im, cb->function_opaque,
253                             fib, a->flags,
254                             &a->dst_address, dst_address_length,
255                             fib->old_hash_values,
256                             fib->new_hash_values);
257         }
258     }
259   else
260     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
261                            adj_index);
262
263   old_adj_index = fib->old_hash_values[0];
264
265   /* Avoid spurious reference count increments */
266   if (old_adj_index == adj_index
267       && adj_index != ~0
268       && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
269     {
270       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
271       if (adj->share_count > 0)
272         adj->share_count --;
273     }
274
275   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
276                                is_del ? old_adj_index : adj_index,
277                                is_del);
278
279   /* Delete old adjacency index if present and changed. */
280   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
281       && old_adj_index != ~0
282       && old_adj_index != adj_index)
283     ip_del_adjacency (lm, old_adj_index);
284 }
285
286
287 u32
288 ip4_route_get_next_hop_adj (ip4_main_t * im,
289                             u32 fib_index,
290                             ip4_address_t *next_hop,
291                             u32 next_hop_sw_if_index,
292                             u32 explicit_fib_index)
293 {
294   ip_lookup_main_t * lm = &im->lookup_main;
295   vnet_main_t * vnm = vnet_get_main();
296   uword * nh_hash, * nh_result;
297   int is_interface_next_hop;
298   u32 nh_adj_index;
299   ip4_fib_t * fib;
300
301   fib = vec_elt_at_index (im->fibs, fib_index);
302
303   is_interface_next_hop = next_hop->data_u32 == 0;
304   if (is_interface_next_hop)
305     {
306       nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
307       if (nh_result)
308           nh_adj_index = *nh_result;
309       else
310         {
311            ip_adjacency_t * adj;
312            adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
313                                    &nh_adj_index);
314            ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
315            ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
316            hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
317         }
318     }
319   else if (next_hop_sw_if_index == ~0)
320     {
321       /* next-hop is recursive. we always need a indirect adj
322        * for recursive paths. Any LPM we perform now will give
323        * us a valid adj, but without tracking the next-hop we
324        * have no way to keep it valid.
325        */
326       ip_adjacency_t add_adj;
327       memset (&add_adj, 0, sizeof(add_adj));
328       add_adj.n_adj = 1;
329       add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
330       add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
331       add_adj.explicit_fib_index = explicit_fib_index;
332       ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
333     }
334   else
335     {
336       nh_hash = fib->adj_index_by_dst_address[32];
337       nh_result = hash_get (nh_hash, next_hop->data_u32);
338
339       /* Next hop must be known. */
340       if (! nh_result)
341         {
342           ip_adjacency_t * adj;
343
344           /* no /32 exists, get the longest prefix match */
345           nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
346                                                     next_hop, 0);
347           adj = ip_get_adjacency (lm, nh_adj_index);
348           /* if ARP interface adjacency is present, we need to
349              install ARP adjaceny for specific next hop */
350           if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
351               adj->arp.next_hop.ip4.as_u32 == 0)
352             {
353               nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
354             }
355         }
356       else
357         {
358           nh_adj_index = *nh_result;
359         }
360     }
361
362   return (nh_adj_index);
363 }
364
365 void
366 ip4_add_del_route_next_hop (ip4_main_t * im,
367                             u32 flags,
368                             ip4_address_t * dst_address,
369                             u32 dst_address_length,
370                             ip4_address_t * next_hop,
371                             u32 next_hop_sw_if_index,
372                             u32 next_hop_weight, u32 adj_index, 
373                             u32 explicit_fib_index)
374 {
375   vnet_main_t * vnm = vnet_get_main();
376   ip_lookup_main_t * lm = &im->lookup_main;
377   u32 fib_index;
378   ip4_fib_t * fib;
379   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
380   u32 dst_adj_index, nh_adj_index;
381   uword * dst_hash, * dst_result;
382   ip_adjacency_t * dst_adj;
383   ip_multipath_adjacency_t * old_mp, * new_mp;
384   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
385   clib_error_t * error = 0;
386
387   if (explicit_fib_index == (u32)~0)
388       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
389   else
390       fib_index = explicit_fib_index;
391
392   fib = vec_elt_at_index (im->fibs, fib_index);
393
394   /* Lookup next hop to be added or deleted. */
395   if (adj_index == (u32)~0)
396     {
397         nh_adj_index = ip4_route_get_next_hop_adj(im, fib_index,
398                                                   next_hop,
399                                                   next_hop_sw_if_index,
400                                                   explicit_fib_index);
401     }
402   else
403     {
404       nh_adj_index = adj_index;
405     }
406   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
407   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
408
409   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
410   dst_result = hash_get (dst_hash, dst_address_u32);
411   if (dst_result)
412     {
413       dst_adj_index = dst_result[0];
414       dst_adj = ip_get_adjacency (lm, dst_adj_index);
415     }
416   else
417     {
418       /* For deletes destination must be known. */
419       if (is_del)
420         {
421           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
422           error = clib_error_return (0, "unknown destination %U/%d",
423                                      format_ip4_address, dst_address,
424                                      dst_address_length);
425           goto done;
426         }
427
428       dst_adj_index = ~0;
429       dst_adj = 0;
430     }
431
432   /* Ignore adds of X/32 with next hop of X. */
433   if (! is_del
434       && dst_address_length == 32
435       && dst_address->data_u32 == next_hop->data_u32 
436       && adj_index != (u32)~0)
437     {
438       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
439       error = clib_error_return (0, "prefix matches next hop %U/%d",
440                                  format_ip4_address, dst_address,
441                                  dst_address_length);
442       goto done;
443     }
444
445   /* Destination is not known and default weight is set so add route
446      to existing non-multipath adjacency */
447   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
448     {
449       /* create / delete additional mapping of existing adjacency */
450       ip4_add_del_route_args_t a;
451
452       a.table_index_or_table_id = fib_index;
453       a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
454                  | IP4_ROUTE_FLAG_FIB_INDEX
455                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
456                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
457                              | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
458       a.dst_address = dst_address[0];
459       a.dst_address_length = dst_address_length;
460       a.adj_index = nh_adj_index;
461       a.add_adj = 0;
462       a.n_add_adj = 0;
463
464       ip4_add_del_route (im, &a);
465       goto done;
466     }
467
468   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
469
470   if (! ip_multipath_adjacency_add_del_next_hop
471       (lm, is_del,
472        old_mp_adj_index,
473        nh_adj_index,
474        next_hop_weight,
475        &new_mp_adj_index))
476     {
477       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
478       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
479                                  format_ip4_address, next_hop);
480       goto done;
481     }
482   
483   old_mp = new_mp = 0;
484   if (old_mp_adj_index != ~0)
485     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
486   if (new_mp_adj_index != ~0)
487     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
488
489   if (old_mp != new_mp)
490     {
491       ip4_add_del_route_args_t a;
492       ip_adjacency_t * adj;
493
494       a.table_index_or_table_id = fib_index;
495       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
496                  | IP4_ROUTE_FLAG_FIB_INDEX
497                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
498                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
499       a.dst_address = dst_address[0];
500       a.dst_address_length = dst_address_length;
501       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
502       a.add_adj = 0;
503       a.n_add_adj = 0;
504
505       ip4_add_del_route (im, &a);
506
507       adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index);
508       if (adj->n_adj == 1)
509         adj->share_count += is_del ? -1 : 1;
510     }
511
512  done:
513   if (error)
514     clib_error_report (error);
515 }
516
517 void *
518 ip4_get_route (ip4_main_t * im,
519                u32 table_index_or_table_id,
520                u32 flags,
521                u8 * address,
522                u32 address_length)
523 {
524   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
525   u32 dst_address = * (u32 *) address;
526   uword * hash, * p;
527
528   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
529   dst_address &= im->fib_masks[address_length];
530
531   hash = fib->adj_index_by_dst_address[address_length];
532   p = hash_get (hash, dst_address);
533   return (void *) p;
534 }
535
536 void
537 ip4_foreach_matching_route (ip4_main_t * im,
538                             u32 table_index_or_table_id,
539                             u32 flags,
540                             ip4_address_t * address,
541                             u32 address_length,
542                             ip4_address_t ** results,
543                             u8 ** result_lengths)
544 {
545   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
546   u32 dst_address = address->data_u32;
547   u32 this_length = address_length;
548   
549   if (*results)
550     _vec_len (*results) = 0;
551   if (*result_lengths)
552     _vec_len (*result_lengths) = 0;
553
554   while (this_length <= 32 && vec_len (results) == 0)
555     {
556       uword k, v;
557       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
558         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
559           {
560             ip4_address_t a;
561             a.data_u32 = k;
562             vec_add1 (*results, a);
563             vec_add1 (*result_lengths, this_length);
564           }
565       }));
566
567       this_length++;
568     }
569 }
570
571 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
572                                   u32 table_index_or_table_id,
573                                   u32 flags)
574 {
575   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
576   ip_lookup_main_t * lm = &im->lookup_main;
577   u32 i, l;
578   ip4_address_t a;
579   ip4_add_del_route_callback_t * cb;
580   static ip4_address_t * to_delete;
581
582   if (lm->n_adjacency_remaps == 0)
583     return;
584
585   for (l = 0; l <= 32; l++)
586     {
587       hash_pair_t * p;
588       uword * hash = fib->adj_index_by_dst_address[l];
589
590       if (hash_elts (hash) == 0)
591         continue;
592
593       if (to_delete)
594         _vec_len (to_delete) = 0;
595
596       hash_foreach_pair (p, hash, ({
597         u32 adj_index = p->value[0];
598         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
599
600         if (m)
601           {
602             /* Record destination address from hash key. */
603             a.data_u32 = p->key;
604
605             /* New adjacency points to nothing: so delete prefix. */
606             if (m == ~0)
607               vec_add1 (to_delete, a);
608             else
609               {
610                 /* Remap to new adjacency. */
611                 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
612
613                 /* Set new adjacency value. */
614                 fib->new_hash_values[0] = p->value[0] = m - 1;
615
616                 vec_foreach (cb, im->add_del_route_callbacks)
617                   if ((flags & cb->required_flags) == cb->required_flags)
618                     cb->function (im, cb->function_opaque,
619                                   fib, flags | IP4_ROUTE_FLAG_ADD,
620                                   &a, l,
621                                   fib->old_hash_values,
622                                   fib->new_hash_values);
623               }
624           }
625       }));
626
627       fib->new_hash_values[0] = ~0;
628       for (i = 0; i < vec_len (to_delete); i++)
629         {
630           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
631           vec_foreach (cb, im->add_del_route_callbacks)
632             if ((flags & cb->required_flags) == cb->required_flags)
633               cb->function (im, cb->function_opaque,
634                             fib, flags | IP4_ROUTE_FLAG_DEL,
635                             &a, l,
636                             fib->old_hash_values,
637                             fib->new_hash_values);
638         }
639     }
640
641   /* Also remap adjacencies in mtrie. */
642   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
643
644   /* Reset mapping table. */
645   vec_zero (lm->adjacency_remap_table);
646
647   /* All remaps have been performed. */
648   lm->n_adjacency_remaps = 0;
649 }
650
651 void ip4_delete_matching_routes (ip4_main_t * im,
652                                  u32 table_index_or_table_id,
653                                  u32 flags,
654                                  ip4_address_t * address,
655                                  u32 address_length)
656 {
657   static ip4_address_t * matching_addresses;
658   static u8 * matching_address_lengths;
659   u32 l, i;
660   ip4_add_del_route_args_t a;
661
662   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
663   a.table_index_or_table_id = table_index_or_table_id;
664   a.adj_index = ~0;
665   a.add_adj = 0;
666   a.n_add_adj = 0;
667
668   for (l = address_length + 1; l <= 32; l++)
669     {
670       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
671                                   address,
672                                   l,
673                                   &matching_addresses,
674                                   &matching_address_lengths);
675       for (i = 0; i < vec_len (matching_addresses); i++)
676         {
677           a.dst_address = matching_addresses[i];
678           a.dst_address_length = matching_address_lengths[i];
679           ip4_add_del_route (im, &a);
680         }
681     }
682
683   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
684 }
685
686 void
687 ip4_forward_next_trace (vlib_main_t * vm,
688                         vlib_node_runtime_t * node,
689                         vlib_frame_t * frame,
690                         vlib_rx_or_tx_t which_adj_index);
691
692 always_inline uword
693 ip4_lookup_inline (vlib_main_t * vm,
694                    vlib_node_runtime_t * node,
695                    vlib_frame_t * frame,
696                    int lookup_for_responses_to_locally_received_packets,
697                    int is_indirect)
698 {
699   ip4_main_t * im = &ip4_main;
700   ip_lookup_main_t * lm = &im->lookup_main;
701   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
702   u32 n_left_from, n_left_to_next, * from, * to_next;
703   ip_lookup_next_t next;
704   u32 cpu_index = os_get_cpu_number();
705
706   from = vlib_frame_vector_args (frame);
707   n_left_from = frame->n_vectors;
708   next = node->cached_next_index;
709
710   while (n_left_from > 0)
711     {
712       vlib_get_next_frame (vm, node, next,
713                            to_next, n_left_to_next);
714
715       while (n_left_from >= 4 && n_left_to_next >= 2)
716         {
717           vlib_buffer_t * p0, * p1;
718           ip4_header_t * ip0, * ip1;
719           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
720           ip_lookup_next_t next0, next1;
721           ip_adjacency_t * adj0, * adj1;
722           ip4_fib_mtrie_t * mtrie0, * mtrie1;
723           ip4_fib_mtrie_leaf_t leaf0, leaf1;
724           ip4_address_t * dst_addr0, *dst_addr1;
725           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
726           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
727           u32 flow_hash_config0, flow_hash_config1;
728           u32 hash_c0, hash_c1;
729           u32 wrong_next;
730
731           /* Prefetch next iteration. */
732           {
733             vlib_buffer_t * p2, * p3;
734
735             p2 = vlib_get_buffer (vm, from[2]);
736             p3 = vlib_get_buffer (vm, from[3]);
737
738             vlib_prefetch_buffer_header (p2, LOAD);
739             vlib_prefetch_buffer_header (p3, LOAD);
740
741             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
742             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
743           }
744
745           pi0 = to_next[0] = from[0];
746           pi1 = to_next[1] = from[1];
747
748           p0 = vlib_get_buffer (vm, pi0);
749           p1 = vlib_get_buffer (vm, pi1);
750
751           ip0 = vlib_buffer_get_current (p0);
752           ip1 = vlib_buffer_get_current (p1);
753
754           if (is_indirect)
755             {
756               ip_adjacency_t * iadj0, * iadj1;
757               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
758               iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
759               dst_addr0 = &iadj0->indirect.next_hop.ip4;
760               dst_addr1 = &iadj1->indirect.next_hop.ip4;
761             }
762           else
763             {
764               dst_addr0 = &ip0->dst_address;
765               dst_addr1 = &ip1->dst_address;
766             }
767
768           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
769           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
770           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
771             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
772           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
773             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
774
775
776           if (! lookup_for_responses_to_locally_received_packets)
777             {
778               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
779               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
780
781               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
782
783               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
784               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
785             }
786
787           tcp0 = (void *) (ip0 + 1);
788           tcp1 = (void *) (ip1 + 1);
789
790           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
791                          || ip0->protocol == IP_PROTOCOL_UDP);
792           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
793                          || ip1->protocol == IP_PROTOCOL_UDP);
794
795           if (! lookup_for_responses_to_locally_received_packets)
796             {
797               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
798               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
799             }
800
801           if (! lookup_for_responses_to_locally_received_packets)
802             {
803               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
804               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
805             }
806
807           if (! lookup_for_responses_to_locally_received_packets)
808             {
809               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
810               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
811             }
812
813           if (lookup_for_responses_to_locally_received_packets)
814             {
815               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
816               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
817             }
818           else
819             {
820               /* Handle default route. */
821               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
822               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
823
824               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
825               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
826             }
827
828           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
829                                                            dst_addr0,
830                                                            /* no_default_route */ 0));
831           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
832                                                            dst_addr1,
833                                                            /* no_default_route */ 0));
834           adj0 = ip_get_adjacency (lm, adj_index0);
835           adj1 = ip_get_adjacency (lm, adj_index1);
836
837           next0 = adj0->lookup_next_index;
838           next1 = adj1->lookup_next_index;
839
840           /* Use flow hash to compute multipath adjacency. */
841           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
842           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
843           if (PREDICT_FALSE (adj0->n_adj > 1))
844             {
845               flow_hash_config0 = 
846                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
847               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
848                 ip4_compute_flow_hash (ip0, flow_hash_config0);
849             }
850           if (PREDICT_FALSE(adj1->n_adj > 1))
851             {
852               flow_hash_config1 = 
853                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
854               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
855                 ip4_compute_flow_hash (ip1, flow_hash_config1);
856             }
857
858           ASSERT (adj0->n_adj > 0);
859           ASSERT (adj1->n_adj > 0);
860           ASSERT (is_pow2 (adj0->n_adj));
861           ASSERT (is_pow2 (adj1->n_adj));
862           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
863           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
864
865           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
866           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
867
868           vlib_increment_combined_counter 
869               (cm, cpu_index, adj_index0, 1,
870                vlib_buffer_length_in_chain (vm, p0) 
871                + sizeof(ethernet_header_t));
872           vlib_increment_combined_counter 
873               (cm, cpu_index, adj_index1, 1,
874                vlib_buffer_length_in_chain (vm, p1)
875                + sizeof(ethernet_header_t));
876
877           from += 2;
878           to_next += 2;
879           n_left_to_next -= 2;
880           n_left_from -= 2;
881
882           wrong_next = (next0 != next) + 2*(next1 != next);
883           if (PREDICT_FALSE (wrong_next != 0))
884             {
885               switch (wrong_next)
886                 {
887                 case 1:
888                   /* A B A */
889                   to_next[-2] = pi1;
890                   to_next -= 1;
891                   n_left_to_next += 1;
892                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
893                   break;
894
895                 case 2:
896                   /* A A B */
897                   to_next -= 1;
898                   n_left_to_next += 1;
899                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
900                   break;
901
902                 case 3:
903                   /* A B C */
904                   to_next -= 2;
905                   n_left_to_next += 2;
906                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
907                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
908                   if (next0 == next1)
909                     {
910                       /* A B B */
911                       vlib_put_next_frame (vm, node, next, n_left_to_next);
912                       next = next1;
913                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
914                     }
915                 }
916             }
917         }
918     
919       while (n_left_from > 0 && n_left_to_next > 0)
920         {
921           vlib_buffer_t * p0;
922           ip4_header_t * ip0;
923           __attribute__((unused)) tcp_header_t * tcp0;
924           ip_lookup_next_t next0;
925           ip_adjacency_t * adj0;
926           ip4_fib_mtrie_t * mtrie0;
927           ip4_fib_mtrie_leaf_t leaf0;
928           ip4_address_t * dst_addr0;
929           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
930           u32 flow_hash_config0, hash_c0;
931
932           pi0 = from[0];
933           to_next[0] = pi0;
934
935           p0 = vlib_get_buffer (vm, pi0);
936
937           ip0 = vlib_buffer_get_current (p0);
938
939           if (is_indirect)
940             {
941               ip_adjacency_t * iadj0;
942               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
943               dst_addr0 = &iadj0->indirect.next_hop.ip4;
944             }
945           else
946             {
947               dst_addr0 = &ip0->dst_address;
948             }
949
950           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
951           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
952             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
953
954           if (! lookup_for_responses_to_locally_received_packets)
955             {
956               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
957
958               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
959
960               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
961             }
962
963           tcp0 = (void *) (ip0 + 1);
964
965           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
966                          || ip0->protocol == IP_PROTOCOL_UDP);
967
968           if (! lookup_for_responses_to_locally_received_packets)
969             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
970
971           if (! lookup_for_responses_to_locally_received_packets)
972             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
973
974           if (! lookup_for_responses_to_locally_received_packets)
975             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
976
977           if (lookup_for_responses_to_locally_received_packets)
978             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
979           else
980             {
981               /* Handle default route. */
982               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
983               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
984             }
985
986           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
987                                                            dst_addr0,
988                                                            /* no_default_route */ 0));
989
990           adj0 = ip_get_adjacency (lm, adj_index0);
991
992           next0 = adj0->lookup_next_index;
993
994           /* Use flow hash to compute multipath adjacency. */
995           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
996           if (PREDICT_FALSE(adj0->n_adj > 1))
997             {
998               flow_hash_config0 = 
999                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
1000
1001               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
1002                 ip4_compute_flow_hash (ip0, flow_hash_config0);
1003             }
1004
1005           ASSERT (adj0->n_adj > 0);
1006           ASSERT (is_pow2 (adj0->n_adj));
1007           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
1008
1009           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1010
1011           vlib_increment_combined_counter 
1012               (cm, cpu_index, adj_index0, 1,
1013                vlib_buffer_length_in_chain (vm, p0)
1014                + sizeof(ethernet_header_t));
1015
1016           from += 1;
1017           to_next += 1;
1018           n_left_to_next -= 1;
1019           n_left_from -= 1;
1020
1021           if (PREDICT_FALSE (next0 != next))
1022             {
1023               n_left_to_next += 1;
1024               vlib_put_next_frame (vm, node, next, n_left_to_next);
1025               next = next0;
1026               vlib_get_next_frame (vm, node, next,
1027                                    to_next, n_left_to_next);
1028               to_next[0] = pi0;
1029               to_next += 1;
1030               n_left_to_next -= 1;
1031             }
1032         }
1033
1034       vlib_put_next_frame (vm, node, next, n_left_to_next);
1035     }
1036
1037   if (node->flags & VLIB_NODE_FLAG_TRACE)
1038     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
1039
1040   return frame->n_vectors;
1041 }
1042
1043 /** @brief IPv4 lookup node.
1044     @node ip4-lookup
1045
1046     This is the main IPv4 lookup dispatch node.
1047
1048     @param vm vlib_main_t corresponding to the current thread
1049     @param node vlib_node_runtime_t
1050     @param frame vlib_frame_t whose contents should be dispatched
1051
1052     @par Graph mechanics: buffer metadata, next index usage
1053
1054     @em Uses:
1055     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
1056         - Indicates the @c sw_if_index value of the interface that the
1057           packet was received on.
1058     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
1059         - When the value is @c ~0 then the node performs a longest prefix
1060           match (LPM) for the packet destination address in the FIB attached
1061           to the receive interface.
1062         - Otherwise perform LPM for the packet destination address in the
1063           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
1064           value (0, 1, ...) and not a VRF id.
1065
1066     @em Sets:
1067     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
1068         - The lookup result adjacency index.
1069
1070     <em>Next Index:</em>
1071     - Dispatches the packet to the node index found in
1072       ip_adjacency_t @c adj->lookup_next_index
1073       (where @c adj is the lookup result adjacency).
1074 */
1075 static uword
1076 ip4_lookup (vlib_main_t * vm,
1077             vlib_node_runtime_t * node,
1078             vlib_frame_t * frame)
1079 {
1080   return ip4_lookup_inline (vm, node, frame,
1081                             /* lookup_for_responses_to_locally_received_packets */ 0,
1082                             /* is_indirect */ 0);
1083
1084 }
1085
1086 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
1087                                         ip_adjacency_t * adj,
1088                                         u32 sw_if_index,
1089                                         u32 if_address_index)
1090 {
1091   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1092   ip_lookup_next_t n;
1093   vnet_l3_packet_type_t packet_type;
1094   u32 node_index;
1095
1096   if (hw->hw_class_index == ethernet_hw_interface_class.index
1097       || hw->hw_class_index == srp_hw_interface_class.index)
1098     {
1099       /* 
1100        * We have a bit of a problem in this case. ip4-arp uses
1101        * the rewrite_header.next_index to hand pkts to the
1102        * indicated inteface output node. We can end up in
1103        * ip4_rewrite_local, too, which also pays attention to 
1104        * rewrite_header.next index. Net result: a hack in
1105        * ip4_rewrite_local...
1106        */
1107       n = IP_LOOKUP_NEXT_ARP;
1108       node_index = ip4_arp_node.index;
1109       adj->if_address_index = if_address_index;
1110       adj->arp.next_hop.ip4.as_u32 = 0;
1111       ip46_address_reset(&adj->arp.next_hop);
1112       packet_type = VNET_L3_PACKET_TYPE_ARP;
1113     }
1114   else
1115     {
1116       n = IP_LOOKUP_NEXT_REWRITE;
1117       node_index = ip4_rewrite_node.index;
1118       packet_type = VNET_L3_PACKET_TYPE_IP4;
1119     }
1120
1121   adj->lookup_next_index = n;
1122   vnet_rewrite_for_sw_interface
1123     (vnm,
1124      packet_type,
1125      sw_if_index,
1126      node_index,
1127      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1128      &adj->rewrite_header,
1129      sizeof (adj->rewrite_data));
1130 }
1131
1132 static void
1133 ip4_add_interface_routes (u32 sw_if_index,
1134                           ip4_main_t * im, u32 fib_index,
1135                           ip_interface_address_t * a)
1136 {
1137   vnet_main_t * vnm = vnet_get_main();
1138   ip_lookup_main_t * lm = &im->lookup_main;
1139   ip_adjacency_t * adj;
1140   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1141   ip4_add_del_route_args_t x;
1142   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1143   u32 classify_table_index;
1144
1145   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1146   x.table_index_or_table_id = fib_index;
1147   x.flags = (IP4_ROUTE_FLAG_ADD
1148              | IP4_ROUTE_FLAG_FIB_INDEX
1149              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1150   x.dst_address = address[0];
1151   x.dst_address_length = a->address_length;
1152   x.n_add_adj = 0;
1153   x.add_adj = 0;
1154
1155   a->neighbor_probe_adj_index = ~0;
1156   if (a->address_length < 32)
1157     {
1158       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1159                               &x.adj_index);
1160       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1161       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1162       ip4_add_del_route (im, &x);
1163       a->neighbor_probe_adj_index = x.adj_index;
1164     }
1165   
1166   /* Add e.g. 1.1.1.1/32 as local to this host. */
1167   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1168                           &x.adj_index);
1169   
1170   classify_table_index = ~0;
1171   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1172     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1173   if (classify_table_index != (u32) ~0)
1174     {
1175       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1176       adj->classify.table_index = classify_table_index;
1177     }
1178   else
1179     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1180   
1181   adj->if_address_index = a - lm->if_address_pool;
1182   adj->rewrite_header.sw_if_index = sw_if_index;
1183   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1184   /* 
1185    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1186    * fail an RPF-ish check, but still go thru the rewrite code...
1187    */
1188   adj->rewrite_header.data_bytes = 0;
1189
1190   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1191   x.dst_address_length = 32;
1192   ip4_add_del_route (im, &x);
1193 }
1194
1195 static void
1196 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1197 {
1198   ip4_add_del_route_args_t x;
1199
1200   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1201   x.table_index_or_table_id = fib_index;
1202   x.flags = (IP4_ROUTE_FLAG_DEL
1203              | IP4_ROUTE_FLAG_FIB_INDEX
1204              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1205   x.dst_address = address[0];
1206   x.dst_address_length = address_length;
1207   x.adj_index = ~0;
1208   x.n_add_adj = 0;
1209   x.add_adj = 0;
1210
1211   if (address_length < 32)
1212     ip4_add_del_route (im, &x);
1213
1214   x.dst_address_length = 32;
1215   ip4_add_del_route (im, &x);
1216
1217   ip4_delete_matching_routes (im,
1218                               fib_index,
1219                               IP4_ROUTE_FLAG_FIB_INDEX,
1220                               address,
1221                               address_length);
1222 }
1223
1224 typedef struct {
1225     u32 sw_if_index;
1226     ip4_address_t address;
1227     u32 length;
1228 } ip4_interface_address_t;
1229
1230 static clib_error_t *
1231 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1232                                         u32 sw_if_index,
1233                                         ip4_address_t * new_address,
1234                                         u32 new_length,
1235                                         u32 redistribute,
1236                                         u32 insert_routes,
1237                                         u32 is_del);
1238
1239 static clib_error_t *
1240 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1241                                         u32 sw_if_index,
1242                                         ip4_address_t * address,
1243                                         u32 address_length,
1244                                         u32 redistribute,
1245                                         u32 insert_routes,
1246                                         u32 is_del)
1247 {
1248   vnet_main_t * vnm = vnet_get_main();
1249   ip4_main_t * im = &ip4_main;
1250   ip_lookup_main_t * lm = &im->lookup_main;
1251   clib_error_t * error = 0;
1252   u32 if_address_index, elts_before;
1253   ip4_address_fib_t ip4_af, * addr_fib = 0;
1254
1255   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1256   ip4_addr_fib_init (&ip4_af, address,
1257                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1258   vec_add1 (addr_fib, ip4_af);
1259
1260   /* When adding an address check that it does not conflict with an existing address. */
1261   if (! is_del)
1262     {
1263       ip_interface_address_t * ia;
1264       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1265                                     0 /* honor unnumbered */,
1266       ({
1267         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1268
1269         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1270             || ip4_destination_matches_route (im, x, address, address_length))
1271           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1272                                     format_ip4_address_and_length, address, address_length,
1273                                     format_ip4_address_and_length, x, ia->address_length,
1274                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1275       }));
1276     }
1277
1278   elts_before = pool_elts (lm->if_address_pool);
1279
1280   error = ip_interface_address_add_del
1281     (lm,
1282      sw_if_index,
1283      addr_fib,
1284      address_length,
1285      is_del,
1286      &if_address_index);
1287   if (error)
1288     goto done;
1289   
1290   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1291     {
1292       if (is_del)
1293         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1294                                   address_length);
1295       
1296       else
1297           ip4_add_interface_routes (sw_if_index,
1298                                     im, ip4_af.fib_index,
1299                                     pool_elt_at_index 
1300                                     (lm->if_address_pool, if_address_index));
1301     }
1302
1303   /* If pool did not grow/shrink: add duplicate address. */
1304   if (elts_before != pool_elts (lm->if_address_pool))
1305     {
1306       ip4_add_del_interface_address_callback_t * cb;
1307       vec_foreach (cb, im->add_del_interface_address_callbacks)
1308         cb->function (im, cb->function_opaque, sw_if_index,
1309                       address, address_length,
1310                       if_address_index,
1311                       is_del);
1312     }
1313
1314  done:
1315   vec_free (addr_fib);
1316   return error;
1317 }
1318
1319 clib_error_t *
1320 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1321                                ip4_address_t * address, u32 address_length,
1322                                u32 is_del)
1323 {
1324   return ip4_add_del_interface_address_internal
1325     (vm, sw_if_index, address, address_length,
1326      /* redistribute */ 1,
1327      /* insert_routes */ 1,
1328      is_del);
1329 }
1330
1331 static clib_error_t *
1332 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1333                                 u32 sw_if_index,
1334                                 u32 flags)
1335 {
1336   ip4_main_t * im = &ip4_main;
1337   ip_interface_address_t * ia;
1338   ip4_address_t * a;
1339   u32 is_admin_up, fib_index;
1340   
1341   /* Fill in lookup tables with default table (0). */
1342   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1343   
1344   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1345   
1346   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1347   
1348   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1349
1350   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1351                                 0 /* honor unnumbered */,
1352   ({
1353     a = ip_interface_address_get_address (&im->lookup_main, ia);
1354     if (is_admin_up)
1355       ip4_add_interface_routes (sw_if_index,
1356                                 im, fib_index,
1357                                 ia);
1358     else
1359       ip4_del_interface_routes (im, fib_index,
1360                                 a, ia->address_length);
1361   }));
1362
1363   return 0;
1364 }
1365  
1366 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1367
1368 /* Built-in ip4 unicast rx feature path definition */
1369 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
1370   .node_name = "ip4-inacl", 
1371   .runs_before = {"ip4-source-check-via-rx", 0}, 
1372   .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
1373 };
1374
1375 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
1376   .node_name = "ip4-source-check-via-rx",
1377   .runs_before = {"ip4-source-check-via-any", 0},
1378   .feature_index = 
1379   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
1380 };
1381
1382 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
1383   .node_name = "ip4-source-check-via-any",
1384   .runs_before = {"ip4-policer-classify", 0},
1385   .feature_index = 
1386   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
1387 };
1388
1389 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check, static) = {
1390   .node_name = "ip4-source-and-port-range-check",
1391   .runs_before = {"ip4-policer-classify", 0},
1392   .feature_index =
1393   &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
1394 };
1395
1396 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
1397   .node_name = "ip4-policer-classify",
1398   .runs_before = {"ipsec-input-ip4", 0},
1399   .feature_index =
1400   &ip4_main.ip4_unicast_rx_feature_policer_classify,
1401 };
1402
1403 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
1404   .node_name = "ipsec-input-ip4",
1405   .runs_before = {"vpath-input-ip4", 0},
1406   .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
1407 };
1408
1409 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
1410   .node_name = "vpath-input-ip4",
1411   .runs_before = {"ip4-lookup", 0},
1412   .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
1413 };
1414
1415 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
1416   .node_name = "ip4-lookup",
1417   .runs_before = {0}, /* not before any other features */
1418   .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
1419 };
1420
1421 /* Built-in ip4 multicast rx feature path definition */
1422 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
1423   .node_name = "vpath-input-ip4",
1424   .runs_before = {"ip4-lookup-multicast", 0},
1425   .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
1426 };
1427
1428 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
1429   .node_name = "ip4-lookup-multicast",
1430   .runs_before = {0}, /* not before any other features */
1431   .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
1432 };
1433
1434 static char * feature_start_nodes[] = 
1435   { "ip4-input", "ip4-input-no-checksum"};
1436
1437 static clib_error_t *
1438 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
1439 {
1440   ip_lookup_main_t * lm = &im->lookup_main;
1441   clib_error_t * error;
1442   vnet_cast_t cast;
1443
1444   for (cast = 0; cast < VNET_N_CAST; cast++)
1445     {
1446       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1447       vnet_config_main_t * vcm = &cm->config_main;
1448
1449       if ((error = ip_feature_init_cast (vm, cm, vcm, 
1450                                          feature_start_nodes,
1451                                          ARRAY_LEN(feature_start_nodes),
1452                                          cast,
1453                                          1 /* is_ip4 */)))
1454         return error;
1455     }
1456   return 0;
1457 }
1458
1459 static clib_error_t *
1460 ip4_sw_interface_add_del (vnet_main_t * vnm,
1461                           u32 sw_if_index,
1462                           u32 is_add)
1463 {
1464   vlib_main_t * vm = vnm->vlib_main;
1465   ip4_main_t * im = &ip4_main;
1466   ip_lookup_main_t * lm = &im->lookup_main;
1467   u32 ci, cast;
1468   u32 feature_index;
1469
1470   for (cast = 0; cast < VNET_N_CAST; cast++)
1471     {
1472       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1473       vnet_config_main_t * vcm = &cm->config_main;
1474
1475       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1476       ci = cm->config_index_by_sw_if_index[sw_if_index];
1477
1478       if (cast == VNET_UNICAST)
1479         feature_index = im->ip4_unicast_rx_feature_lookup;
1480       else
1481         feature_index = im->ip4_multicast_rx_feature_lookup;
1482
1483       if (is_add)
1484         ci = vnet_config_add_feature (vm, vcm,
1485                                       ci,
1486                                       feature_index,
1487                                       /* config data */ 0,
1488                                       /* # bytes of config data */ 0);
1489       else
1490         ci = vnet_config_del_feature (vm, vcm,
1491                                       ci,
1492                                       feature_index,
1493                                       /* config data */ 0,
1494                                       /* # bytes of config data */ 0);
1495
1496       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1497     }
1498
1499   return /* no error */ 0;
1500 }
1501
1502 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1503
1504 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
1505
1506 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1507   .function = ip4_lookup,
1508   .name = "ip4-lookup",
1509   .vector_size = sizeof (u32),
1510
1511   .format_trace = format_ip4_lookup_trace,
1512
1513   .n_next_nodes = IP4_LOOKUP_N_NEXT,
1514   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1515 };
1516
1517 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
1518
1519 static uword
1520 ip4_indirect (vlib_main_t * vm,
1521                vlib_node_runtime_t * node,
1522                vlib_frame_t * frame)
1523 {
1524   return ip4_lookup_inline (vm, node, frame,
1525                             /* lookup_for_responses_to_locally_received_packets */ 0,
1526                             /* is_indirect */ 1);
1527 }
1528
1529 VLIB_REGISTER_NODE (ip4_indirect_node) = {
1530   .function = ip4_indirect,
1531   .name = "ip4-indirect",
1532   .vector_size = sizeof (u32),
1533   .sibling_of = "ip4-lookup",
1534   .format_trace = format_ip4_lookup_trace,
1535
1536   .n_next_nodes = 0,
1537 };
1538
1539 VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect)
1540
1541
1542 /* Global IP4 main. */
1543 ip4_main_t ip4_main;
1544
1545 clib_error_t *
1546 ip4_lookup_init (vlib_main_t * vm)
1547 {
1548   ip4_main_t * im = &ip4_main;
1549   clib_error_t * error;
1550   uword i;
1551
1552   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1553     {
1554       u32 m;
1555
1556       if (i < 32)
1557         m = pow2_mask (i) << (32 - i);
1558       else 
1559         m = ~0;
1560       im->fib_masks[i] = clib_host_to_net_u32 (m);
1561     }
1562
1563   /* Create FIB with index 0 and table id of 0. */
1564   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1565
1566   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1567
1568   {
1569     pg_node_t * pn;
1570     pn = pg_get_node (ip4_lookup_node.index);
1571     pn->unformat_edit = unformat_pg_ip4_header;
1572   }
1573
1574   {
1575     ethernet_arp_header_t h;
1576
1577     memset (&h, 0, sizeof (h));
1578
1579     /* Set target ethernet address to all zeros. */
1580     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1581
1582 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1583 #define _8(f,v) h.f = v;
1584     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1585     _16 (l3_type, ETHERNET_TYPE_IP4);
1586     _8 (n_l2_address_bytes, 6);
1587     _8 (n_l3_address_bytes, 4);
1588     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1589 #undef _16
1590 #undef _8
1591
1592     vlib_packet_template_init (vm,
1593                                &im->ip4_arp_request_packet_template,
1594                                /* data */ &h,
1595                                sizeof (h),
1596                                /* alloc chunk size */ 8,
1597                                "ip4 arp");
1598   }
1599
1600   error = ip4_feature_init (vm, im);
1601
1602   return error;
1603 }
1604
1605 VLIB_INIT_FUNCTION (ip4_lookup_init);
1606
1607 typedef struct {
1608   /* Adjacency taken. */
1609   u32 adj_index;
1610   u32 flow_hash;
1611   u32 fib_index;
1612
1613   /* Packet data, possibly *after* rewrite. */
1614   u8 packet_data[64 - 1*sizeof(u32)];
1615 } ip4_forward_next_trace_t;
1616
1617 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1618 {
1619   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1620   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1621   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1622   uword indent = format_get_indent (s);
1623   s = format (s, "%U%U",
1624                 format_white_space, indent,
1625                 format_ip4_header, t->packet_data);
1626   return s;
1627 }
1628
1629 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1630 {
1631   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1632   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1633   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1634   vnet_main_t * vnm = vnet_get_main();
1635   ip4_main_t * im = &ip4_main;
1636   uword indent = format_get_indent (s);
1637
1638   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1639               t->fib_index, t->adj_index, format_ip_adjacency,
1640               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1641   s = format (s, "\n%U%U",
1642               format_white_space, indent,
1643               format_ip4_header, t->packet_data);
1644   return s;
1645 }
1646
1647 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1648 {
1649   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1650   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1651   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1652   vnet_main_t * vnm = vnet_get_main();
1653   ip4_main_t * im = &ip4_main;
1654   uword indent = format_get_indent (s);
1655
1656   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
1657               t->fib_index, t->adj_index, format_ip_adjacency,
1658               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1659   s = format (s, "\n%U%U",
1660               format_white_space, indent,
1661               format_ip_adjacency_packet_data,
1662               vnm, &im->lookup_main, t->adj_index,
1663               t->packet_data, sizeof (t->packet_data));
1664   return s;
1665 }
1666
1667 /* Common trace function for all ip4-forward next nodes. */
1668 void
1669 ip4_forward_next_trace (vlib_main_t * vm,
1670                         vlib_node_runtime_t * node,
1671                         vlib_frame_t * frame,
1672                         vlib_rx_or_tx_t which_adj_index)
1673 {
1674   u32 * from, n_left;
1675   ip4_main_t * im = &ip4_main;
1676
1677   n_left = frame->n_vectors;
1678   from = vlib_frame_vector_args (frame);
1679   
1680   while (n_left >= 4)
1681     {
1682       u32 bi0, bi1;
1683       vlib_buffer_t * b0, * b1;
1684       ip4_forward_next_trace_t * t0, * t1;
1685
1686       /* Prefetch next iteration. */
1687       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1688       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1689
1690       bi0 = from[0];
1691       bi1 = from[1];
1692
1693       b0 = vlib_get_buffer (vm, bi0);
1694       b1 = vlib_get_buffer (vm, bi1);
1695
1696       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1697         {
1698           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1699           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1700           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1701           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1702               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1703               vec_elt (im->fib_index_by_sw_if_index,
1704                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1705
1706           clib_memcpy (t0->packet_data,
1707                   vlib_buffer_get_current (b0),
1708                   sizeof (t0->packet_data));
1709         }
1710       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1711         {
1712           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1713           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1714           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1715           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1716               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1717               vec_elt (im->fib_index_by_sw_if_index,
1718                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1719           clib_memcpy (t1->packet_data,
1720                   vlib_buffer_get_current (b1),
1721                   sizeof (t1->packet_data));
1722         }
1723       from += 2;
1724       n_left -= 2;
1725     }
1726
1727   while (n_left >= 1)
1728     {
1729       u32 bi0;
1730       vlib_buffer_t * b0;
1731       ip4_forward_next_trace_t * t0;
1732
1733       bi0 = from[0];
1734
1735       b0 = vlib_get_buffer (vm, bi0);
1736
1737       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1738         {
1739           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1740           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1741           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1742           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1743               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1744               vec_elt (im->fib_index_by_sw_if_index,
1745                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1746           clib_memcpy (t0->packet_data,
1747                   vlib_buffer_get_current (b0),
1748                   sizeof (t0->packet_data));
1749         }
1750       from += 1;
1751       n_left -= 1;
1752     }
1753 }
1754
1755 static uword
1756 ip4_drop_or_punt (vlib_main_t * vm,
1757                   vlib_node_runtime_t * node,
1758                   vlib_frame_t * frame,
1759                   ip4_error_t error_code)
1760 {
1761   u32 * buffers = vlib_frame_vector_args (frame);
1762   uword n_packets = frame->n_vectors;
1763
1764   vlib_error_drop_buffers (vm, node,
1765                            buffers,
1766                            /* stride */ 1,
1767                            n_packets,
1768                            /* next */ 0,
1769                            ip4_input_node.index,
1770                            error_code);
1771
1772   if (node->flags & VLIB_NODE_FLAG_TRACE)
1773     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1774
1775   return n_packets;
1776 }
1777
1778 static uword
1779 ip4_drop (vlib_main_t * vm,
1780           vlib_node_runtime_t * node,
1781           vlib_frame_t * frame)
1782 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1783
1784 static uword
1785 ip4_punt (vlib_main_t * vm,
1786           vlib_node_runtime_t * node,
1787           vlib_frame_t * frame)
1788 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1789
1790 static uword
1791 ip4_miss (vlib_main_t * vm,
1792           vlib_node_runtime_t * node,
1793           vlib_frame_t * frame)
1794 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1795
1796 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1797   .function = ip4_drop,
1798   .name = "ip4-drop",
1799   .vector_size = sizeof (u32),
1800
1801   .format_trace = format_ip4_forward_next_trace,
1802
1803   .n_next_nodes = 1,
1804   .next_nodes = {
1805     [0] = "error-drop",
1806   },
1807 };
1808
1809 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1810
1811 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1812   .function = ip4_punt,
1813   .name = "ip4-punt",
1814   .vector_size = sizeof (u32),
1815
1816   .format_trace = format_ip4_forward_next_trace,
1817
1818   .n_next_nodes = 1,
1819   .next_nodes = {
1820     [0] = "error-punt",
1821   },
1822 };
1823
1824 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1825
1826 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1827   .function = ip4_miss,
1828   .name = "ip4-miss",
1829   .vector_size = sizeof (u32),
1830
1831   .format_trace = format_ip4_forward_next_trace,
1832
1833   .n_next_nodes = 1,
1834   .next_nodes = {
1835     [0] = "error-drop",
1836   },
1837 };
1838
1839 VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss)
1840
1841 /* Compute TCP/UDP/ICMP4 checksum in software. */
1842 u16
1843 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1844                               ip4_header_t * ip0)
1845 {
1846   ip_csum_t sum0;
1847   u32 ip_header_length, payload_length_host_byte_order;
1848   u32 n_this_buffer, n_bytes_left;
1849   u16 sum16;
1850   void * data_this_buffer;
1851   
1852   /* Initialize checksum with ip header. */
1853   ip_header_length = ip4_header_bytes (ip0);
1854   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1855   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1856
1857   if (BITS (uword) == 32)
1858     {
1859       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1860       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1861     }
1862   else
1863     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1864
1865   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1866   data_this_buffer = (void *) ip0 + ip_header_length;
1867   if (n_this_buffer + ip_header_length > p0->current_length)
1868     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1869   while (1)
1870     {
1871       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1872       n_bytes_left -= n_this_buffer;
1873       if (n_bytes_left == 0)
1874         break;
1875
1876       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1877       p0 = vlib_get_buffer (vm, p0->next_buffer);
1878       data_this_buffer = vlib_buffer_get_current (p0);
1879       n_this_buffer = p0->current_length;
1880     }
1881
1882   sum16 = ~ ip_csum_fold (sum0);
1883
1884   return sum16;
1885 }
1886
1887 static u32
1888 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1889 {
1890   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1891   udp_header_t * udp0;
1892   u16 sum16;
1893
1894   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1895           || ip0->protocol == IP_PROTOCOL_UDP);
1896
1897   udp0 = (void *) (ip0 + 1);
1898   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1899     {
1900       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1901                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1902       return p0->flags;
1903     }
1904
1905   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1906
1907   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1908                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1909
1910   return p0->flags;
1911 }
1912
1913 static uword
1914 ip4_local (vlib_main_t * vm,
1915            vlib_node_runtime_t * node,
1916            vlib_frame_t * frame)
1917 {
1918   ip4_main_t * im = &ip4_main;
1919   ip_lookup_main_t * lm = &im->lookup_main;
1920   ip_local_next_t next_index;
1921   u32 * from, * to_next, n_left_from, n_left_to_next;
1922   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1923
1924   from = vlib_frame_vector_args (frame);
1925   n_left_from = frame->n_vectors;
1926   next_index = node->cached_next_index;
1927   
1928   if (node->flags & VLIB_NODE_FLAG_TRACE)
1929     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1930
1931   while (n_left_from > 0)
1932     {
1933       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1934
1935       while (n_left_from >= 4 && n_left_to_next >= 2)
1936         {
1937           vlib_buffer_t * p0, * p1;
1938           ip4_header_t * ip0, * ip1;
1939           udp_header_t * udp0, * udp1;
1940           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1941           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1942           ip_adjacency_t * adj0, * adj1;
1943           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
1944           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
1945           i32 len_diff0, len_diff1;
1946           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1947           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1948           u8 enqueue_code;
1949       
1950           pi0 = to_next[0] = from[0];
1951           pi1 = to_next[1] = from[1];
1952           from += 2;
1953           n_left_from -= 2;
1954           to_next += 2;
1955           n_left_to_next -= 2;
1956       
1957           p0 = vlib_get_buffer (vm, pi0);
1958           p1 = vlib_get_buffer (vm, pi1);
1959
1960           ip0 = vlib_buffer_get_current (p0);
1961           ip1 = vlib_buffer_get_current (p1);
1962
1963           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1964                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1965           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1966                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1967
1968           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1969           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
1970
1971           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1972
1973           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1974           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1975
1976           /* Treat IP frag packets as "experimental" protocol for now
1977              until support of IP frag reassembly is implemented */
1978           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1979           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1980           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1981           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1982           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1983           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1984
1985           flags0 = p0->flags;
1986           flags1 = p1->flags;
1987
1988           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1989           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1990
1991           udp0 = ip4_next_header (ip0);
1992           udp1 = ip4_next_header (ip1);
1993
1994           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1995           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1996           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1997
1998           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1999           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
2000
2001           /* Verify UDP length. */
2002           ip_len0 = clib_net_to_host_u16 (ip0->length);
2003           ip_len1 = clib_net_to_host_u16 (ip1->length);
2004           udp_len0 = clib_net_to_host_u16 (udp0->length);
2005           udp_len1 = clib_net_to_host_u16 (udp1->length);
2006
2007           len_diff0 = ip_len0 - udp_len0;
2008           len_diff1 = ip_len1 - udp_len1;
2009
2010           len_diff0 = is_udp0 ? len_diff0 : 0;
2011           len_diff1 = is_udp1 ? len_diff1 : 0;
2012
2013           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
2014                                 & good_tcp_udp0 & good_tcp_udp1)))
2015             {
2016               if (is_tcp_udp0)
2017                 {
2018                   if (is_tcp_udp0
2019                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2020                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2021                   good_tcp_udp0 =
2022                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2023                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2024                 }
2025               if (is_tcp_udp1)
2026                 {
2027                   if (is_tcp_udp1
2028                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2029                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
2030                   good_tcp_udp1 =
2031                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2032                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2033                 }
2034             }
2035
2036           good_tcp_udp0 &= len_diff0 >= 0;
2037           good_tcp_udp1 &= len_diff1 >= 0;
2038
2039           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2040           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
2041
2042           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
2043
2044           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2045           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
2046
2047           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2048           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2049                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2050                     : error0);
2051           error1 = (is_tcp_udp1 && ! good_tcp_udp1
2052                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
2053                     : error1);
2054
2055           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2056           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
2057
2058           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2059           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2060
2061           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
2062           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2063
2064           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2065                                                            &ip0->src_address,
2066                                                            /* no_default_route */ 1));
2067           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
2068                                                            &ip1->src_address,
2069                                                            /* no_default_route */ 1));
2070
2071           adj0 = ip_get_adjacency (lm, adj_index0);
2072           adj1 = ip_get_adjacency (lm, adj_index1);
2073
2074           /* 
2075            * Must have a route to source otherwise we drop the packet.
2076            * ip4 broadcasts are accepted, e.g. to make dhcp client work
2077            */
2078           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2079                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2080                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2081                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2082                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2083                     ? IP4_ERROR_SRC_LOOKUP_MISS
2084                     : error0);
2085           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
2086                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2087                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
2088                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2089                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2090                     ? IP4_ERROR_SRC_LOOKUP_MISS
2091                     : error1);
2092
2093           next0 = lm->local_next_by_ip_protocol[proto0];
2094           next1 = lm->local_next_by_ip_protocol[proto1];
2095
2096           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2097           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
2098
2099           p0->error = error0 ? error_node->errors[error0] : 0;
2100           p1->error = error1 ? error_node->errors[error1] : 0;
2101
2102           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
2103
2104           if (PREDICT_FALSE (enqueue_code != 0))
2105             {
2106               switch (enqueue_code)
2107                 {
2108                 case 1:
2109                   /* A B A */
2110                   to_next[-2] = pi1;
2111                   to_next -= 1;
2112                   n_left_to_next += 1;
2113                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2114                   break;
2115
2116                 case 2:
2117                   /* A A B */
2118                   to_next -= 1;
2119                   n_left_to_next += 1;
2120                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2121                   break;
2122
2123                 case 3:
2124                   /* A B B or A B C */
2125                   to_next -= 2;
2126                   n_left_to_next += 2;
2127                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2128                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2129                   if (next0 == next1)
2130                     {
2131                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2132                       next_index = next1;
2133                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2134                     }
2135                   break;
2136                 }
2137             }
2138         }
2139
2140       while (n_left_from > 0 && n_left_to_next > 0)
2141         {
2142           vlib_buffer_t * p0;
2143           ip4_header_t * ip0;
2144           udp_header_t * udp0;
2145           ip4_fib_mtrie_t * mtrie0;
2146           ip4_fib_mtrie_leaf_t leaf0;
2147           ip_adjacency_t * adj0;
2148           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
2149           i32 len_diff0;
2150           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2151       
2152           pi0 = to_next[0] = from[0];
2153           from += 1;
2154           n_left_from -= 1;
2155           to_next += 1;
2156           n_left_to_next -= 1;
2157       
2158           p0 = vlib_get_buffer (vm, pi0);
2159
2160           ip0 = vlib_buffer_get_current (p0);
2161
2162           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2163                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2164
2165           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2166
2167           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2168
2169           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2170
2171           /* Treat IP frag packets as "experimental" protocol for now
2172              until support of IP frag reassembly is implemented */
2173           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
2174           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2175           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2176
2177           flags0 = p0->flags;
2178
2179           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2180
2181           udp0 = ip4_next_header (ip0);
2182
2183           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2184           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2185
2186           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2187
2188           /* Verify UDP length. */
2189           ip_len0 = clib_net_to_host_u16 (ip0->length);
2190           udp_len0 = clib_net_to_host_u16 (udp0->length);
2191
2192           len_diff0 = ip_len0 - udp_len0;
2193
2194           len_diff0 = is_udp0 ? len_diff0 : 0;
2195
2196           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
2197             {
2198               if (is_tcp_udp0)
2199                 {
2200                   if (is_tcp_udp0
2201                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2202                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2203                   good_tcp_udp0 =
2204                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2205                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2206                 }
2207             }
2208
2209           good_tcp_udp0 &= len_diff0 >= 0;
2210
2211           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2212
2213           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2214
2215           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2216
2217           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2218           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2219                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2220                     : error0);
2221
2222           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2223
2224           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2225           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2226
2227           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2228                                                            &ip0->src_address,
2229                                                            /* no_default_route */ 1));
2230
2231           adj0 = ip_get_adjacency (lm, adj_index0);
2232
2233           /* Must have a route to source otherwise we drop the packet. */
2234           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2235                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2236                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2237                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2238                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2239                     ? IP4_ERROR_SRC_LOOKUP_MISS
2240                     : error0);
2241
2242           next0 = lm->local_next_by_ip_protocol[proto0];
2243
2244           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2245
2246           p0->error = error0? error_node->errors[error0] : 0;
2247
2248           if (PREDICT_FALSE (next0 != next_index))
2249             {
2250               n_left_to_next += 1;
2251               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2252
2253               next_index = next0;
2254               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2255               to_next[0] = pi0;
2256               to_next += 1;
2257               n_left_to_next -= 1;
2258             }
2259         }
2260   
2261       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2262     }
2263
2264   return frame->n_vectors;
2265 }
2266
2267 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2268   .function = ip4_local,
2269   .name = "ip4-local",
2270   .vector_size = sizeof (u32),
2271
2272   .format_trace = format_ip4_forward_next_trace,
2273
2274   .n_next_nodes = IP_LOCAL_N_NEXT,
2275   .next_nodes = {
2276     [IP_LOCAL_NEXT_DROP] = "error-drop",
2277     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2278     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2279     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2280   },
2281 };
2282
2283 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
2284
2285 void ip4_register_protocol (u32 protocol, u32 node_index)
2286 {
2287   vlib_main_t * vm = vlib_get_main();
2288   ip4_main_t * im = &ip4_main;
2289   ip_lookup_main_t * lm = &im->lookup_main;
2290
2291   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2292   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2293 }
2294
2295 static clib_error_t *
2296 show_ip_local_command_fn (vlib_main_t * vm,
2297                           unformat_input_t * input,
2298                          vlib_cli_command_t * cmd)
2299 {
2300   ip4_main_t * im = &ip4_main;
2301   ip_lookup_main_t * lm = &im->lookup_main;
2302   int i;
2303
2304   vlib_cli_output (vm, "Protocols handled by ip4_local");
2305   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2306     {
2307       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2308         vlib_cli_output (vm, "%d", i);
2309     }
2310   return 0;
2311 }
2312
2313
2314
2315 VLIB_CLI_COMMAND (show_ip_local, static) = {
2316   .path = "show ip local",
2317   .function = show_ip_local_command_fn,
2318   .short_help = "Show ip local protocol table",
2319 };
2320
2321 static uword
2322 ip4_arp (vlib_main_t * vm,
2323          vlib_node_runtime_t * node,
2324          vlib_frame_t * frame)
2325 {
2326   vnet_main_t * vnm = vnet_get_main();
2327   ip4_main_t * im = &ip4_main;
2328   ip_lookup_main_t * lm = &im->lookup_main;
2329   u32 * from, * to_next_drop;
2330   uword n_left_from, n_left_to_next_drop, next_index;
2331   static f64 time_last_seed_change = -1e100;
2332   static u32 hash_seeds[3];
2333   static uword hash_bitmap[256 / BITS (uword)]; 
2334   f64 time_now;
2335
2336   if (node->flags & VLIB_NODE_FLAG_TRACE)
2337     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2338
2339   time_now = vlib_time_now (vm);
2340   if (time_now - time_last_seed_change > 1e-3)
2341     {
2342       uword i;
2343       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2344                                              sizeof (hash_seeds));
2345       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2346         hash_seeds[i] = r[i];
2347
2348       /* Mark all hash keys as been no-seen before. */
2349       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2350         hash_bitmap[i] = 0;
2351
2352       time_last_seed_change = time_now;
2353     }
2354
2355   from = vlib_frame_vector_args (frame);
2356   n_left_from = frame->n_vectors;
2357   next_index = node->cached_next_index;
2358   if (next_index == IP4_ARP_NEXT_DROP)
2359     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2360
2361   while (n_left_from > 0)
2362     {
2363       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2364                            to_next_drop, n_left_to_next_drop);
2365
2366       while (n_left_from > 0 && n_left_to_next_drop > 0)
2367         {
2368           vlib_buffer_t * p0;
2369           ip4_header_t * ip0;
2370           ethernet_header_t * eh0;
2371           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2372           uword bm0;
2373           ip_adjacency_t * adj0;
2374
2375           pi0 = from[0];
2376
2377           p0 = vlib_get_buffer (vm, pi0);
2378
2379           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2380           adj0 = ip_get_adjacency (lm, adj_index0);
2381           ip0 = vlib_buffer_get_current (p0);
2382
2383           /* If packet destination is not local, send ARP to next hop */
2384           if (adj0->arp.next_hop.ip4.as_u32)
2385             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2386
2387           /* 
2388            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2389            * rewrite to this packet, we need to skip it here.
2390            * Note, to distinguish from src IP addr *.8.6.*, we
2391            * check for a bcast eth dest instead of IPv4 version.
2392            */
2393           eh0 = (ethernet_header_t*)ip0;
2394           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2395             {
2396               u32 vlan_num = 0;
2397               u16 * etype = &eh0->type;
2398               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2399                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2400                 {
2401                   vlan_num += 1;
2402                   etype += 2; //vlan tag also 16 bits, same as etype
2403                 }
2404               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2405                 {
2406                   vlib_buffer_advance (
2407                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2408                   ip0 = vlib_buffer_get_current (p0);
2409                 }
2410             }
2411
2412           a0 = hash_seeds[0];
2413           b0 = hash_seeds[1];
2414           c0 = hash_seeds[2];
2415
2416           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2417           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2418
2419           a0 ^= ip0->dst_address.data_u32;
2420           b0 ^= sw_if_index0;
2421
2422           hash_v3_finalize32 (a0, b0, c0);
2423
2424           c0 &= BITS (hash_bitmap) - 1;
2425           c0 = c0 / BITS (uword);
2426           m0 = (uword) 1 << (c0 % BITS (uword));
2427
2428           bm0 = hash_bitmap[c0];
2429           drop0 = (bm0 & m0) != 0;
2430
2431           /* Mark it as seen. */
2432           hash_bitmap[c0] = bm0 | m0;
2433
2434           from += 1;
2435           n_left_from -= 1;
2436           to_next_drop[0] = pi0;
2437           to_next_drop += 1;
2438           n_left_to_next_drop -= 1;
2439
2440           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2441
2442           if (drop0)
2443             continue;
2444
2445           /* 
2446            * Can happen if the control-plane is programming tables
2447            * with traffic flowing; at least that's today's lame excuse.
2448            */
2449           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2450             {
2451               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2452             }
2453           else
2454           /* Send ARP request. */
2455           {
2456             u32 bi0 = 0;
2457             vlib_buffer_t * b0;
2458             ethernet_arp_header_t * h0;
2459             vnet_hw_interface_t * hw_if0;
2460
2461             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2462
2463             /* Add rewrite/encap string for ARP packet. */
2464             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2465
2466             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2467
2468             /* Src ethernet address in ARP header. */
2469             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2470                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2471
2472             if (ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0)) {
2473                 //No source address available
2474                 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2475                 vlib_buffer_free(vm, &bi0, 1);
2476                 continue;
2477             }
2478
2479             /* Copy in destination address we are requesting. */
2480             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2481
2482             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2483             b0 = vlib_get_buffer (vm, bi0);
2484             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2485
2486             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2487
2488             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2489           }
2490         }
2491
2492       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2493     }
2494
2495   return frame->n_vectors;
2496 }
2497
2498 static char * ip4_arp_error_strings[] = {
2499   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2500   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2501   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2502   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2503   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2504   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2505 };
2506
2507 VLIB_REGISTER_NODE (ip4_arp_node) = {
2508   .function = ip4_arp,
2509   .name = "ip4-arp",
2510   .vector_size = sizeof (u32),
2511
2512   .format_trace = format_ip4_forward_next_trace,
2513
2514   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2515   .error_strings = ip4_arp_error_strings,
2516
2517   .n_next_nodes = IP4_ARP_N_NEXT,
2518   .next_nodes = {
2519     [IP4_ARP_NEXT_DROP] = "error-drop",
2520   },
2521 };
2522
2523 #define foreach_notrace_ip4_arp_error           \
2524 _(DROP)                                         \
2525 _(REQUEST_SENT)                                 \
2526 _(REPLICATE_DROP)                               \
2527 _(REPLICATE_FAIL)
2528
2529 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2530 {
2531   vlib_node_runtime_t *rt = 
2532     vlib_node_get_runtime (vm, ip4_arp_node.index);
2533
2534   /* don't trace ARP request packets */
2535 #define _(a)                                    \
2536     vnet_pcap_drop_trace_filter_add_del         \
2537         (rt->errors[IP4_ARP_ERROR_##a],         \
2538          1 /* is_add */);
2539     foreach_notrace_ip4_arp_error;
2540 #undef _
2541   return 0;
2542 }
2543
2544 VLIB_INIT_FUNCTION(arp_notrace_init);
2545
2546
2547 /* Send an ARP request to see if given destination is reachable on given interface. */
2548 clib_error_t *
2549 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2550 {
2551   vnet_main_t * vnm = vnet_get_main();
2552   ip4_main_t * im = &ip4_main;
2553   ethernet_arp_header_t * h;
2554   ip4_address_t * src;
2555   ip_interface_address_t * ia;
2556   ip_adjacency_t * adj;
2557   vnet_hw_interface_t * hi;
2558   vnet_sw_interface_t * si;
2559   vlib_buffer_t * b;
2560   u32 bi = 0;
2561
2562   si = vnet_get_sw_interface (vnm, sw_if_index);
2563
2564   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2565     {
2566       return clib_error_return (0, "%U: interface %U down",
2567                                 format_ip4_address, dst, 
2568                                 format_vnet_sw_if_index_name, vnm, 
2569                                 sw_if_index);
2570     }
2571
2572   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2573   if (! src)
2574     {
2575       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2576       return clib_error_return 
2577         (0, "no matching interface address for destination %U (interface %U)",
2578          format_ip4_address, dst,
2579          format_vnet_sw_if_index_name, vnm, sw_if_index);
2580     }
2581
2582   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2583
2584   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2585
2586   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2587
2588   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2589
2590   h->ip4_over_ethernet[0].ip4 = src[0];
2591   h->ip4_over_ethernet[1].ip4 = dst[0];
2592
2593   b = vlib_get_buffer (vm, bi);
2594   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2595
2596   /* Add encapsulation string for software interface (e.g. ethernet header). */
2597   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2598   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2599
2600   {
2601     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2602     u32 * to_next = vlib_frame_vector_args (f);
2603     to_next[0] = bi;
2604     f->n_vectors = 1;
2605     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2606   }
2607
2608   return /* no error */ 0;
2609 }
2610
/* Next-node indices used by the ip4-rewrite nodes. */
typedef enum {
  IP4_REWRITE_NEXT_DROP       = 0,  /* error path */
  IP4_REWRITE_NEXT_ARP        = 1,  /* local packets that hit an ARP adjacency */
  IP4_REWRITE_NEXT_ICMP_ERROR = 2,  /* TTL expired while forwarding */
} ip4_rewrite_next_t;
2616
2617 always_inline uword
2618 ip4_rewrite_inline (vlib_main_t * vm,
2619                     vlib_node_runtime_t * node,
2620                     vlib_frame_t * frame,
2621                     int rewrite_for_locally_received_packets)
2622 {
2623   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2624   u32 * from = vlib_frame_vector_args (frame);
2625   u32 n_left_from, n_left_to_next, * to_next, next_index;
2626   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2627   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2628
2629   n_left_from = frame->n_vectors;
2630   next_index = node->cached_next_index;
2631   u32 cpu_index = os_get_cpu_number();
2632   
2633   while (n_left_from > 0)
2634     {
2635       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2636
2637       while (n_left_from >= 4 && n_left_to_next >= 2)
2638         {
2639           ip_adjacency_t * adj0, * adj1;
2640           vlib_buffer_t * p0, * p1;
2641           ip4_header_t * ip0, * ip1;
2642           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2643           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2644           u32 next0_override, next1_override;
2645       
2646           if (rewrite_for_locally_received_packets)
2647               next0_override = next1_override = 0;
2648
2649           /* Prefetch next iteration. */
2650           {
2651             vlib_buffer_t * p2, * p3;
2652
2653             p2 = vlib_get_buffer (vm, from[2]);
2654             p3 = vlib_get_buffer (vm, from[3]);
2655
2656             vlib_prefetch_buffer_header (p2, STORE);
2657             vlib_prefetch_buffer_header (p3, STORE);
2658
2659             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2660             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2661           }
2662
2663           pi0 = to_next[0] = from[0];
2664           pi1 = to_next[1] = from[1];
2665
2666           from += 2;
2667           n_left_from -= 2;
2668           to_next += 2;
2669           n_left_to_next -= 2;
2670       
2671           p0 = vlib_get_buffer (vm, pi0);
2672           p1 = vlib_get_buffer (vm, pi1);
2673
2674           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2675           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2676
2677           /* We should never rewrite a pkt using the MISS adjacency */
2678           ASSERT(adj_index0 && adj_index1);
2679
2680           ip0 = vlib_buffer_get_current (p0);
2681           ip1 = vlib_buffer_get_current (p1);
2682
2683           error0 = error1 = IP4_ERROR_NONE;
2684           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2685
2686           /* Decrement TTL & update checksum.
2687              Works either endian, so no need for byte swap. */
2688           if (! rewrite_for_locally_received_packets)
2689             {
2690               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2691
2692               /* Input node should have reject packets with ttl 0. */
2693               ASSERT (ip0->ttl > 0);
2694               ASSERT (ip1->ttl > 0);
2695
2696               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2697               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2698
2699               checksum0 += checksum0 >= 0xffff;
2700               checksum1 += checksum1 >= 0xffff;
2701
2702               ip0->checksum = checksum0;
2703               ip1->checksum = checksum1;
2704
2705               ttl0 -= 1;
2706               ttl1 -= 1;
2707
2708               ip0->ttl = ttl0;
2709               ip1->ttl = ttl1;
2710
2711               /*
2712                * If the ttl drops below 1 when forwarding, generate
2713                * an ICMP response.
2714                */
2715               if (PREDICT_FALSE(ttl0 <= 0))
2716                 {
2717                   error0 = IP4_ERROR_TIME_EXPIRED;
2718                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2719                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2720                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2721                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2722                 }
2723               if (PREDICT_FALSE(ttl1 <= 0))
2724                 {
2725                   error1 = IP4_ERROR_TIME_EXPIRED;
2726                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2727                   icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2728                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2729                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2730                 }
2731
2732               /* Verify checksum. */
2733               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2734               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2735             }
2736
2737           /* Rewrite packet header and updates lengths. */
2738           adj0 = ip_get_adjacency (lm, adj_index0);
2739           adj1 = ip_get_adjacency (lm, adj_index1);
2740       
2741           if (rewrite_for_locally_received_packets)
2742             {
2743               /*
2744                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2745                * we end up here with a local adjacency in hand
2746                * The local adj rewrite data is 0xfefe on purpose.
2747                * Bad engineer, no donut for you.
2748                */
2749               if (PREDICT_FALSE(adj0->lookup_next_index 
2750                                 == IP_LOOKUP_NEXT_LOCAL))
2751                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2752               if (PREDICT_FALSE(adj0->lookup_next_index
2753                                 == IP_LOOKUP_NEXT_ARP))
2754                 next0_override = IP4_REWRITE_NEXT_ARP;
2755               if (PREDICT_FALSE(adj1->lookup_next_index 
2756                                 == IP_LOOKUP_NEXT_LOCAL))
2757                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2758               if (PREDICT_FALSE(adj1->lookup_next_index
2759                                 == IP_LOOKUP_NEXT_ARP))
2760                 next1_override = IP4_REWRITE_NEXT_ARP;
2761             }
2762
2763           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2764           rw_len0 = adj0[0].rewrite_header.data_bytes;
2765           rw_len1 = adj1[0].rewrite_header.data_bytes;
2766
2767           /* Check MTU of outgoing interface. */
2768           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2769                     ? IP4_ERROR_MTU_EXCEEDED
2770                     : error0);
2771           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2772                     ? IP4_ERROR_MTU_EXCEEDED
2773                     : error1);
2774
2775           next0 = (error0 == IP4_ERROR_NONE)
2776             ? adj0[0].rewrite_header.next_index : next0;
2777
2778           if (rewrite_for_locally_received_packets)
2779               next0 = next0 && next0_override ? next0_override : next0;
2780
2781           next1 = (error1 == IP4_ERROR_NONE)
2782             ? adj1[0].rewrite_header.next_index : next1;
2783
2784           if (rewrite_for_locally_received_packets)
2785               next1 = next1 && next1_override ? next1_override : next1;
2786
2787           /* 
2788            * We've already accounted for an ethernet_header_t elsewhere
2789            */
2790           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2791               vlib_increment_combined_counter 
2792                   (&lm->adjacency_counters,
2793                    cpu_index, adj_index0, 
2794                    /* packet increment */ 0,
2795                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2796
2797           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2798               vlib_increment_combined_counter 
2799                   (&lm->adjacency_counters,
2800                    cpu_index, adj_index1, 
2801                    /* packet increment */ 0,
2802                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2803
2804           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2805            * to see the IP headerr */
2806           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2807             {
2808               p0->current_data -= rw_len0;
2809               p0->current_length += rw_len0;
2810               p0->error = error_node->errors[error0];
2811               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2812                   adj0[0].rewrite_header.sw_if_index;
2813             }
2814           if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2815             {
2816               p1->current_data -= rw_len1;
2817               p1->current_length += rw_len1;
2818               p1->error = error_node->errors[error1];
2819               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2820                   adj1[0].rewrite_header.sw_if_index;
2821             }
2822
2823           /* Guess we are only writing on simple Ethernet header. */
2824           vnet_rewrite_two_headers (adj0[0], adj1[0],
2825                                     ip0, ip1,
2826                                     sizeof (ethernet_header_t));
2827       
2828           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2829                                            to_next, n_left_to_next,
2830                                            pi0, pi1, next0, next1);
2831         }
2832
2833       while (n_left_from > 0 && n_left_to_next > 0)
2834         {
2835           ip_adjacency_t * adj0;
2836           vlib_buffer_t * p0;
2837           ip4_header_t * ip0;
2838           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2839           u32 next0_override;
2840       
2841           if (rewrite_for_locally_received_packets)
2842               next0_override = 0;
2843
2844           pi0 = to_next[0] = from[0];
2845
2846           p0 = vlib_get_buffer (vm, pi0);
2847
2848           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2849
2850           /* We should never rewrite a pkt using the MISS adjacency */
2851           ASSERT(adj_index0);
2852
2853           adj0 = ip_get_adjacency (lm, adj_index0);
2854       
2855           ip0 = vlib_buffer_get_current (p0);
2856
2857           error0 = IP4_ERROR_NONE;
2858           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
2859
2860           /* Decrement TTL & update checksum. */
2861           if (! rewrite_for_locally_received_packets)
2862             {
2863               i32 ttl0 = ip0->ttl;
2864
2865               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2866
2867               checksum0 += checksum0 >= 0xffff;
2868
2869               ip0->checksum = checksum0;
2870
2871               ASSERT (ip0->ttl > 0);
2872
2873               ttl0 -= 1;
2874
2875               ip0->ttl = ttl0;
2876
2877               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2878
2879               if (PREDICT_FALSE(ttl0 <= 0))
2880                 {
2881                   /*
2882                    * If the ttl drops below 1 when forwarding, generate
2883                    * an ICMP response.
2884                    */
2885                   error0 = IP4_ERROR_TIME_EXPIRED;
2886                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2887                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2888                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2889                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2890                 }
2891             }
2892
2893           if (rewrite_for_locally_received_packets)
2894             {
2895               /*
2896                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2897                * we end up here with a local adjacency in hand
2898                * The local adj rewrite data is 0xfefe on purpose.
2899                * Bad engineer, no donut for you.
2900                */
2901               if (PREDICT_FALSE(adj0->lookup_next_index 
2902                                 == IP_LOOKUP_NEXT_LOCAL))
2903                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2904               /* 
2905                * We have to override the next_index in ARP adjacencies,
2906                * because they're set up for ip4-arp, not this node...
2907                */
2908               if (PREDICT_FALSE(adj0->lookup_next_index
2909                                 == IP_LOOKUP_NEXT_ARP))
2910                 next0_override = IP4_REWRITE_NEXT_ARP;
2911             }
2912
2913           /* Guess we are only writing on simple Ethernet header. */
2914           vnet_rewrite_one_header (adj0[0], ip0, 
2915                                    sizeof (ethernet_header_t));
2916           
2917           /* Update packet buffer attributes/set output interface. */
2918           rw_len0 = adj0[0].rewrite_header.data_bytes;
2919           
2920           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2921               vlib_increment_combined_counter 
2922                   (&lm->adjacency_counters,
2923                    cpu_index, adj_index0, 
2924                    /* packet increment */ 0,
2925                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2926           
2927           /* Check MTU of outgoing interface. */
2928           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2929                     > adj0[0].rewrite_header.max_l3_packet_bytes
2930                     ? IP4_ERROR_MTU_EXCEEDED
2931                     : error0);
2932
2933           p0->error = error_node->errors[error0];
2934
2935           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2936            * to see the IP headerr */
2937           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2938             {
2939               p0->current_data -= rw_len0;
2940               p0->current_length += rw_len0;
2941
2942               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2943                   adj0[0].rewrite_header.sw_if_index;
2944               next0 = adj0[0].rewrite_header.next_index;
2945             }
2946
2947           if (rewrite_for_locally_received_packets)
2948               next0 = next0 && next0_override ? next0_override : next0;
2949
2950           from += 1;
2951           n_left_from -= 1;
2952           to_next += 1;
2953           n_left_to_next -= 1;
2954       
2955           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2956                                            to_next, n_left_to_next,
2957                                            pi0, next0);
2958         }
2959   
2960       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2961     }
2962
2963   /* Need to do trace after rewrites to pick up new packet data. */
2964   if (node->flags & VLIB_NODE_FLAG_TRACE)
2965     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2966
2967   return frame->n_vectors;
2968 }
2969
2970
2971 /** @brief IPv4 transit rewrite node.
2972     @node ip4-rewrite-transit
2973
2974     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2975     header checksum, fetch the ip adjacency, check the outbound mtu,
2976     apply the adjacency rewrite, and send pkts to the adjacency
2977     rewrite header's rewrite_next_index.
2978
2979     @param vm vlib_main_t corresponding to the current thread
2980     @param node vlib_node_runtime_t
2981     @param frame vlib_frame_t whose contents should be dispatched
2982
2983     @par Graph mechanics: buffer metadata, next index usage
2984
2985     @em Uses:
2986     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2987         - the rewrite adjacency index
2988     - <code>adj->lookup_next_index</code>
2989         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2990           the packet will be dropped. 
2991     - <code>adj->rewrite_header</code>
2992         - Rewrite string length, rewrite string, next_index
2993
2994     @em Sets:
2995     - <code>b->current_data, b->current_length</code>
2996         - Updated net of applying the rewrite string
2997
2998     <em>Next Indices:</em>
2999     - <code> adj->rewrite_header.next_index </code>
3000       or @c error-drop 
3001 */
3002 static uword
3003 ip4_rewrite_transit (vlib_main_t * vm,
3004                      vlib_node_runtime_t * node,
3005                      vlib_frame_t * frame)
3006 {
3007   return ip4_rewrite_inline (vm, node, frame,
3008                              /* rewrite_for_locally_received_packets */ 0);
3009 }
3010
3011 /** @brief IPv4 local rewrite node.
3012     @node ip4-rewrite-local
3013
3014     This is the IPv4 local rewrite node. Fetch the ip adjacency, check
3015     the outbound interface mtu, apply the adjacency rewrite, and send
3016     pkts to the adjacency rewrite header's rewrite_next_index. Deal
3017     with hemorrhoids of the form "some clown sends an icmp4 w/ src =
3018     dst = interface addr."
3019
3020     @param vm vlib_main_t corresponding to the current thread
3021     @param node vlib_node_runtime_t
3022     @param frame vlib_frame_t whose contents should be dispatched
3023
3024     @par Graph mechanics: buffer metadata, next index usage
3025
3026     @em Uses:
3027     - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
3028         - the rewrite adjacency index
3029     - <code>adj->lookup_next_index</code>
3030         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
3031           the packet will be dropped. 
3032     - <code>adj->rewrite_header</code>
3033         - Rewrite string length, rewrite string, next_index
3034
3035     @em Sets:
3036     - <code>b->current_data, b->current_length</code>
3037         - Updated net of applying the rewrite string
3038
3039     <em>Next Indices:</em>
3040     - <code> adj->rewrite_header.next_index </code>
3041       or @c error-drop 
3042 */
3043
3044 static uword
3045 ip4_rewrite_local (vlib_main_t * vm,
3046                    vlib_node_runtime_t * node,
3047                    vlib_frame_t * frame)
3048 {
3049   return ip4_rewrite_inline (vm, node, frame,
3050                              /* rewrite_for_locally_received_packets */ 1);
3051 }
3052
3053 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
3054   .function = ip4_rewrite_transit,
3055   .name = "ip4-rewrite-transit",
3056   .vector_size = sizeof (u32),
3057
3058   .format_trace = format_ip4_rewrite_trace,
3059
3060   .n_next_nodes = 3,
3061   .next_nodes = {
3062     [IP4_REWRITE_NEXT_DROP] = "error-drop",
3063     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
3064     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3065   },
3066 };
3067
3068 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
3069
3070 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
3071   .function = ip4_rewrite_local,
3072   .name = "ip4-rewrite-local",
3073   .vector_size = sizeof (u32),
3074
3075   .sibling_of = "ip4-rewrite-transit",
3076
3077   .format_trace = format_ip4_rewrite_trace,
3078
3079   .n_next_nodes = 0,
3080 };
3081
3082 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
3083
3084 static clib_error_t *
3085 add_del_interface_table (vlib_main_t * vm,
3086                          unformat_input_t * input,
3087                          vlib_cli_command_t * cmd)
3088 {
3089   vnet_main_t * vnm = vnet_get_main();
3090   clib_error_t * error = 0;
3091   u32 sw_if_index, table_id;
3092
3093   sw_if_index = ~0;
3094
3095   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
3096     {
3097       error = clib_error_return (0, "unknown interface `%U'",
3098                                  format_unformat_error, input);
3099       goto done;
3100     }
3101
3102   if (unformat (input, "%d", &table_id))
3103     ;
3104   else
3105     {
3106       error = clib_error_return (0, "expected table id `%U'",
3107                                  format_unformat_error, input);
3108       goto done;
3109     }
3110
3111   {
3112     ip4_main_t * im = &ip4_main;
3113     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
3114
3115     if (fib) 
3116       {
3117         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
3118         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
3119     }
3120   }
3121
3122  done:
3123   return error;
3124 }
3125
3126 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
3127   .path = "set interface ip table",
3128   .function = add_del_interface_table,
3129   .short_help = "Add/delete FIB table id for interface",
3130 };
3131
3132
3133 static uword
3134 ip4_lookup_multicast (vlib_main_t * vm,
3135                       vlib_node_runtime_t * node,
3136                       vlib_frame_t * frame)
3137 {
3138   ip4_main_t * im = &ip4_main;
3139   ip_lookup_main_t * lm = &im->lookup_main;
3140   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
3141   u32 n_left_from, n_left_to_next, * from, * to_next;
3142   ip_lookup_next_t next;
3143   u32 cpu_index = os_get_cpu_number();
3144
3145   from = vlib_frame_vector_args (frame);
3146   n_left_from = frame->n_vectors;
3147   next = node->cached_next_index;
3148
3149   while (n_left_from > 0)
3150     {
3151       vlib_get_next_frame (vm, node, next,
3152                            to_next, n_left_to_next);
3153
3154       while (n_left_from >= 4 && n_left_to_next >= 2)
3155         {
3156           vlib_buffer_t * p0, * p1;
3157           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
3158           ip_lookup_next_t next0, next1;
3159           ip4_header_t * ip0, * ip1;
3160           ip_adjacency_t * adj0, * adj1;
3161           u32 fib_index0, fib_index1;
3162           u32 flow_hash_config0, flow_hash_config1;
3163
3164           /* Prefetch next iteration. */
3165           {
3166             vlib_buffer_t * p2, * p3;
3167
3168             p2 = vlib_get_buffer (vm, from[2]);
3169             p3 = vlib_get_buffer (vm, from[3]);
3170
3171             vlib_prefetch_buffer_header (p2, LOAD);
3172             vlib_prefetch_buffer_header (p3, LOAD);
3173
3174             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
3175             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
3176           }
3177
3178           pi0 = to_next[0] = from[0];
3179           pi1 = to_next[1] = from[1];
3180
3181           p0 = vlib_get_buffer (vm, pi0);
3182           p1 = vlib_get_buffer (vm, pi1);
3183
3184           ip0 = vlib_buffer_get_current (p0);
3185           ip1 = vlib_buffer_get_current (p1);
3186
3187           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3188           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
3189           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3190             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3191           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
3192             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
3193
3194           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3195                                               &ip0->dst_address, p0);
3196           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
3197                                               &ip1->dst_address, p1);
3198
3199           adj0 = ip_get_adjacency (lm, adj_index0);
3200           adj1 = ip_get_adjacency (lm, adj_index1);
3201
3202           next0 = adj0->lookup_next_index;
3203           next1 = adj1->lookup_next_index;
3204
3205           flow_hash_config0 = 
3206               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3207
3208           flow_hash_config1 = 
3209               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
3210
3211           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
3212               (ip0, flow_hash_config0);
3213                                                                   
3214           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
3215               (ip1, flow_hash_config1);
3216
3217           ASSERT (adj0->n_adj > 0);
3218           ASSERT (adj1->n_adj > 0);
3219           ASSERT (is_pow2 (adj0->n_adj));
3220           ASSERT (is_pow2 (adj1->n_adj));
3221           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3222           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
3223
3224           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3225           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
3226
3227           if (1) /* $$$$$$ HACK FIXME */
3228           vlib_increment_combined_counter 
3229               (cm, cpu_index, adj_index0, 1,
3230                vlib_buffer_length_in_chain (vm, p0));
3231           if (1) /* $$$$$$ HACK FIXME */
3232           vlib_increment_combined_counter 
3233               (cm, cpu_index, adj_index1, 1,
3234                vlib_buffer_length_in_chain (vm, p1));
3235
3236           from += 2;
3237           to_next += 2;
3238           n_left_to_next -= 2;
3239           n_left_from -= 2;
3240
3241           wrong_next = (next0 != next) + 2*(next1 != next);
3242           if (PREDICT_FALSE (wrong_next != 0))
3243             {
3244               switch (wrong_next)
3245                 {
3246                 case 1:
3247                   /* A B A */
3248                   to_next[-2] = pi1;
3249                   to_next -= 1;
3250                   n_left_to_next += 1;
3251                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3252                   break;
3253
3254                 case 2:
3255                   /* A A B */
3256                   to_next -= 1;
3257                   n_left_to_next += 1;
3258                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3259                   break;
3260
3261                 case 3:
3262                   /* A B C */
3263                   to_next -= 2;
3264                   n_left_to_next += 2;
3265                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3266                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3267                   if (next0 == next1)
3268                     {
3269                       /* A B B */
3270                       vlib_put_next_frame (vm, node, next, n_left_to_next);
3271                       next = next1;
3272                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
3273                     }
3274                 }
3275             }
3276         }
3277     
3278       while (n_left_from > 0 && n_left_to_next > 0)
3279         {
3280           vlib_buffer_t * p0;
3281           ip4_header_t * ip0;
3282           u32 pi0, adj_index0;
3283           ip_lookup_next_t next0;
3284           ip_adjacency_t * adj0;
3285           u32 fib_index0;
3286           u32 flow_hash_config0;
3287
3288           pi0 = from[0];
3289           to_next[0] = pi0;
3290
3291           p0 = vlib_get_buffer (vm, pi0);
3292
3293           ip0 = vlib_buffer_get_current (p0);
3294
3295           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
3296                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3297           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3298               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3299           
3300           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3301                                               &ip0->dst_address, p0);
3302
3303           adj0 = ip_get_adjacency (lm, adj_index0);
3304
3305           next0 = adj0->lookup_next_index;
3306
3307           flow_hash_config0 = 
3308               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3309
3310           vnet_buffer (p0)->ip.flow_hash = 
3311             ip4_compute_flow_hash (ip0, flow_hash_config0);
3312
3313           ASSERT (adj0->n_adj > 0);
3314           ASSERT (is_pow2 (adj0->n_adj));
3315           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3316
3317           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3318
3319           if (1) /* $$$$$$ HACK FIXME */
3320               vlib_increment_combined_counter 
3321                   (cm, cpu_index, adj_index0, 1,
3322                    vlib_buffer_length_in_chain (vm, p0));
3323
3324           from += 1;
3325           to_next += 1;
3326           n_left_to_next -= 1;
3327           n_left_from -= 1;
3328
3329           if (PREDICT_FALSE (next0 != next))
3330             {
3331               n_left_to_next += 1;
3332               vlib_put_next_frame (vm, node, next, n_left_to_next);
3333               next = next0;
3334               vlib_get_next_frame (vm, node, next,
3335                                    to_next, n_left_to_next);
3336               to_next[0] = pi0;
3337               to_next += 1;
3338               n_left_to_next -= 1;
3339             }
3340         }
3341
3342       vlib_put_next_frame (vm, node, next, n_left_to_next);
3343     }
3344
3345   if (node->flags & VLIB_NODE_FLAG_TRACE)
3346       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
3347
3348   return frame->n_vectors;
3349 }
3350
3351 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3352   .function = ip4_lookup_multicast,
3353   .name = "ip4-lookup-multicast",
3354   .vector_size = sizeof (u32),
3355   .sibling_of = "ip4-lookup",
3356   .format_trace = format_ip4_lookup_trace,
3357
3358   .n_next_nodes = 0,
3359 };
3360
3361 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
3362
3363 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3364   .function = ip4_drop,
3365   .name = "ip4-multicast",
3366   .vector_size = sizeof (u32),
3367
3368   .format_trace = format_ip4_forward_next_trace,
3369
3370   .n_next_nodes = 1,
3371   .next_nodes = {
3372     [0] = "error-drop",
3373   },
3374 };
3375
3376 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3377 {
3378   ip4_main_t * im = &ip4_main;
3379   ip4_fib_mtrie_t * mtrie0;
3380   ip4_fib_mtrie_leaf_t leaf0;
3381   u32 adj_index0;
3382     
3383   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3384
3385   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3386   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3387   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3388   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3389   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3390   
3391   /* Handle default route. */
3392   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3393   
3394   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3395   
3396   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3397                                                   a, 
3398                                                   /* no_default_route */ 0);
3399 }
3400  
3401 static clib_error_t *
3402 test_lookup_command_fn (vlib_main_t * vm,
3403                         unformat_input_t * input,
3404                         vlib_cli_command_t * cmd)
3405 {
3406   u32 table_id = 0;
3407   f64 count = 1;
3408   u32 n;
3409   int i;
3410   ip4_address_t ip4_base_address;
3411   u64 errors = 0;
3412
3413   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3414       if (unformat (input, "table %d", &table_id))
3415         ;
3416       else if (unformat (input, "count %f", &count))
3417         ;
3418
3419       else if (unformat (input, "%U",
3420                          unformat_ip4_address, &ip4_base_address))
3421         ;
3422       else
3423         return clib_error_return (0, "unknown input `%U'",
3424                                   format_unformat_error, input);
3425   }
3426
3427   n = count;
3428
3429   for (i = 0; i < n; i++)
3430     {
3431       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3432         errors++;
3433
3434       ip4_base_address.as_u32 = 
3435         clib_host_to_net_u32 (1 + 
3436                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3437     }
3438
3439   if (errors) 
3440     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3441   else
3442     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3443
3444   return 0;
3445 }
3446
3447 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3448     .path = "test lookup",
3449     .short_help = "test lookup",
3450     .function = test_lookup_command_fn,
3451 };
3452
3453 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3454 {
3455   ip4_main_t * im4 = &ip4_main;
3456   ip4_fib_t * fib;
3457   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3458
3459   if (p == 0)
3460     return VNET_API_ERROR_NO_SUCH_FIB;
3461
3462   fib = vec_elt_at_index (im4->fibs, p[0]);
3463
3464   fib->flow_hash_config = flow_hash_config;
3465   return 0;
3466 }
3467  
3468 static clib_error_t *
3469 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3470                              unformat_input_t * input,
3471                              vlib_cli_command_t * cmd)
3472 {
3473   int matched = 0;
3474   u32 table_id = 0;
3475   u32 flow_hash_config = 0;
3476   int rv;
3477
3478   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3479     if (unformat (input, "table %d", &table_id))
3480       matched = 1;
3481 #define _(a,v) \
3482     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3483     foreach_flow_hash_bit
3484 #undef _
3485     else break;
3486   }
3487   
3488   if (matched == 0)
3489     return clib_error_return (0, "unknown input `%U'",
3490                               format_unformat_error, input);
3491   
3492   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3493   switch (rv)
3494     {
3495     case 0:
3496       break;
3497       
3498     case VNET_API_ERROR_NO_SUCH_FIB:
3499       return clib_error_return (0, "no such FIB table %d", table_id);
3500       
3501     default:
3502       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3503       break;
3504     }
3505   
3506   return 0;
3507 }
3508  
3509 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3510   .path = "set ip flow-hash",
3511   .short_help = 
3512   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3513   .function = set_ip_flow_hash_command_fn,
3514 };
3515  
3516 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3517                                  u32 table_index)
3518 {
3519   vnet_main_t * vnm = vnet_get_main();
3520   vnet_interface_main_t * im = &vnm->interface_main;
3521   ip4_main_t * ipm = &ip4_main;
3522   ip_lookup_main_t * lm = &ipm->lookup_main;
3523   vnet_classify_main_t * cm = &vnet_classify_main;
3524
3525   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3526     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3527
3528   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3529     return VNET_API_ERROR_NO_SUCH_ENTRY;
3530
3531   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3532   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3533
3534   return 0;
3535 }
3536
3537 static clib_error_t *
3538 set_ip_classify_command_fn (vlib_main_t * vm,
3539                             unformat_input_t * input,
3540                             vlib_cli_command_t * cmd)
3541 {
3542   u32 table_index = ~0;
3543   int table_index_set = 0;
3544   u32 sw_if_index = ~0;
3545   int rv;
3546   
3547   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3548     if (unformat (input, "table-index %d", &table_index))
3549       table_index_set = 1;
3550     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3551                        vnet_get_main(), &sw_if_index))
3552       ;
3553     else
3554       break;
3555   }
3556       
3557   if (table_index_set == 0)
3558     return clib_error_return (0, "classify table-index must be specified");
3559
3560   if (sw_if_index == ~0)
3561     return clib_error_return (0, "interface / subif must be specified");
3562
3563   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3564
3565   switch (rv)
3566     {
3567     case 0:
3568       break;
3569
3570     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3571       return clib_error_return (0, "No such interface");
3572
3573     case VNET_API_ERROR_NO_SUCH_ENTRY:
3574       return clib_error_return (0, "No such classifier table");
3575     }
3576   return 0;
3577 }
3578
3579 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3580     .path = "set ip classify",
3581     .short_help = 
3582     "set ip classify intfc <int> table-index <index>",
3583     .function = set_ip_classify_command_fn,
3584 };
3585