Document more nodes
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47
48 /** \file
49     vnet ip4 forwarding 
50 */
51
52 /* This is really, really simple but stupid fib. */
53 u32
54 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
55                            ip4_address_t * dst,
56                            u32 disable_default_route)
57 {
58   ip_lookup_main_t * lm = &im->lookup_main;
59   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
60   uword * p, * hash, key;
61   i32 i, i_min, dst_address, ai;
62
63   i_min = disable_default_route ? 1 : 0;
64   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
65   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
66     {
67       hash = fib->adj_index_by_dst_address[i];
68       if (! hash)
69         continue;
70
71       key = dst_address & im->fib_masks[i];
72       if ((p = hash_get (hash, key)) != 0)
73         {
74           ai = p[0];
75           goto done;
76         }
77     }
78     
79   /* Nothing matches in table. */
80   ai = lm->miss_adj_index;
81
82  done:
83   return ai;
84 }
85
86 static ip4_fib_t *
87 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
88 {
89   ip4_fib_t * fib;
90   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
91   vec_add2 (im->fibs, fib, 1);
92   fib->table_id = table_id;
93   fib->index = fib - im->fibs;
94   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
95   fib->fwd_classify_table_index = ~0;
96   fib->rev_classify_table_index = ~0;
97   ip4_mtrie_init (&fib->mtrie);
98   return fib;
99 }
100
101 ip4_fib_t *
102 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
103                                    u32 table_index_or_id, u32 flags)
104 {
105   uword * p, fib_index;
106
107   fib_index = table_index_or_id;
108   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
109     {
110       if (table_index_or_id == ~0) {
111         table_index_or_id = 0;
112         while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
113           table_index_or_id++;
114         }
115         return create_fib_with_table_id (im, table_index_or_id);
116       }
117
118       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
119       if (! p)
120         return create_fib_with_table_id (im, table_index_or_id);
121       fib_index = p[0];
122     }
123   return vec_elt_at_index (im->fibs, fib_index);
124 }
125
126 static void
127 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
128                                        ip4_fib_t * fib,
129                                        u32 address_length)
130 {
131   hash_t * h;
132   uword max_index;
133
134   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
135   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
136
137   fib->adj_index_by_dst_address[address_length] =
138     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
139
140   hash_set_flags (fib->adj_index_by_dst_address[address_length],
141                   HASH_FLAG_NO_AUTO_SHRINK);
142
143   h = hash_header (fib->adj_index_by_dst_address[address_length]);
144   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
145
146   /* Initialize new/old hash value vectors. */
147   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
148   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
149 }
150
151 static void
152 ip4_fib_set_adj_index (ip4_main_t * im,
153                        ip4_fib_t * fib,
154                        u32 flags,
155                        u32 dst_address_u32,
156                        u32 dst_address_length,
157                        u32 adj_index)
158 {
159   ip_lookup_main_t * lm = &im->lookup_main;
160   uword * hash;
161
162   if (vec_bytes(fib->old_hash_values))
163     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
164   if (vec_bytes(fib->new_hash_values))
165     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
166   fib->new_hash_values[0] = adj_index;
167
168   /* Make sure adj index is valid. */
169   if (CLIB_DEBUG > 0)
170     (void) ip_get_adjacency (lm, adj_index);
171
172   hash = fib->adj_index_by_dst_address[dst_address_length];
173
174   hash = _hash_set3 (hash, dst_address_u32,
175                      fib->new_hash_values,
176                      fib->old_hash_values);
177
178   fib->adj_index_by_dst_address[dst_address_length] = hash;
179
180   if (vec_len (im->add_del_route_callbacks) > 0)
181     {
182       ip4_add_del_route_callback_t * cb;
183       ip4_address_t d;
184       uword * p;
185
186       d.data_u32 = dst_address_u32;
187       vec_foreach (cb, im->add_del_route_callbacks)
188         if ((flags & cb->required_flags) == cb->required_flags)
189           cb->function (im, cb->function_opaque,
190                         fib, flags,
191                         &d, dst_address_length,
192                         fib->old_hash_values,
193                         fib->new_hash_values);
194
195       p = hash_get (hash, dst_address_u32);
196       clib_memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
197     }
198 }
199
200 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
201 {
202   ip_lookup_main_t * lm = &im->lookup_main;
203   ip4_fib_t * fib;
204   u32 dst_address, dst_address_length, adj_index, old_adj_index;
205   uword * hash, is_del;
206   ip4_add_del_route_callback_t * cb;
207
208   /* Either create new adjacency or use given one depending on arguments. */
209   if (a->n_add_adj > 0)
210     {
211       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
212       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
213     }
214   else
215     adj_index = a->adj_index;
216
217   dst_address = a->dst_address.data_u32;
218   dst_address_length = a->dst_address_length;
219   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
220
221   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
222   dst_address &= im->fib_masks[dst_address_length];
223
224   if (! fib->adj_index_by_dst_address[dst_address_length])
225     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
226
227   hash = fib->adj_index_by_dst_address[dst_address_length];
228
229   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
230
231   if (is_del)
232     {
233       fib->old_hash_values[0] = ~0;
234       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
235       fib->adj_index_by_dst_address[dst_address_length] = hash;
236
237       if (vec_len (im->add_del_route_callbacks) > 0
238           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
239         {
240           fib->new_hash_values[0] = ~0;
241           vec_foreach (cb, im->add_del_route_callbacks)
242             if ((a->flags & cb->required_flags) == cb->required_flags)
243               cb->function (im, cb->function_opaque,
244                             fib, a->flags,
245                             &a->dst_address, dst_address_length,
246                             fib->old_hash_values,
247                             fib->new_hash_values);
248         }
249     }
250   else
251     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
252                            adj_index);
253
254   old_adj_index = fib->old_hash_values[0];
255
256   /* Avoid spurious reference count increments */
257   if (old_adj_index == adj_index
258       && adj_index != ~0
259       && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
260     {
261       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
262       if (adj->share_count > 0)
263         adj->share_count --;
264     }
265
266   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
267                                is_del ? old_adj_index : adj_index,
268                                is_del);
269
270   /* Delete old adjacency index if present and changed. */
271   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
272       && old_adj_index != ~0
273       && old_adj_index != adj_index)
274     ip_del_adjacency (lm, old_adj_index);
275 }
276
277 void
278 ip4_add_del_route_next_hop (ip4_main_t * im,
279                             u32 flags,
280                             ip4_address_t * dst_address,
281                             u32 dst_address_length,
282                             ip4_address_t * next_hop,
283                             u32 next_hop_sw_if_index,
284                             u32 next_hop_weight, u32 adj_index, 
285                             u32 explicit_fib_index)
286 {
287   vnet_main_t * vnm = vnet_get_main();
288   ip_lookup_main_t * lm = &im->lookup_main;
289   u32 fib_index;
290   ip4_fib_t * fib;
291   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
292   u32 dst_adj_index, nh_adj_index;
293   uword * dst_hash, * dst_result;
294   uword * nh_hash, * nh_result;
295   ip_adjacency_t * dst_adj;
296   ip_multipath_adjacency_t * old_mp, * new_mp;
297   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
298   int is_interface_next_hop;
299   clib_error_t * error = 0;
300
301   if (explicit_fib_index == (u32)~0)
302       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
303   else
304       fib_index = explicit_fib_index;
305
306   fib = vec_elt_at_index (im->fibs, fib_index);
307   
308   /* Lookup next hop to be added or deleted. */
309   is_interface_next_hop = next_hop->data_u32 == 0;
310   if (adj_index == (u32)~0)
311     {
312       if (is_interface_next_hop)
313         {
314           nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
315           if (nh_result)
316             nh_adj_index = *nh_result;
317           else
318             {
319               ip_adjacency_t * adj;
320               adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
321                                       &nh_adj_index);
322               ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
323               ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
324               hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
325             }
326         }
327       else
328         {
329           nh_hash = fib->adj_index_by_dst_address[32];
330           nh_result = hash_get (nh_hash, next_hop->data_u32);
331           
332           /* Next hop must be known. */
333           if (! nh_result)
334             {
335               ip_adjacency_t * adj;
336
337               nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
338                                                         next_hop, 0);
339               adj = ip_get_adjacency (lm, nh_adj_index);
340               /* if ARP interface adjacencty is present, we need to
341                  install ARP adjaceny for specific next hop */
342               if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
343                   adj->arp.next_hop.ip4.as_u32 == 0)
344                 {
345                   nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
346                 }
347               else
348                 {
349                   /* Next hop is not known, so create indirect adj */
350                   ip_adjacency_t add_adj;
351                   memset (&add_adj, 0, sizeof(add_adj));
352                   add_adj.n_adj = 1;
353                   add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
354                   add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
355                   add_adj.explicit_fib_index = explicit_fib_index;
356                   ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
357                 }
358             }
359           else
360             nh_adj_index = *nh_result;
361         }
362     }
363   else
364     {
365       nh_adj_index = adj_index;
366     }
367   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
368   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
369
370   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
371   dst_result = hash_get (dst_hash, dst_address_u32);
372   if (dst_result)
373     {
374       dst_adj_index = dst_result[0];
375       dst_adj = ip_get_adjacency (lm, dst_adj_index);
376     }
377   else
378     {
379       /* For deletes destination must be known. */
380       if (is_del)
381         {
382           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
383           error = clib_error_return (0, "unknown destination %U/%d",
384                                      format_ip4_address, dst_address,
385                                      dst_address_length);
386           goto done;
387         }
388
389       dst_adj_index = ~0;
390       dst_adj = 0;
391     }
392
393   /* Ignore adds of X/32 with next hop of X. */
394   if (! is_del
395       && dst_address_length == 32
396       && dst_address->data_u32 == next_hop->data_u32 
397       && adj_index != (u32)~0)
398     {
399       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
400       error = clib_error_return (0, "prefix matches next hop %U/%d",
401                                  format_ip4_address, dst_address,
402                                  dst_address_length);
403       goto done;
404     }
405
406   /* Destination is not known and default weight is set so add route
407      to existing non-multipath adjacency */
408   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
409     {
410       /* create / delete additional mapping of existing adjacency */
411       ip4_add_del_route_args_t a;
412       ip_adjacency_t * nh_adj = ip_get_adjacency (lm, nh_adj_index);
413
414       a.table_index_or_table_id = fib_index;
415       a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
416                  | IP4_ROUTE_FLAG_FIB_INDEX
417                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
418                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
419                              | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
420       a.dst_address = dst_address[0];
421       a.dst_address_length = dst_address_length;
422       a.adj_index = nh_adj_index;
423       a.add_adj = 0;
424       a.n_add_adj = 0;
425
426       ip4_add_del_route (im, &a);
427
428       /* adjust share count. This cannot be the only use of the adjacency */
429       nh_adj->share_count += is_del ? -1 : 1;
430         
431       goto done;
432     }
433
434   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
435
436   if (! ip_multipath_adjacency_add_del_next_hop
437       (lm, is_del,
438        old_mp_adj_index,
439        nh_adj_index,
440        next_hop_weight,
441        &new_mp_adj_index))
442     {
443       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
444       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
445                                  format_ip4_address, next_hop);
446       goto done;
447     }
448   
449   old_mp = new_mp = 0;
450   if (old_mp_adj_index != ~0)
451     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
452   if (new_mp_adj_index != ~0)
453     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
454
455   if (old_mp != new_mp)
456     {
457       ip4_add_del_route_args_t a;
458       ip_adjacency_t * adj;
459
460       a.table_index_or_table_id = fib_index;
461       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
462                  | IP4_ROUTE_FLAG_FIB_INDEX
463                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
464                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
465       a.dst_address = dst_address[0];
466       a.dst_address_length = dst_address_length;
467       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
468       a.add_adj = 0;
469       a.n_add_adj = 0;
470
471       ip4_add_del_route (im, &a);
472
473       adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index);
474       if (adj->n_adj == 1)
475         adj->share_count += is_del ? -1 : 1;
476     }
477
478  done:
479   if (error)
480     clib_error_report (error);
481 }
482
483 void *
484 ip4_get_route (ip4_main_t * im,
485                u32 table_index_or_table_id,
486                u32 flags,
487                u8 * address,
488                u32 address_length)
489 {
490   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
491   u32 dst_address = * (u32 *) address;
492   uword * hash, * p;
493
494   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
495   dst_address &= im->fib_masks[address_length];
496
497   hash = fib->adj_index_by_dst_address[address_length];
498   p = hash_get (hash, dst_address);
499   return (void *) p;
500 }
501
502 void
503 ip4_foreach_matching_route (ip4_main_t * im,
504                             u32 table_index_or_table_id,
505                             u32 flags,
506                             ip4_address_t * address,
507                             u32 address_length,
508                             ip4_address_t ** results,
509                             u8 ** result_lengths)
510 {
511   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
512   u32 dst_address = address->data_u32;
513   u32 this_length = address_length;
514   
515   if (*results)
516     _vec_len (*results) = 0;
517   if (*result_lengths)
518     _vec_len (*result_lengths) = 0;
519
520   while (this_length <= 32 && vec_len (results) == 0)
521     {
522       uword k, v;
523       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
524         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
525           {
526             ip4_address_t a;
527             a.data_u32 = k;
528             vec_add1 (*results, a);
529             vec_add1 (*result_lengths, this_length);
530           }
531       }));
532
533       this_length++;
534     }
535 }
536
537 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
538                                   u32 table_index_or_table_id,
539                                   u32 flags)
540 {
541   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
542   ip_lookup_main_t * lm = &im->lookup_main;
543   u32 i, l;
544   ip4_address_t a;
545   ip4_add_del_route_callback_t * cb;
546   static ip4_address_t * to_delete;
547
548   if (lm->n_adjacency_remaps == 0)
549     return;
550
551   for (l = 0; l <= 32; l++)
552     {
553       hash_pair_t * p;
554       uword * hash = fib->adj_index_by_dst_address[l];
555
556       if (hash_elts (hash) == 0)
557         continue;
558
559       if (to_delete)
560         _vec_len (to_delete) = 0;
561
562       hash_foreach_pair (p, hash, ({
563         u32 adj_index = p->value[0];
564         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
565
566         if (m)
567           {
568             /* Record destination address from hash key. */
569             a.data_u32 = p->key;
570
571             /* New adjacency points to nothing: so delete prefix. */
572             if (m == ~0)
573               vec_add1 (to_delete, a);
574             else
575               {
576                 /* Remap to new adjacency. */
577                 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
578
579                 /* Set new adjacency value. */
580                 fib->new_hash_values[0] = p->value[0] = m - 1;
581
582                 vec_foreach (cb, im->add_del_route_callbacks)
583                   if ((flags & cb->required_flags) == cb->required_flags)
584                     cb->function (im, cb->function_opaque,
585                                   fib, flags | IP4_ROUTE_FLAG_ADD,
586                                   &a, l,
587                                   fib->old_hash_values,
588                                   fib->new_hash_values);
589               }
590           }
591       }));
592
593       fib->new_hash_values[0] = ~0;
594       for (i = 0; i < vec_len (to_delete); i++)
595         {
596           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
597           vec_foreach (cb, im->add_del_route_callbacks)
598             if ((flags & cb->required_flags) == cb->required_flags)
599               cb->function (im, cb->function_opaque,
600                             fib, flags | IP4_ROUTE_FLAG_DEL,
601                             &a, l,
602                             fib->old_hash_values,
603                             fib->new_hash_values);
604         }
605     }
606
607   /* Also remap adjacencies in mtrie. */
608   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
609
610   /* Reset mapping table. */
611   vec_zero (lm->adjacency_remap_table);
612
613   /* All remaps have been performed. */
614   lm->n_adjacency_remaps = 0;
615 }
616
617 void ip4_delete_matching_routes (ip4_main_t * im,
618                                  u32 table_index_or_table_id,
619                                  u32 flags,
620                                  ip4_address_t * address,
621                                  u32 address_length)
622 {
623   static ip4_address_t * matching_addresses;
624   static u8 * matching_address_lengths;
625   u32 l, i;
626   ip4_add_del_route_args_t a;
627
628   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
629   a.table_index_or_table_id = table_index_or_table_id;
630   a.adj_index = ~0;
631   a.add_adj = 0;
632   a.n_add_adj = 0;
633
634   for (l = address_length + 1; l <= 32; l++)
635     {
636       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
637                                   address,
638                                   l,
639                                   &matching_addresses,
640                                   &matching_address_lengths);
641       for (i = 0; i < vec_len (matching_addresses); i++)
642         {
643           a.dst_address = matching_addresses[i];
644           a.dst_address_length = matching_address_lengths[i];
645           ip4_add_del_route (im, &a);
646         }
647     }
648
649   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
650 }
651
652 void
653 ip4_forward_next_trace (vlib_main_t * vm,
654                         vlib_node_runtime_t * node,
655                         vlib_frame_t * frame,
656                         vlib_rx_or_tx_t which_adj_index);
657
658 always_inline uword
659 ip4_lookup_inline (vlib_main_t * vm,
660                    vlib_node_runtime_t * node,
661                    vlib_frame_t * frame,
662                    int lookup_for_responses_to_locally_received_packets,
663                    int is_indirect)
664 {
665   ip4_main_t * im = &ip4_main;
666   ip_lookup_main_t * lm = &im->lookup_main;
667   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
668   u32 n_left_from, n_left_to_next, * from, * to_next;
669   ip_lookup_next_t next;
670   u32 cpu_index = os_get_cpu_number();
671
672   from = vlib_frame_vector_args (frame);
673   n_left_from = frame->n_vectors;
674   next = node->cached_next_index;
675
676   while (n_left_from > 0)
677     {
678       vlib_get_next_frame (vm, node, next,
679                            to_next, n_left_to_next);
680
681       while (n_left_from >= 4 && n_left_to_next >= 2)
682         {
683           vlib_buffer_t * p0, * p1;
684           ip4_header_t * ip0, * ip1;
685           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
686           ip_lookup_next_t next0, next1;
687           ip_adjacency_t * adj0, * adj1;
688           ip4_fib_mtrie_t * mtrie0, * mtrie1;
689           ip4_fib_mtrie_leaf_t leaf0, leaf1;
690           ip4_address_t * dst_addr0, *dst_addr1;
691           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
692           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
693           u32 flow_hash_config0, flow_hash_config1;
694           u32 hash_c0, hash_c1;
695           u32 wrong_next;
696
697           /* Prefetch next iteration. */
698           {
699             vlib_buffer_t * p2, * p3;
700
701             p2 = vlib_get_buffer (vm, from[2]);
702             p3 = vlib_get_buffer (vm, from[3]);
703
704             vlib_prefetch_buffer_header (p2, LOAD);
705             vlib_prefetch_buffer_header (p3, LOAD);
706
707             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
708             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
709           }
710
711           pi0 = to_next[0] = from[0];
712           pi1 = to_next[1] = from[1];
713
714           p0 = vlib_get_buffer (vm, pi0);
715           p1 = vlib_get_buffer (vm, pi1);
716
717           ip0 = vlib_buffer_get_current (p0);
718           ip1 = vlib_buffer_get_current (p1);
719
720           if (is_indirect)
721             {
722               ip_adjacency_t * iadj0, * iadj1;
723               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
724               iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
725               dst_addr0 = &iadj0->indirect.next_hop.ip4;
726               dst_addr1 = &iadj1->indirect.next_hop.ip4;
727             }
728           else
729             {
730               dst_addr0 = &ip0->dst_address;
731               dst_addr1 = &ip1->dst_address;
732             }
733
734           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
735           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
736           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
737             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
738           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
739             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
740
741
742           if (! lookup_for_responses_to_locally_received_packets)
743             {
744               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
745               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
746
747               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
748
749               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
750               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
751             }
752
753           tcp0 = (void *) (ip0 + 1);
754           tcp1 = (void *) (ip1 + 1);
755
756           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
757                          || ip0->protocol == IP_PROTOCOL_UDP);
758           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
759                          || ip1->protocol == IP_PROTOCOL_UDP);
760
761           if (! lookup_for_responses_to_locally_received_packets)
762             {
763               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
764               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
765             }
766
767           if (! lookup_for_responses_to_locally_received_packets)
768             {
769               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
770               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
771             }
772
773           if (! lookup_for_responses_to_locally_received_packets)
774             {
775               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
776               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
777             }
778
779           if (lookup_for_responses_to_locally_received_packets)
780             {
781               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
782               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
783             }
784           else
785             {
786               /* Handle default route. */
787               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
788               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
789
790               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
791               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
792             }
793
794           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
795                                                            dst_addr0,
796                                                            /* no_default_route */ 0));
797           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
798                                                            dst_addr1,
799                                                            /* no_default_route */ 0));
800           adj0 = ip_get_adjacency (lm, adj_index0);
801           adj1 = ip_get_adjacency (lm, adj_index1);
802
803           next0 = adj0->lookup_next_index;
804           next1 = adj1->lookup_next_index;
805
806           /* Use flow hash to compute multipath adjacency. */
807           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
808           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
809           if (PREDICT_FALSE (adj0->n_adj > 1))
810             {
811               flow_hash_config0 = 
812                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
813               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
814                 ip4_compute_flow_hash (ip0, flow_hash_config0);
815             }
816           if (PREDICT_FALSE(adj1->n_adj > 1))
817             {
818               flow_hash_config1 = 
819                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
820               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
821                 ip4_compute_flow_hash (ip1, flow_hash_config1);
822             }
823
824           ASSERT (adj0->n_adj > 0);
825           ASSERT (adj1->n_adj > 0);
826           ASSERT (is_pow2 (adj0->n_adj));
827           ASSERT (is_pow2 (adj1->n_adj));
828           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
829           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
830
831           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
832           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
833
834           vlib_increment_combined_counter 
835               (cm, cpu_index, adj_index0, 1,
836                vlib_buffer_length_in_chain (vm, p0) 
837                + sizeof(ethernet_header_t));
838           vlib_increment_combined_counter 
839               (cm, cpu_index, adj_index1, 1,
840                vlib_buffer_length_in_chain (vm, p1)
841                + sizeof(ethernet_header_t));
842
843           from += 2;
844           to_next += 2;
845           n_left_to_next -= 2;
846           n_left_from -= 2;
847
848           wrong_next = (next0 != next) + 2*(next1 != next);
849           if (PREDICT_FALSE (wrong_next != 0))
850             {
851               switch (wrong_next)
852                 {
853                 case 1:
854                   /* A B A */
855                   to_next[-2] = pi1;
856                   to_next -= 1;
857                   n_left_to_next += 1;
858                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
859                   break;
860
861                 case 2:
862                   /* A A B */
863                   to_next -= 1;
864                   n_left_to_next += 1;
865                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
866                   break;
867
868                 case 3:
869                   /* A B C */
870                   to_next -= 2;
871                   n_left_to_next += 2;
872                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
873                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
874                   if (next0 == next1)
875                     {
876                       /* A B B */
877                       vlib_put_next_frame (vm, node, next, n_left_to_next);
878                       next = next1;
879                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
880                     }
881                 }
882             }
883         }
884     
885       while (n_left_from > 0 && n_left_to_next > 0)
886         {
887           vlib_buffer_t * p0;
888           ip4_header_t * ip0;
889           __attribute__((unused)) tcp_header_t * tcp0;
890           ip_lookup_next_t next0;
891           ip_adjacency_t * adj0;
892           ip4_fib_mtrie_t * mtrie0;
893           ip4_fib_mtrie_leaf_t leaf0;
894           ip4_address_t * dst_addr0;
895           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
896           u32 flow_hash_config0, hash_c0;
897
898           pi0 = from[0];
899           to_next[0] = pi0;
900
901           p0 = vlib_get_buffer (vm, pi0);
902
903           ip0 = vlib_buffer_get_current (p0);
904
905           if (is_indirect)
906             {
907               ip_adjacency_t * iadj0;
908               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
909               dst_addr0 = &iadj0->indirect.next_hop.ip4;
910             }
911           else
912             {
913               dst_addr0 = &ip0->dst_address;
914             }
915
916           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
917           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
918             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
919
920           if (! lookup_for_responses_to_locally_received_packets)
921             {
922               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
923
924               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
925
926               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
927             }
928
929           tcp0 = (void *) (ip0 + 1);
930
931           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
932                          || ip0->protocol == IP_PROTOCOL_UDP);
933
934           if (! lookup_for_responses_to_locally_received_packets)
935             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
936
937           if (! lookup_for_responses_to_locally_received_packets)
938             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
939
940           if (! lookup_for_responses_to_locally_received_packets)
941             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
942
943           if (lookup_for_responses_to_locally_received_packets)
944             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
945           else
946             {
947               /* Handle default route. */
948               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
949               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
950             }
951
952           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
953                                                            dst_addr0,
954                                                            /* no_default_route */ 0));
955
956           adj0 = ip_get_adjacency (lm, adj_index0);
957
958           next0 = adj0->lookup_next_index;
959
960           /* Use flow hash to compute multipath adjacency. */
961           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
962           if (PREDICT_FALSE(adj0->n_adj > 1))
963             {
964               flow_hash_config0 = 
965                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
966
967               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
968                 ip4_compute_flow_hash (ip0, flow_hash_config0);
969             }
970
971           ASSERT (adj0->n_adj > 0);
972           ASSERT (is_pow2 (adj0->n_adj));
973           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
974
975           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
976
977           vlib_increment_combined_counter 
978               (cm, cpu_index, adj_index0, 1,
979                vlib_buffer_length_in_chain (vm, p0)
980                + sizeof(ethernet_header_t));
981
982           from += 1;
983           to_next += 1;
984           n_left_to_next -= 1;
985           n_left_from -= 1;
986
987           if (PREDICT_FALSE (next0 != next))
988             {
989               n_left_to_next += 1;
990               vlib_put_next_frame (vm, node, next, n_left_to_next);
991               next = next0;
992               vlib_get_next_frame (vm, node, next,
993                                    to_next, n_left_to_next);
994               to_next[0] = pi0;
995               to_next += 1;
996               n_left_to_next -= 1;
997             }
998         }
999
1000       vlib_put_next_frame (vm, node, next, n_left_to_next);
1001     }
1002
1003   if (node->flags & VLIB_NODE_FLAG_TRACE)
1004     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
1005
1006   return frame->n_vectors;
1007 }
1008
1009 /** \brief IPv4 lookup node.
1010     @node ip4-lookup
1011
1012     This is the main IPv4 lookup dispatch node.
1013
1014     @param vm vlib_main_t corresponding to the current thread
1015     @param node vlib_node_runtime_t
1016     @param frame vlib_frame_t whose contents should be dispatched
1017
1018     @par Graph mechanics: buffer metadata, next index usage
1019
1020     @em Uses:
1021     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
1022         - Indicates the @c sw_if_index value of the interface that the
1023           packet was received on.
1024     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
1025         - When the value is @c ~0 then the node performs a longest prefix
1026           match (LPM) for the packet destination address in the FIB attached
1027           to the receive interface.
1028         - Otherwise perform LPM for the packet destination address in the
1029           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
1030           value (0, 1, ...) and not a VRF id.
1031
1032     @em Sets:
1033     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
1034         - The lookup result adjacency index.
1035
1036     <em>Next Index:</em>
1037     - Dispatches the packet to the node index found in
1038       ip_adjacency_t @c adj->lookup_next_index
1039       (where @c adj is the lookup result adjacency).
1040 */
1041 static uword
1042 ip4_lookup (vlib_main_t * vm,
1043             vlib_node_runtime_t * node,
1044             vlib_frame_t * frame)
1045 {
1046   return ip4_lookup_inline (vm, node, frame,
1047                             /* lookup_for_responses_to_locally_received_packets */ 0,
1048                             /* is_indirect */ 0);
1049
1050 }
1051
1052 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
1053                                         ip_adjacency_t * adj,
1054                                         u32 sw_if_index,
1055                                         u32 if_address_index)
1056 {
1057   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1058   ip_lookup_next_t n;
1059   vnet_l3_packet_type_t packet_type;
1060   u32 node_index;
1061
1062   if (hw->hw_class_index == ethernet_hw_interface_class.index
1063       || hw->hw_class_index == srp_hw_interface_class.index)
1064     {
1065       /* 
1066        * We have a bit of a problem in this case. ip4-arp uses
1067        * the rewrite_header.next_index to hand pkts to the
1068        * indicated inteface output node. We can end up in
1069        * ip4_rewrite_local, too, which also pays attention to 
1070        * rewrite_header.next index. Net result: a hack in
1071        * ip4_rewrite_local...
1072        */
1073       n = IP_LOOKUP_NEXT_ARP;
1074       node_index = ip4_arp_node.index;
1075       adj->if_address_index = if_address_index;
1076       adj->arp.next_hop.ip4.as_u32 = 0;
1077       ip46_address_reset(&adj->arp.next_hop);
1078       packet_type = VNET_L3_PACKET_TYPE_ARP;
1079     }
1080   else
1081     {
1082       n = IP_LOOKUP_NEXT_REWRITE;
1083       node_index = ip4_rewrite_node.index;
1084       packet_type = VNET_L3_PACKET_TYPE_IP4;
1085     }
1086
1087   adj->lookup_next_index = n;
1088   vnet_rewrite_for_sw_interface
1089     (vnm,
1090      packet_type,
1091      sw_if_index,
1092      node_index,
1093      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1094      &adj->rewrite_header,
1095      sizeof (adj->rewrite_data));
1096 }
1097
1098 static void
1099 ip4_add_interface_routes (u32 sw_if_index,
1100                           ip4_main_t * im, u32 fib_index,
1101                           ip_interface_address_t * a)
1102 {
1103   vnet_main_t * vnm = vnet_get_main();
1104   ip_lookup_main_t * lm = &im->lookup_main;
1105   ip_adjacency_t * adj;
1106   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1107   ip4_add_del_route_args_t x;
1108   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1109   u32 classify_table_index;
1110
1111   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1112   x.table_index_or_table_id = fib_index;
1113   x.flags = (IP4_ROUTE_FLAG_ADD
1114              | IP4_ROUTE_FLAG_FIB_INDEX
1115              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1116   x.dst_address = address[0];
1117   x.dst_address_length = a->address_length;
1118   x.n_add_adj = 0;
1119   x.add_adj = 0;
1120
1121   a->neighbor_probe_adj_index = ~0;
1122   if (a->address_length < 32)
1123     {
1124       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1125                               &x.adj_index);
1126       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1127       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1128       ip4_add_del_route (im, &x);
1129       a->neighbor_probe_adj_index = x.adj_index;
1130     }
1131   
1132   /* Add e.g. 1.1.1.1/32 as local to this host. */
1133   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1134                           &x.adj_index);
1135   
1136   classify_table_index = ~0;
1137   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1138     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1139   if (classify_table_index != (u32) ~0)
1140     {
1141       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1142       adj->classify.table_index = classify_table_index;
1143     }
1144   else
1145     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1146   
1147   adj->if_address_index = a - lm->if_address_pool;
1148   adj->rewrite_header.sw_if_index = sw_if_index;
1149   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1150   /* 
1151    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1152    * fail an RPF-ish check, but still go thru the rewrite code...
1153    */
1154   adj->rewrite_header.data_bytes = 0;
1155
1156   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1157   x.dst_address_length = 32;
1158   ip4_add_del_route (im, &x);
1159 }
1160
1161 static void
1162 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1163 {
1164   ip4_add_del_route_args_t x;
1165
1166   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1167   x.table_index_or_table_id = fib_index;
1168   x.flags = (IP4_ROUTE_FLAG_DEL
1169              | IP4_ROUTE_FLAG_FIB_INDEX
1170              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1171   x.dst_address = address[0];
1172   x.dst_address_length = address_length;
1173   x.adj_index = ~0;
1174   x.n_add_adj = 0;
1175   x.add_adj = 0;
1176
1177   if (address_length < 32)
1178     ip4_add_del_route (im, &x);
1179
1180   x.dst_address_length = 32;
1181   ip4_add_del_route (im, &x);
1182
1183   ip4_delete_matching_routes (im,
1184                               fib_index,
1185                               IP4_ROUTE_FLAG_FIB_INDEX,
1186                               address,
1187                               address_length);
1188 }
1189
/*
 * (software interface, IPv4 address, length) triple.
 * NOTE(review): nothing in the nearby code references this type; `length`
 * is presumably the prefix length of `address` -- confirm against users
 * elsewhere in the file before relying on that.
 */
1190 typedef struct {
1191     u32 sw_if_index;
1192     ip4_address_t address;
1193     u32 length;
1194 } ip4_interface_address_t;
1195
1196 static clib_error_t *
1197 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1198                                         u32 sw_if_index,
1199                                         ip4_address_t * new_address,
1200                                         u32 new_length,
1201                                         u32 redistribute,
1202                                         u32 insert_routes,
1203                                         u32 is_del);
1204
1205 static clib_error_t *
1206 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1207                                         u32 sw_if_index,
1208                                         ip4_address_t * address,
1209                                         u32 address_length,
1210                                         u32 redistribute,
1211                                         u32 insert_routes,
1212                                         u32 is_del)
1213 {
1214   vnet_main_t * vnm = vnet_get_main();
1215   ip4_main_t * im = &ip4_main;
1216   ip_lookup_main_t * lm = &im->lookup_main;
1217   clib_error_t * error = 0;
1218   u32 if_address_index, elts_before;
1219   ip4_address_fib_t ip4_af, * addr_fib = 0;
1220
1221   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1222   ip4_addr_fib_init (&ip4_af, address,
1223                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1224   vec_add1 (addr_fib, ip4_af);
1225
1226   /* When adding an address check that it does not conflict with an existing address. */
1227   if (! is_del)
1228     {
1229       ip_interface_address_t * ia;
1230       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1231                                     0 /* honor unnumbered */,
1232       ({
1233         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1234
1235         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1236             || ip4_destination_matches_route (im, x, address, address_length))
1237           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1238                                     format_ip4_address_and_length, address, address_length,
1239                                     format_ip4_address_and_length, x, ia->address_length,
1240                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1241       }));
1242     }
1243
1244   elts_before = pool_elts (lm->if_address_pool);
1245
1246   error = ip_interface_address_add_del
1247     (lm,
1248      sw_if_index,
1249      addr_fib,
1250      address_length,
1251      is_del,
1252      &if_address_index);
1253   if (error)
1254     goto done;
1255   
1256   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1257     {
1258       if (is_del)
1259         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1260                                   address_length);
1261       
1262       else
1263           ip4_add_interface_routes (sw_if_index,
1264                                     im, ip4_af.fib_index,
1265                                     pool_elt_at_index 
1266                                     (lm->if_address_pool, if_address_index));
1267     }
1268
1269   /* If pool did not grow/shrink: add duplicate address. */
1270   if (elts_before != pool_elts (lm->if_address_pool))
1271     {
1272       ip4_add_del_interface_address_callback_t * cb;
1273       vec_foreach (cb, im->add_del_interface_address_callbacks)
1274         cb->function (im, cb->function_opaque, sw_if_index,
1275                       address, address_length,
1276                       if_address_index,
1277                       is_del);
1278     }
1279
1280  done:
1281   vec_free (addr_fib);
1282   return error;
1283 }
1284
1285 clib_error_t *
1286 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1287                                ip4_address_t * address, u32 address_length,
1288                                u32 is_del)
1289 {
1290   return ip4_add_del_interface_address_internal
1291     (vm, sw_if_index, address, address_length,
1292      /* redistribute */ 1,
1293      /* insert_routes */ 1,
1294      is_del);
1295 }
1296
1297 static clib_error_t *
1298 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1299                                 u32 sw_if_index,
1300                                 u32 flags)
1301 {
1302   ip4_main_t * im = &ip4_main;
1303   ip_interface_address_t * ia;
1304   ip4_address_t * a;
1305   u32 is_admin_up, fib_index;
1306   
1307   /* Fill in lookup tables with default table (0). */
1308   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1309   
1310   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1311   
1312   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1313   
1314   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1315
1316   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1317                                 0 /* honor unnumbered */,
1318   ({
1319     a = ip_interface_address_get_address (&im->lookup_main, ia);
1320     if (is_admin_up)
1321       ip4_add_interface_routes (sw_if_index,
1322                                 im, fib_index,
1323                                 ia);
1324     else
1325       ip4_del_interface_routes (im, fib_index,
1326                                 a, ia->address_length);
1327   }));
1328
1329   return 0;
1330 }
1331  
1332 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1333
1334 /* Built-in ip4 unicast rx feature path definition.
      *
      * Each registration names a graph node, lists the feature node(s) it
      * must run before, and gives the ip4_main field tied to that feature.
      * Read together, the runs_before entries below chain as:
      *
      *   ip4-inacl -> ip4-source-check-via-rx -> ip4-source-check-via-any
      *     -> ipsec-input-ip4 -> vpath-input-ip4 -> ip4-lookup
      */
1335 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
1336   .node_name = "ip4-inacl", 
1337   .runs_before = {"ip4-source-check-via-rx", 0}, 
1338   .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
1339 };
1340
1341 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
1342   .node_name = "ip4-source-check-via-rx",
1343   .runs_before = {"ip4-source-check-via-any", 0},
1344   .feature_index = 
1345   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
1346 };
1347
1348 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
1349   .node_name = "ip4-source-check-via-any",
1350   .runs_before = {"ipsec-input-ip4", 0},
1351   .feature_index = 
1352   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
1353 };
1354
1355 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
1356   .node_name = "ipsec-input-ip4",
1357   .runs_before = {"vpath-input-ip4", 0},
1358   .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
1359 };
1360
1361 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
1362   .node_name = "vpath-input-ip4",
1363   .runs_before = {"ip4-lookup", 0},
1364   .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
1365 };
1366
     /* Last feature of the unicast path; ip4_sw_interface_add_del adds or
        removes this one (via ip4_unicast_rx_feature_lookup) per interface. */
1367 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
1368   .node_name = "ip4-lookup",
1369   .runs_before = {0}, /* not before any other features */
1370   .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
1371 };
1372
1373 /* Built-in ip4 multicast rx feature path definition.
      *
      * Same layout as the unicast registrations; the runs_before entries
      * chain as: vpath-input-ip4 -> ip4-lookup-multicast
      */
1374 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
1375   .node_name = "vpath-input-ip4",
1376   .runs_before = {"ip4-lookup-multicast", 0},
1377   .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
1378 };
1379
     /* Last feature of the multicast path; ip4_sw_interface_add_del adds or
        removes this one (via ip4_multicast_rx_feature_lookup) per interface. */
1380 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
1381   .node_name = "ip4-lookup-multicast",
1382   .runs_before = {0}, /* not before any other features */
1383   .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
1384 };
1385
     /* Node names that ip4_feature_init hands to ip_feature_init_cast as the
        feature start nodes, for every cast type. */
1386 static char * feature_start_nodes[] = 
1387   { "ip4-input", "ip4-input-no-checksum"};
1388
1389 static clib_error_t *
1390 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
1391 {
1392   ip_lookup_main_t * lm = &im->lookup_main;
1393   clib_error_t * error;
1394   vnet_cast_t cast;
1395
1396   for (cast = 0; cast < VNET_N_CAST; cast++)
1397     {
1398       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1399       vnet_config_main_t * vcm = &cm->config_main;
1400
1401       if ((error = ip_feature_init_cast (vm, cm, vcm, 
1402                                          feature_start_nodes,
1403                                          ARRAY_LEN(feature_start_nodes),
1404                                          cast,
1405                                          1 /* is_ip4 */)))
1406         return error;
1407     }
1408   return 0;
1409 }
1410
1411 static clib_error_t *
1412 ip4_sw_interface_add_del (vnet_main_t * vnm,
1413                           u32 sw_if_index,
1414                           u32 is_add)
1415 {
1416   vlib_main_t * vm = vnm->vlib_main;
1417   ip4_main_t * im = &ip4_main;
1418   ip_lookup_main_t * lm = &im->lookup_main;
1419   u32 ci, cast;
1420   u32 feature_index;
1421
1422   for (cast = 0; cast < VNET_N_CAST; cast++)
1423     {
1424       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1425       vnet_config_main_t * vcm = &cm->config_main;
1426
1427       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1428       ci = cm->config_index_by_sw_if_index[sw_if_index];
1429
1430       if (cast == VNET_UNICAST)
1431         feature_index = im->ip4_unicast_rx_feature_lookup;
1432       else
1433         feature_index = im->ip4_multicast_rx_feature_lookup;
1434
1435       if (is_add)
1436         ci = vnet_config_add_feature (vm, vcm,
1437                                       ci,
1438                                       feature_index,
1439                                       /* config data */ 0,
1440                                       /* # bytes of config data */ 0);
1441       else
1442         ci = vnet_config_del_feature (vm, vcm,
1443                                       ci,
1444                                       feature_index,
1445                                       /* config data */ 0,
1446                                       /* # bytes of config data */ 0);
1447
1448       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1449     }
1450
1451   return /* no error */ 0;
1452 }
1453
1454 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1455
     /* Defined further down; declared here so the node registrations can
        reference it as their trace formatter. */
1456 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
1457
     /* Graph node "ip4-lookup": runs ip4_lookup on vectors of u32 entries
        and takes its next-node set from the IP4_LOOKUP_* macros. */
1458 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1459   .function = ip4_lookup,
1460   .name = "ip4-lookup",
1461   .vector_size = sizeof (u32),
1462
1463   .format_trace = format_ip4_lookup_trace,
1464
1465   .n_next_nodes = IP4_LOOKUP_N_NEXT,
1466   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1467 };
1468
1469 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
1470
1471 static uword
1472 ip4_indirect (vlib_main_t * vm,
1473                vlib_node_runtime_t * node,
1474                vlib_frame_t * frame)
1475 {
1476   return ip4_lookup_inline (vm, node, frame,
1477                             /* lookup_for_responses_to_locally_received_packets */ 0,
1478                             /* is_indirect */ 1);
1479 }
1480
     /* Graph node "ip4-indirect": runs ip4_indirect and shares the
        ip4-lookup trace formatter.  It is registered as a sibling of
        "ip4-lookup" and declares no next nodes of its own. */
1481 VLIB_REGISTER_NODE (ip4_indirect_node) = {
1482   .function = ip4_indirect,
1483   .name = "ip4-indirect",
1484   .vector_size = sizeof (u32),
1485   .sibling_of = "ip4-lookup",
1486   .format_trace = format_ip4_lookup_trace,
1487
1488   .n_next_nodes = 0,
1489 };
1490
1491 VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect)
1492
1493
1494 /* Global IP4 main. */
     /* The single ip4_main_t instance; the functions in this file reach it
        through `&ip4_main'. */
1495 ip4_main_t ip4_main;
1496
1497 clib_error_t *
1498 ip4_lookup_init (vlib_main_t * vm)
1499 {
1500   ip4_main_t * im = &ip4_main;
1501   clib_error_t * error;
1502   uword i;
1503
1504   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1505     {
1506       u32 m;
1507
1508       if (i < 32)
1509         m = pow2_mask (i) << (32 - i);
1510       else 
1511         m = ~0;
1512       im->fib_masks[i] = clib_host_to_net_u32 (m);
1513     }
1514
1515   /* Create FIB with index 0 and table id of 0. */
1516   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1517
1518   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1519
1520   {
1521     pg_node_t * pn;
1522     pn = pg_get_node (ip4_lookup_node.index);
1523     pn->unformat_edit = unformat_pg_ip4_header;
1524   }
1525
1526   {
1527     ethernet_arp_header_t h;
1528
1529     memset (&h, 0, sizeof (h));
1530
1531     /* Set target ethernet address to all zeros. */
1532     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1533
1534 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1535 #define _8(f,v) h.f = v;
1536     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1537     _16 (l3_type, ETHERNET_TYPE_IP4);
1538     _8 (n_l2_address_bytes, 6);
1539     _8 (n_l3_address_bytes, 4);
1540     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1541 #undef _16
1542 #undef _8
1543
1544     vlib_packet_template_init (vm,
1545                                &im->ip4_arp_request_packet_template,
1546                                /* data */ &h,
1547                                sizeof (h),
1548                                /* alloc chunk size */ 8,
1549                                "ip4 arp");
1550   }
1551
1552   error = ip4_feature_init (vm, im);
1553
1554   return error;
1555 }
1556
1557 VLIB_INIT_FUNCTION (ip4_lookup_init);
1558
     /* Per-packet trace record written by ip4_forward_next_trace and read by
        the format_ip4_*_trace functions in this file. */
1559 typedef struct {
1560   /* Adjacency taken. */
1561   u32 adj_index;
       /* Copy of the buffer's ip.flow_hash at trace time. */
1562   u32 flow_hash;
       /* The buffer's sw_if_index[VLIB_TX] when that is not ~0, otherwise the
          FIB index of the receive interface.  format_ip4_rewrite_trace
          prints this same field as "tx_sw_if_index". */
1563   u32 fib_index;
1564
1565   /* Packet data, possibly *after* rewrite. */
       /* NOTE(review): the size subtracts a single u32 although three u32
          fields precede the array, so the record is larger than 64 bytes --
          confirm whether a 64-byte record was intended. */
1566   u8 packet_data[64 - 1*sizeof(u32)];
1567 } ip4_forward_next_trace_t;
1568
1569 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1570 {
1571   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1572   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1573   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1574   uword indent = format_get_indent (s);
1575   s = format (s, "%U%U",
1576                 format_white_space, indent,
1577                 format_ip4_header, t->packet_data);
1578   return s;
1579 }
1580
1581 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1582 {
1583   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1584   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1585   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1586   vnet_main_t * vnm = vnet_get_main();
1587   ip4_main_t * im = &ip4_main;
1588   uword indent = format_get_indent (s);
1589
1590   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1591               t->fib_index, t->adj_index, format_ip_adjacency,
1592               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1593   s = format (s, "\n%U%U",
1594               format_white_space, indent,
1595               format_ip4_header, t->packet_data);
1596   return s;
1597 }
1598
1599 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1600 {
1601   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1602   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1603   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1604   vnet_main_t * vnm = vnet_get_main();
1605   ip4_main_t * im = &ip4_main;
1606   uword indent = format_get_indent (s);
1607
1608   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
1609               t->fib_index, t->adj_index, format_ip_adjacency,
1610               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1611   s = format (s, "\n%U%U",
1612               format_white_space, indent,
1613               format_ip_adjacency_packet_data,
1614               vnm, &im->lookup_main, t->adj_index,
1615               t->packet_data, sizeof (t->packet_data));
1616   return s;
1617 }
1618
1619 /* Common trace function for all ip4-forward next nodes.
 *
 * For every buffer in FRAME that has VLIB_BUFFER_IS_TRACED set, adds an
 * ip4_forward_next_trace_t record to the trace of NODE containing:
 *   adj_index   - ip.adj_index[which_adj_index] from the buffer metadata
 *   flow_hash   - ip.flow_hash from the buffer metadata
 *   fib_index   - sw_if_index[VLIB_TX] if it is not ~0, otherwise the
 *                 fib_index_by_sw_if_index entry for sw_if_index[VLIB_RX]
 *   packet_data - sizeof (packet_data) bytes copied from the buffer's
 *                 current data pointer
 *
 * which_adj_index selects which adj_index[] slot is recorded.
 * Buffers that are not flagged as traced are skipped.
 */
1620 void
1621 ip4_forward_next_trace (vlib_main_t * vm,
1622                         vlib_node_runtime_t * node,
1623                         vlib_frame_t * frame,
1624                         vlib_rx_or_tx_t which_adj_index)
1625 {
1626   u32 * from, n_left;
1627   ip4_main_t * im = &ip4_main;
1628
1629   n_left = frame->n_vectors;
1630   from = vlib_frame_vector_args (frame);
1631   
       /* Dual loop: two buffers per iteration.  At least 4 indices must
          remain because from[2] and from[3] are prefetched below. */
1632   while (n_left >= 4)
1633     {
1634       u32 bi0, bi1;
1635       vlib_buffer_t * b0, * b1;
1636       ip4_forward_next_trace_t * t0, * t1;
1637
1638       /* Prefetch next iteration. */
1639       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1640       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1641
1642       bi0 = from[0];
1643       bi1 = from[1];
1644
1645       b0 = vlib_get_buffer (vm, bi0);
1646       b1 = vlib_get_buffer (vm, bi1);
1647
1648       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1649         {
1650           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1651           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1652           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
               /* Prefer the TX sw_if_index slot when it is set (not ~0);
                  otherwise fall back to the FIB of the RX interface. */
1653           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1654               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1655               vec_elt (im->fib_index_by_sw_if_index,
1656                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1657
1658           clib_memcpy (t0->packet_data,
1659                   vlib_buffer_get_current (b0),
1660                   sizeof (t0->packet_data));
1661         }
1662       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1663         {
1664           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1665           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1666           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1667           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1668               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1669               vec_elt (im->fib_index_by_sw_if_index,
1670                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1671           clib_memcpy (t1->packet_data,
1672                   vlib_buffer_get_current (b1),
1673                   sizeof (t1->packet_data));
1674         }
1675       from += 2;
1676       n_left -= 2;
1677     }
1678
       /* Single loop: handles whatever the dual loop left over. */
1679   while (n_left >= 1)
1680     {
1681       u32 bi0;
1682       vlib_buffer_t * b0;
1683       ip4_forward_next_trace_t * t0;
1684
1685       bi0 = from[0];
1686
1687       b0 = vlib_get_buffer (vm, bi0);
1688
1689       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1690         {
1691           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1692           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1693           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1694           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1695               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1696               vec_elt (im->fib_index_by_sw_if_index,
1697                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1698           clib_memcpy (t0->packet_data,
1699                   vlib_buffer_get_current (b0),
1700                   sizeof (t0->packet_data));
1701         }
1702       from += 1;
1703       n_left -= 1;
1704     }
1705 }
1706
/*
 * Shared body of the ip4-drop, ip4-punt and ip4-miss nodes.
 *
 * Hands every buffer in FRAME to vlib_error_drop_buffers () with next
 * index 0 and the given error_code, passing ip4_input_node.index as the
 * node argument (presumably so the error is counted against ip4-input --
 * confirm against vlib_error_drop_buffers).  If tracing is enabled on NODE
 * the frame is then traced using the VLIB_TX adjacency index.
 *
 * Returns the number of packets in the frame.
 */
1707 static uword
1708 ip4_drop_or_punt (vlib_main_t * vm,
1709                   vlib_node_runtime_t * node,
1710                   vlib_frame_t * frame,
1711                   ip4_error_t error_code)
1712 {
1713   u32 * buffers = vlib_frame_vector_args (frame);
1714   uword n_packets = frame->n_vectors;
1715
1716   vlib_error_drop_buffers (vm, node,
1717                            buffers,
1718                            /* stride */ 1,
1719                            n_packets,
1720                            /* next */ 0,
1721                            ip4_input_node.index,
1722                            error_code);
1723
1724   if (node->flags & VLIB_NODE_FLAG_TRACE)
1725     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1726
1727   return n_packets;
1728 }
1729
/* Node function for "ip4-drop": forwards the whole frame through
   ip4_drop_or_punt () with error IP4_ERROR_ADJACENCY_DROP. */
1730 static uword
1731 ip4_drop (vlib_main_t * vm,
1732           vlib_node_runtime_t * node,
1733           vlib_frame_t * frame)
1734 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1735
/* Node function for "ip4-punt": forwards the whole frame through
   ip4_drop_or_punt () with error IP4_ERROR_ADJACENCY_PUNT. */
1736 static uword
1737 ip4_punt (vlib_main_t * vm,
1738           vlib_node_runtime_t * node,
1739           vlib_frame_t * frame)
1740 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1741
/* Node function for "ip4-miss": forwards the whole frame through
   ip4_drop_or_punt () with error IP4_ERROR_DST_LOOKUP_MISS. */
1742 static uword
1743 ip4_miss (vlib_main_t * vm,
1744           vlib_node_runtime_t * node,
1745           vlib_frame_t * frame)
1746 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1747
/* Graph node "ip4-drop": runs ip4_drop () on vectors of u32 buffer
   indices; its only next node (index 0) is "error-drop". */
1748 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1749   .function = ip4_drop,
1750   .name = "ip4-drop",
1751   .vector_size = sizeof (u32),
1752
       /* Trace records are written by ip4_forward_next_trace (). */
1753   .format_trace = format_ip4_forward_next_trace,
1754
1755   .n_next_nodes = 1,
1756   .next_nodes = {
1757     [0] = "error-drop",
1758   },
1759 };
1760
/* NOTE(review): presumably emits per-architecture variants of the node
   function -- confirm against the macro definition. */
1761 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1762
/* Graph node "ip4-punt": runs ip4_punt () on vectors of u32 buffer
   indices; its only next node (index 0) is "error-punt". */
1763 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1764   .function = ip4_punt,
1765   .name = "ip4-punt",
1766   .vector_size = sizeof (u32),
1767
       /* Trace records are written by ip4_forward_next_trace (). */
1768   .format_trace = format_ip4_forward_next_trace,
1769
1770   .n_next_nodes = 1,
1771   .next_nodes = {
1772     [0] = "error-punt",
1773   },
1774 };
1775
/* NOTE(review): presumably emits per-architecture variants of the node
   function -- confirm against the macro definition. */
1776 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1777
/* Graph node "ip4-miss": runs ip4_miss () on vectors of u32 buffer
   indices; its only next node (index 0) is "error-drop". */
1778 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1779   .function = ip4_miss,
1780   .name = "ip4-miss",
1781   .vector_size = sizeof (u32),
1782
       /* Trace records are written by ip4_forward_next_trace (). */
1783   .format_trace = format_ip4_forward_next_trace,
1784
1785   .n_next_nodes = 1,
1786   .next_nodes = {
1787     [0] = "error-drop",
1788   },
1789 };
1790
/* NOTE(review): presumably emits per-architecture variants of the node
   function -- confirm against the macro definition. */
1791 VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss)
1792
1793 /* Compute TCP/UDP/ICMP4 checksum in software.
 *
 * vm  - vlib main, used to follow the buffer chain.
 * p0  - buffer that contains the IPv4 header ip0.
 * ip0 - IPv4 header; the L4 payload is taken to start ip4_header_bytes (ip0)
 *       bytes after it.
 *
 * The sum is seeded with the payload length plus (protocol << 16), followed
 * by the source and destination addresses, and then accumulates the
 * payload, following p0->next_buffer when the payload does not fit in the
 * first buffer.
 *
 * Returns the complement of the folded sum.  ip4_tcp_udp_validate_checksum ()
 * treats a return value of 0 as "checksum correct".
 */
1794 u16
1795 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1796                               ip4_header_t * ip0)
1797 {
1798   ip_csum_t sum0;
1799   u32 ip_header_length, payload_length_host_byte_order;
1800   u32 n_this_buffer, n_bytes_left;
1801   u16 sum16;
1802   void * data_this_buffer;
1803   
1804   /* Initialize checksum with ip header. */
1805   ip_header_length = ip4_header_bytes (ip0);
       /* NOTE(review): unsigned subtraction; assumes ip0->length is at least
          the header length -- confirm callers guarantee this. */
1806   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1807   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1808
       /* Add the addresses: two 32-bit loads on 32-bit machines, otherwise a
          single 64-bit load starting at src_address (presumably covering
          dst_address too, since it follows src_address in the header). */
1809   if (BITS (uword) == 32)
1810     {
1811       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1812       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1813     }
1814   else
1815     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1816
1817   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1818   data_this_buffer = (void *) ip0 + ip_header_length;
       /* Clamp the first chunk to what this buffer holds beyond the IP
          header (0 if the buffer is no longer than the header). */
1819   if (n_this_buffer + ip_header_length > p0->current_length)
1820     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1821   while (1)
1822     {
1823       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1824       n_bytes_left -= n_this_buffer;
1825       if (n_bytes_left == 0)
1826         break;
1827
           /* More payload remains: continue with the next buffer in the
              chain, which is asserted to exist. */
1828       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1829       p0 = vlib_get_buffer (vm, p0->next_buffer);
1830       data_this_buffer = vlib_buffer_get_current (p0);
1831       n_this_buffer = p0->current_length;
1832     }
1833
1834   sum16 = ~ ip_csum_fold (sum0);
1835
1836   return sum16;
1837 }
1838
/*
 * Validate the TCP/UDP checksum of the packet at the current position of
 * p0 and record the result in p0->flags.
 *
 * The protocol is asserted to be TCP or UDP.  A UDP packet whose checksum
 * field is 0 is marked computed and correct without summing.  Otherwise the
 * checksum is computed in software and IP_BUFFER_L4_CHECKSUM_CORRECT is set
 * only when the computed value is 0; IP_BUFFER_L4_CHECKSUM_COMPUTED is set
 * in both cases.
 *
 * Returns the updated p0->flags.
 */
1839 static u32
1840 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1841 {
1842   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1843   udp_header_t * udp0;
1844   u16 sum16;
1845
1846   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1847           || ip0->protocol == IP_PROTOCOL_UDP);
1848
       /* Locate the L4 header via the IP header length, as ip4_local does,
          so that IPv4 options are skipped instead of assuming a fixed-size
          header. */
1849   udp0 = ip4_next_header (ip0);
1850   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1851     {
1852       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1853                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1854       return p0->flags;
1855     }
1856
1857   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1858
1859   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1860                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1861
1862   return p0->flags;
1863 }
1864
/*
 * Node function for "ip4-local": handles packets addressed to this host.
 *
 * For each packet the function
 *   - looks up the SOURCE address in the mtrie of the FIB bound to the RX
 *     interface (four lookup steps interleaved with the other work) and
 *     stores the resulting adjacency index in both adj_index[VLIB_RX] and
 *     adj_index[VLIB_TX];
 *   - treats fragments as protocol 0xfe;
 *   - for TCP/UDP, validates the L4 checksum in software unless the buffer
 *     flags already mark it computed, accepting UDP with a zero checksum;
 *   - for UDP, checks that the UDP length does not exceed the IP length;
 *   - flags IP4_ERROR_SRC_LOOKUP_MISS when no earlier error was found, the
 *     source adjacency is not REWRITE, ARP or LOCAL, and the destination is
 *     not 255.255.255.255;
 *   - sends the packet to lm->local_next_by_ip_protocol[proto], or to
 *     IP_LOCAL_NEXT_DROP when any error was recorded.
 *
 * IP4_ERROR_UNKNOWN_PROTOCOL is used as the initial "no error found yet"
 * value.  Errors are taken from the ip4-input node's error table.
 *
 * Returns the number of vectors in the frame.
 */
1865 static uword
1866 ip4_local (vlib_main_t * vm,
1867            vlib_node_runtime_t * node,
1868            vlib_frame_t * frame)
1869 {
1870   ip4_main_t * im = &ip4_main;
1871   ip_lookup_main_t * lm = &im->lookup_main;
1872   ip_local_next_t next_index;
1873   u32 * from, * to_next, n_left_from, n_left_to_next;
1874   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1875
1876   from = vlib_frame_vector_args (frame);
1877   n_left_from = frame->n_vectors;
1878   next_index = node->cached_next_index;
1879   
1880   if (node->flags & VLIB_NODE_FLAG_TRACE)
1881     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1882
1883   while (n_left_from > 0)
1884     {
1885       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1886
           /* Dual loop: two packets per iteration. */
1887       while (n_left_from >= 4 && n_left_to_next >= 2)
1888         {
1889           vlib_buffer_t * p0, * p1;
1890           ip4_header_t * ip0, * ip1;
1891           udp_header_t * udp0, * udp1;
1892           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1893           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1894           ip_adjacency_t * adj0, * adj1;
1895           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
1896           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
1897           i32 len_diff0, len_diff1;
1898           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1899           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1900           u8 enqueue_code;
1901       
               /* Speculatively enqueue both packets to the current next
                  frame; corrected below if the guess was wrong. */
1902           pi0 = to_next[0] = from[0];
1903           pi1 = to_next[1] = from[1];
1904           from += 2;
1905           n_left_from -= 2;
1906           to_next += 2;
1907           n_left_to_next -= 2;
1908       
1909           p0 = vlib_get_buffer (vm, pi0);
1910           p1 = vlib_get_buffer (vm, pi1);
1911
1912           ip0 = vlib_buffer_get_current (p0);
1913           ip1 = vlib_buffer_get_current (p1);
1914
1915           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1916                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1917           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1918                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1919
1920           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1921           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
1922
1923           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1924
1925           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1926           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1927
1928           /* Treat IP frag packets as "experimental" protocol for now
1929              until support of IP frag reassembly is implemented */
1930           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1931           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1932           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1933           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1934           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1935           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1936
1937           flags0 = p0->flags;
1938           flags1 = p1->flags;
1939
1940           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1941           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1942
1943           udp0 = ip4_next_header (ip0);
1944           udp1 = ip4_next_header (ip1);
1945
1946           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1947           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1948           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1949
1950           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1951           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1952
1953           /* Verify UDP length. */
1954           ip_len0 = clib_net_to_host_u16 (ip0->length);
1955           ip_len1 = clib_net_to_host_u16 (ip1->length);
1956           udp_len0 = clib_net_to_host_u16 (udp0->length);
1957           udp_len1 = clib_net_to_host_u16 (udp1->length);
1958
1959           len_diff0 = ip_len0 - udp_len0;
1960           len_diff1 = ip_len1 - udp_len1;
1961
               /* The length check only applies to UDP. */
1962           len_diff0 = is_udp0 ? len_diff0 : 0;
1963           len_diff1 = is_udp1 ? len_diff1 : 0;
1964
               /* Slow path: at least one packet is not TCP/UDP with an
                  already-known good checksum. */
1965           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1966                                 & good_tcp_udp0 & good_tcp_udp1)))
1967             {
1968               if (is_tcp_udp0)
1969                 {
1970                   if (is_tcp_udp0
1971                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1972                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1973                   good_tcp_udp0 =
1974                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1975                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1976                 }
1977               if (is_tcp_udp1)
1978                 {
1979                   if (is_tcp_udp1
1980                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1981                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1982                   good_tcp_udp1 =
1983                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1984                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1985                 }
1986             }
1987
1988           good_tcp_udp0 &= len_diff0 >= 0;
1989           good_tcp_udp1 &= len_diff1 >= 0;
1990
1991           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1992           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1993
               /* UNKNOWN_PROTOCOL doubles as "no error found so far". */
1994           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1995
               /* NOTE(review): a bad UDP length also clears good_tcp_udpN
                  above, so the checksum assignment below replaces
                  UDP_LENGTH with UDP_CHECKSUM -- confirm this is intended. */
1996           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1997           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1998
1999           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2000           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2001                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2002                     : error0);
2003           error1 = (is_tcp_udp1 && ! good_tcp_udp1
2004                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
2005                     : error1);
2006
2007           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2008           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
2009
2010           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2011           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2012
2013           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
2014           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2015
2016           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2017                                                            &ip0->src_address,
2018                                                            /* no_default_route */ 1));
2019           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
2020                                                            &ip1->src_address,
2021                                                            /* no_default_route */ 1));
2022
2023           adj0 = ip_get_adjacency (lm, adj_index0);
2024           adj1 = ip_get_adjacency (lm, adj_index1);
2025
2026           /* 
2027            * Must have a route to source otherwise we drop the packet.
2028            * ip4 broadcasts are accepted, e.g. to make dhcp client work
2029            */
2030           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2031                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2032                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2033                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2034                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2035                     ? IP4_ERROR_SRC_LOOKUP_MISS
2036                     : error0);
               /* The broadcast exemption for packet 1 must test packet 1's
                  own destination address (ip1, not ip0). */
2037           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
2038                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2039                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
2040                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2041                     && ip1->dst_address.as_u32 != 0xFFFFFFFF
2042                     ? IP4_ERROR_SRC_LOOKUP_MISS
2043                     : error1);
2044
2045           next0 = lm->local_next_by_ip_protocol[proto0];
2046           next1 = lm->local_next_by_ip_protocol[proto1];
2047
               /* Any recorded error overrides the per-protocol next node. */
2048           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2049           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
2050
2051           p0->error = error0 ? error_node->errors[error0] : 0;
2052           p1->error = error1 ? error_node->errors[error1] : 0;
2053
               /* Bit 0: packet 0 mispredicted; bit 1: packet 1 mispredicted. */
2054           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
2055
2056           if (PREDICT_FALSE (enqueue_code != 0))
2057             {
2058               switch (enqueue_code)
2059                 {
2060                 case 1:
2061                   /* A B A */
2062                   to_next[-2] = pi1;
2063                   to_next -= 1;
2064                   n_left_to_next += 1;
2065                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2066                   break;
2067
2068                 case 2:
2069                   /* A A B */
2070                   to_next -= 1;
2071                   n_left_to_next += 1;
2072                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2073                   break;
2074
2075                 case 3:
2076                   /* A B B or A B C */
2077                   to_next -= 2;
2078                   n_left_to_next += 2;
2079                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2080                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
                       /* Both packets agree on a different next node:
                          switch the speculated next to it. */
2081                   if (next0 == next1)
2082                     {
2083                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2084                       next_index = next1;
2085                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2086                     }
2087                   break;
2088                 }
2089             }
2090         }
2091
           /* Single loop: same processing, one packet per iteration. */
2092       while (n_left_from > 0 && n_left_to_next > 0)
2093         {
2094           vlib_buffer_t * p0;
2095           ip4_header_t * ip0;
2096           udp_header_t * udp0;
2097           ip4_fib_mtrie_t * mtrie0;
2098           ip4_fib_mtrie_leaf_t leaf0;
2099           ip_adjacency_t * adj0;
2100           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
2101           i32 len_diff0;
2102           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2103       
2104           pi0 = to_next[0] = from[0];
2105           from += 1;
2106           n_left_from -= 1;
2107           to_next += 1;
2108           n_left_to_next -= 1;
2109       
2110           p0 = vlib_get_buffer (vm, pi0);
2111
2112           ip0 = vlib_buffer_get_current (p0);
2113
2114           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2115                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2116
2117           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2118
2119           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2120
2121           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2122
2123           /* Treat IP frag packets as "experimental" protocol for now
2124              until support of IP frag reassembly is implemented */
2125           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
2126           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2127           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2128
2129           flags0 = p0->flags;
2130
2131           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2132
2133           udp0 = ip4_next_header (ip0);
2134
2135           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2136           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2137
2138           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2139
2140           /* Verify UDP length. */
2141           ip_len0 = clib_net_to_host_u16 (ip0->length);
2142           udp_len0 = clib_net_to_host_u16 (udp0->length);
2143
2144           len_diff0 = ip_len0 - udp_len0;
2145
2146           len_diff0 = is_udp0 ? len_diff0 : 0;
2147
2148           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
2149             {
2150               if (is_tcp_udp0)
2151                 {
2152                   if (is_tcp_udp0
2153                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2154                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2155                   good_tcp_udp0 =
2156                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2157                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2158                 }
2159             }
2160
2161           good_tcp_udp0 &= len_diff0 >= 0;
2162
2163           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2164
2165           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2166
2167           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2168
2169           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2170           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2171                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2172                     : error0);
2173
2174           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2175
2176           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2177           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2178
2179           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2180                                                            &ip0->src_address,
2181                                                            /* no_default_route */ 1));
2182
2183           adj0 = ip_get_adjacency (lm, adj_index0);
2184
2185           /* Must have a route to source otherwise we drop the packet. */
2186           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2187                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2188                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2189                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2190                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2191                     ? IP4_ERROR_SRC_LOOKUP_MISS
2192                     : error0);
2193
2194           next0 = lm->local_next_by_ip_protocol[proto0];
2195
2196           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2197
2198           p0->error = error0? error_node->errors[error0] : 0;
2199
               /* Misprediction: take the packet back out of the current
                  frame and switch to the frame of its real next node. */
2200           if (PREDICT_FALSE (next0 != next_index))
2201             {
2202               n_left_to_next += 1;
2203               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2204
2205               next_index = next0;
2206               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2207               to_next[0] = pi0;
2208               to_next += 1;
2209               n_left_to_next -= 1;
2210             }
2211         }
2212   
2213       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2214     }
2215
2216   return frame->n_vectors;
2217 }
2218
/* Graph node "ip4-local": runs ip4_local () on vectors of u32 buffer
   indices.  The static next nodes are listed below; further next nodes are
   added at run time by ip4_register_protocol (). */
2219 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2220   .function = ip4_local,
2221   .name = "ip4-local",
2222   .vector_size = sizeof (u32),
2223
2224   .format_trace = format_ip4_forward_next_trace,
2225
2226   .n_next_nodes = IP_LOCAL_N_NEXT,
2227   .next_nodes = {
2228     [IP_LOCAL_NEXT_DROP] = "error-drop",
2229     [IP_LOCAL_NEXT_PUNT] = "error-punt",
2230     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2231     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2232   },
2233 };
2234
/* NOTE(review): presumably emits per-architecture variants of the node
   function -- confirm against the macro definition. */
2235 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
2236
/*
 * Register node_index as the handler for locally received packets of the
 * given IP protocol: the node is added as a next node of ip4-local and the
 * resulting next index is stored in lm->local_next_by_ip_protocol[protocol],
 * which ip4_local () consults per packet.
 *
 * protocol is asserted to be within the bounds of that table.
 */
2237 void ip4_register_protocol (u32 protocol, u32 node_index)
2238 {
2239   vlib_main_t * vm = vlib_get_main();
2240   ip4_main_t * im = &ip4_main;
2241   ip_lookup_main_t * lm = &im->lookup_main;
2242
2243   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2244   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2245 }
2246
/*
 * CLI handler for "show ip local".
 *
 * Prints a heading followed by every IP protocol number whose entry in
 * lm->local_next_by_ip_protocol differs from IP_LOCAL_NEXT_PUNT, one per
 * line.  The input and cmd arguments are not used.  Always returns 0.
 */
2247 static clib_error_t *
2248 show_ip_local_command_fn (vlib_main_t * vm,
2249                           unformat_input_t * input,
2250                          vlib_cli_command_t * cmd)
2251 {
2252   ip4_main_t * im = &ip4_main;
2253   ip_lookup_main_t * lm = &im->lookup_main;
2254   int i;
2255
2256   vlib_cli_output (vm, "Protocols handled by ip4_local");
2257   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2258     {
           /* A protocol counts as "handled" when it is not routed to punt. */
2259       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2260         vlib_cli_output (vm, "%d", i);
2261     }
2262   return 0;
2263 }
2264
2265
2266
/* Binds the CLI path "show ip local" to show_ip_local_command_fn (). */
2267 VLIB_CLI_COMMAND (show_ip_local, static) = {
2268   .path = "show ip local",
2269   .function = show_ip_local_command_fn,
2270   .short_help = "Show ip local protocol table",
2271 };
2272
/*
 * Node function for "ip4-arp": issues ARP requests for packets whose TX
 * adjacency has not been resolved yet.
 *
 * Every incoming packet is enqueued to the IP4_ARP_NEXT_DROP next node with
 * an error code describing what happened.  Unless throttled, a new ARP
 * request is built from im->ip4_arp_request_packet_template and sent to the
 * adjacency's rewrite next index.
 *
 * Throttling: (destination address, TX sw_if_index) is hashed with three
 * random seeds into a 256-bit bitmap.  If the bit is already set the packet
 * is dropped without a new request; otherwise the bit is set.  Seeds and
 * bitmap are function-static and are reset whenever more than 1e-3 (in the
 * units of vlib_time_now ()) has elapsed since the previous reset.
 *
 * Returns the number of vectors in the frame.
 */
2273 static uword
2274 ip4_arp (vlib_main_t * vm,
2275          vlib_node_runtime_t * node,
2276          vlib_frame_t * frame)
2277 {
2278   vnet_main_t * vnm = vnet_get_main();
2279   ip4_main_t * im = &ip4_main;
2280   ip_lookup_main_t * lm = &im->lookup_main;
2281   u32 * from, * to_next_drop;
2282   uword n_left_from, n_left_to_next_drop, next_index;
2283   static f64 time_last_seed_change = -1e100;
2284   static u32 hash_seeds[3];
2285   static uword hash_bitmap[256 / BITS (uword)]; 
2286   f64 time_now;
2287
2288   if (node->flags & VLIB_NODE_FLAG_TRACE)
2289     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2290
2291   time_now = vlib_time_now (vm);
2292   if (time_now - time_last_seed_change > 1e-3)
2293     {
2294       uword i;
2295       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2296                                              sizeof (hash_seeds));
2297       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2298         hash_seeds[i] = r[i];
2299
2300       /* Mark all hash keys as not seen before. */
2301       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2302         hash_bitmap[i] = 0;
2303
2304       time_last_seed_change = time_now;
2305     }
2306
2307   from = vlib_frame_vector_args (frame);
2308   n_left_from = frame->n_vectors;
2309   next_index = node->cached_next_index;
2310   if (next_index == IP4_ARP_NEXT_DROP)
2311     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2312
2313   while (n_left_from > 0)
2314     {
2315       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2316                            to_next_drop, n_left_to_next_drop);
2317
2318       while (n_left_from > 0 && n_left_to_next_drop > 0)
2319         {
2320           vlib_buffer_t * p0;
2321           ip4_header_t * ip0;
2322           ethernet_header_t * eh0;
2323           u32 pi0, adj_index0, a0, b0, c0, sw_if_index0, drop0;
2324           uword bm0, m0;
2325           ip_adjacency_t * adj0;
2326
2327           pi0 = from[0];
2328
2329           p0 = vlib_get_buffer (vm, pi0);
2330
2331           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2332           adj0 = ip_get_adjacency (lm, adj_index0);
2333           ip0 = vlib_buffer_get_current (p0);
2334
2335           /* If packet destination is not local, send ARP to next hop */
2336           if (adj0->arp.next_hop.ip4.as_u32)
2337             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2338
2339           /* 
2340            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2341            * rewrite to this packet, we need to skip it here.
2342            * Note, to distinguish from src IP addr *.8.6.*, we
2343            * check for a bcast eth dest instead of IPv4 version.
2344            */
2345           eh0 = (ethernet_header_t*)ip0;
2346           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2347             {
2348               u32 vlan_num = 0;
2349               u16 * etype = &eh0->type;
2350               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2351                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2352                 {
2353                   vlan_num += 1;
2354                   etype += 2; //vlan tag also 16 bits, same as etype
2355                 }
2356               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2357                 {
2358                   vlib_buffer_advance (
2359                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2360                   ip0 = vlib_buffer_get_current (p0);
2361                 }
2362             }
2363
2364           a0 = hash_seeds[0];
2365           b0 = hash_seeds[1];
2366           c0 = hash_seeds[2];
2367
2368           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2369           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2370
2371           a0 ^= ip0->dst_address.data_u32;
2372           b0 ^= sw_if_index0;
2373
2374           hash_v3_finalize32 (a0, b0, c0);
2375
               /* Reduce the hash to a bit position in the bitmap, then split
                  it into a bit mask within a word and a word index.  The
                  mask must be derived before c0 is turned into the word
                  index, and must be a full uword so that bits above 31 are
                  not truncated away. */
2376           c0 &= BITS (hash_bitmap) - 1;
2377           m0 = (uword) 1 << (c0 % BITS (uword));
2378           c0 = c0 / BITS (uword);
2379
2380           bm0 = hash_bitmap[c0];
2381           drop0 = (bm0 & m0) != 0;
2382
2383           /* Mark it as seen. */
2384           hash_bitmap[c0] = bm0 | m0;
2385
               /* The original packet always goes to the drop next node. */
2386           from += 1;
2387           n_left_from -= 1;
2388           to_next_drop[0] = pi0;
2389           to_next_drop += 1;
2390           n_left_to_next_drop -= 1;
2391
2392           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2393
               /* Throttled: a request for this key was already seen. */
2394           if (drop0)
2395             continue;
2396
2397           /* 
2398            * Can happen if the control-plane is programming tables
2399            * with traffic flowing; at least that's today's lame excuse.
2400            */
2401           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2402             {
2403               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2404             }
2405           else
2406           /* Send ARP request. */
2407           {
2408             u32 bi0 = 0;
2409             vlib_buffer_t * b0;
2410             ethernet_arp_header_t * h0;
2411             vnet_hw_interface_t * hw_if0;
2412
2413             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2414
2415             /* Add rewrite/encap string for ARP packet. */
2416             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2417
2418             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2419
2420             /* Src ethernet address in ARP header. */
2421             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2422                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2423
2424             if (ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0)) {
2425                 //No source address available
2426                 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
                     /* Release the unused ARP request buffer. */
2427                 vlib_buffer_free(vm, &bi0, 1);
2428                 continue;
2429             }
2430
2431             /* Copy in destination address we are requesting. */
2432             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2433
2434             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2435             b0 = vlib_get_buffer (vm, bi0);
2436             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2437
                 /* Expose the rewrite bytes written in front of the ARP
                    header. */
2438             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2439
2440             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2441           }
2442         }
2443
2444       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2445     }
2446
2447   return frame->n_vectors;
2448 }
2449
/* Error counter descriptions for the ip4-arp node, indexed by
   IP4_ARP_ERROR_*. */
2450 static char * ip4_arp_error_strings[] = {
2451   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2452   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2453   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2454   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2455   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2456   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2457 };
2458
2459 VLIB_REGISTER_NODE (ip4_arp_node) = {
2460   .function = ip4_arp,
2461   .name = "ip4-arp",
2462   .vector_size = sizeof (u32),
2463
2464   .format_trace = format_ip4_forward_next_trace,
2465
2466   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2467   .error_strings = ip4_arp_error_strings,
2468
2469   .n_next_nodes = IP4_ARP_N_NEXT,
2470   .next_nodes = {
2471     [IP4_ARP_NEXT_DROP] = "error-drop",
2472   },
2473 };
2474
2475 #define foreach_notrace_ip4_arp_error           \
2476 _(DROP)                                         \
2477 _(REQUEST_SENT)                                 \
2478 _(REPLICATE_DROP)                               \
2479 _(REPLICATE_FAIL)
2480
2481 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2482 {
2483   vlib_node_runtime_t *rt = 
2484     vlib_node_get_runtime (vm, ip4_arp_node.index);
2485
2486   /* don't trace ARP request packets */
2487 #define _(a)                                    \
2488     vnet_pcap_drop_trace_filter_add_del         \
2489         (rt->errors[IP4_ARP_ERROR_##a],         \
2490          1 /* is_add */);
2491     foreach_notrace_ip4_arp_error;
2492 #undef _
2493   return 0;
2494 }
2495
2496 VLIB_INIT_FUNCTION(arp_notrace_init);
2497
2498
2499 /* Send an ARP request to see if given destination is reachable on given interface. */
2500 clib_error_t *
2501 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2502 {
2503   vnet_main_t * vnm = vnet_get_main();
2504   ip4_main_t * im = &ip4_main;
2505   ethernet_arp_header_t * h;
2506   ip4_address_t * src;
2507   ip_interface_address_t * ia;
2508   ip_adjacency_t * adj;
2509   vnet_hw_interface_t * hi;
2510   vnet_sw_interface_t * si;
2511   vlib_buffer_t * b;
2512   u32 bi = 0;
2513
2514   si = vnet_get_sw_interface (vnm, sw_if_index);
2515
2516   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2517     {
2518       return clib_error_return (0, "%U: interface %U down",
2519                                 format_ip4_address, dst, 
2520                                 format_vnet_sw_if_index_name, vnm, 
2521                                 sw_if_index);
2522     }
2523
2524   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2525   if (! src)
2526     {
2527       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2528       return clib_error_return 
2529         (0, "no matching interface address for destination %U (interface %U)",
2530          format_ip4_address, dst,
2531          format_vnet_sw_if_index_name, vnm, sw_if_index);
2532     }
2533
2534   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2535
2536   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2537
2538   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2539
2540   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2541
2542   h->ip4_over_ethernet[0].ip4 = src[0];
2543   h->ip4_over_ethernet[1].ip4 = dst[0];
2544
2545   b = vlib_get_buffer (vm, bi);
2546   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2547
2548   /* Add encapsulation string for software interface (e.g. ethernet header). */
2549   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2550   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2551
2552   {
2553     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2554     u32 * to_next = vlib_frame_vector_args (f);
2555     to_next[0] = bi;
2556     f->n_vectors = 1;
2557     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2558   }
2559
2560   return /* no error */ 0;
2561 }
2562
/* Next-node indices used by the ip4 rewrite nodes.
   IP4_REWRITE_NEXT_DROP must stay 0: the rewrite code tests a next index
   for truth to mean "not being dropped". */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ARP = 1,
  IP4_REWRITE_NEXT_ICMP_ERROR = 2,
} ip4_rewrite_next_t;
2568
2569 always_inline uword
2570 ip4_rewrite_inline (vlib_main_t * vm,
2571                     vlib_node_runtime_t * node,
2572                     vlib_frame_t * frame,
2573                     int rewrite_for_locally_received_packets)
2574 {
2575   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2576   u32 * from = vlib_frame_vector_args (frame);
2577   u32 n_left_from, n_left_to_next, * to_next, next_index;
2578   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2579   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2580
2581   n_left_from = frame->n_vectors;
2582   next_index = node->cached_next_index;
2583   u32 cpu_index = os_get_cpu_number();
2584   
2585   while (n_left_from > 0)
2586     {
2587       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2588
2589       while (n_left_from >= 4 && n_left_to_next >= 2)
2590         {
2591           ip_adjacency_t * adj0, * adj1;
2592           vlib_buffer_t * p0, * p1;
2593           ip4_header_t * ip0, * ip1;
2594           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2595           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2596           u32 next0_override, next1_override;
2597       
2598           if (rewrite_for_locally_received_packets)
2599               next0_override = next1_override = 0;
2600
2601           /* Prefetch next iteration. */
2602           {
2603             vlib_buffer_t * p2, * p3;
2604
2605             p2 = vlib_get_buffer (vm, from[2]);
2606             p3 = vlib_get_buffer (vm, from[3]);
2607
2608             vlib_prefetch_buffer_header (p2, STORE);
2609             vlib_prefetch_buffer_header (p3, STORE);
2610
2611             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2612             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2613           }
2614
2615           pi0 = to_next[0] = from[0];
2616           pi1 = to_next[1] = from[1];
2617
2618           from += 2;
2619           n_left_from -= 2;
2620           to_next += 2;
2621           n_left_to_next -= 2;
2622       
2623           p0 = vlib_get_buffer (vm, pi0);
2624           p1 = vlib_get_buffer (vm, pi1);
2625
2626           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2627           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2628
2629           /* We should never rewrite a pkt using the MISS adjacency */
2630           ASSERT(adj_index0 && adj_index1);
2631
2632           ip0 = vlib_buffer_get_current (p0);
2633           ip1 = vlib_buffer_get_current (p1);
2634
2635           error0 = error1 = IP4_ERROR_NONE;
2636           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2637
2638           /* Decrement TTL & update checksum.
2639              Works either endian, so no need for byte swap. */
2640           if (! rewrite_for_locally_received_packets)
2641             {
2642               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2643
2644               /* Input node should have reject packets with ttl 0. */
2645               ASSERT (ip0->ttl > 0);
2646               ASSERT (ip1->ttl > 0);
2647
2648               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2649               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2650
2651               checksum0 += checksum0 >= 0xffff;
2652               checksum1 += checksum1 >= 0xffff;
2653
2654               ip0->checksum = checksum0;
2655               ip1->checksum = checksum1;
2656
2657               ttl0 -= 1;
2658               ttl1 -= 1;
2659
2660               ip0->ttl = ttl0;
2661               ip1->ttl = ttl1;
2662
2663               /*
2664                * If the ttl drops below 1 when forwarding, generate
2665                * an ICMP response.
2666                */
2667               if (PREDICT_FALSE(ttl0 <= 0))
2668                 {
2669                   error0 = IP4_ERROR_TIME_EXPIRED;
2670                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2671                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2672                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2673                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2674                 }
2675               if (PREDICT_FALSE(ttl1 <= 0))
2676                 {
2677                   error1 = IP4_ERROR_TIME_EXPIRED;
2678                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2679                   icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2680                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2681                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2682                 }
2683
2684               /* Verify checksum. */
2685               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2686               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2687             }
2688
2689           /* Rewrite packet header and updates lengths. */
2690           adj0 = ip_get_adjacency (lm, adj_index0);
2691           adj1 = ip_get_adjacency (lm, adj_index1);
2692       
2693           if (rewrite_for_locally_received_packets)
2694             {
2695               /*
2696                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2697                * we end up here with a local adjacency in hand
2698                * The local adj rewrite data is 0xfefe on purpose.
2699                * Bad engineer, no donut for you.
2700                */
2701               if (PREDICT_FALSE(adj0->lookup_next_index 
2702                                 == IP_LOOKUP_NEXT_LOCAL))
2703                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2704               if (PREDICT_FALSE(adj0->lookup_next_index
2705                                 == IP_LOOKUP_NEXT_ARP))
2706                 next0_override = IP4_REWRITE_NEXT_ARP;
2707               if (PREDICT_FALSE(adj1->lookup_next_index 
2708                                 == IP_LOOKUP_NEXT_LOCAL))
2709                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2710               if (PREDICT_FALSE(adj1->lookup_next_index
2711                                 == IP_LOOKUP_NEXT_ARP))
2712                 next1_override = IP4_REWRITE_NEXT_ARP;
2713             }
2714
2715           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2716           rw_len0 = adj0[0].rewrite_header.data_bytes;
2717           rw_len1 = adj1[0].rewrite_header.data_bytes;
2718
2719           /* Check MTU of outgoing interface. */
2720           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2721                     ? IP4_ERROR_MTU_EXCEEDED
2722                     : error0);
2723           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2724                     ? IP4_ERROR_MTU_EXCEEDED
2725                     : error1);
2726
2727           next0 = (error0 == IP4_ERROR_NONE)
2728             ? adj0[0].rewrite_header.next_index : next0;
2729
2730           if (rewrite_for_locally_received_packets)
2731               next0 = next0 && next0_override ? next0_override : next0;
2732
2733           next1 = (error1 == IP4_ERROR_NONE)
2734             ? adj1[0].rewrite_header.next_index : next1;
2735
2736           if (rewrite_for_locally_received_packets)
2737               next1 = next1 && next1_override ? next1_override : next1;
2738
2739           /* 
2740            * We've already accounted for an ethernet_header_t elsewhere
2741            */
2742           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2743               vlib_increment_combined_counter 
2744                   (&lm->adjacency_counters,
2745                    cpu_index, adj_index0, 
2746                    /* packet increment */ 0,
2747                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2748
2749           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2750               vlib_increment_combined_counter 
2751                   (&lm->adjacency_counters,
2752                    cpu_index, adj_index1, 
2753                    /* packet increment */ 0,
2754                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2755
2756           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2757            * to see the IP headerr */
2758           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2759             {
2760               p0->current_data -= rw_len0;
2761               p0->current_length += rw_len0;
2762               p0->error = error_node->errors[error0];
2763               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2764                   adj0[0].rewrite_header.sw_if_index;
2765             }
2766           if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2767             {
2768               p1->current_data -= rw_len1;
2769               p1->current_length += rw_len1;
2770               p1->error = error_node->errors[error1];
2771               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2772                   adj1[0].rewrite_header.sw_if_index;
2773             }
2774
2775           /* Guess we are only writing on simple Ethernet header. */
2776           vnet_rewrite_two_headers (adj0[0], adj1[0],
2777                                     ip0, ip1,
2778                                     sizeof (ethernet_header_t));
2779       
2780           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2781                                            to_next, n_left_to_next,
2782                                            pi0, pi1, next0, next1);
2783         }
2784
2785       while (n_left_from > 0 && n_left_to_next > 0)
2786         {
2787           ip_adjacency_t * adj0;
2788           vlib_buffer_t * p0;
2789           ip4_header_t * ip0;
2790           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2791           u32 next0_override;
2792       
2793           if (rewrite_for_locally_received_packets)
2794               next0_override = 0;
2795
2796           pi0 = to_next[0] = from[0];
2797
2798           p0 = vlib_get_buffer (vm, pi0);
2799
2800           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2801
2802           /* We should never rewrite a pkt using the MISS adjacency */
2803           ASSERT(adj_index0);
2804
2805           adj0 = ip_get_adjacency (lm, adj_index0);
2806       
2807           ip0 = vlib_buffer_get_current (p0);
2808
2809           error0 = IP4_ERROR_NONE;
2810           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
2811
2812           /* Decrement TTL & update checksum. */
2813           if (! rewrite_for_locally_received_packets)
2814             {
2815               i32 ttl0 = ip0->ttl;
2816
2817               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2818
2819               checksum0 += checksum0 >= 0xffff;
2820
2821               ip0->checksum = checksum0;
2822
2823               ASSERT (ip0->ttl > 0);
2824
2825               ttl0 -= 1;
2826
2827               ip0->ttl = ttl0;
2828
2829               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2830
2831               if (PREDICT_FALSE(ttl0 <= 0))
2832                 {
2833                   /*
2834                    * If the ttl drops below 1 when forwarding, generate
2835                    * an ICMP response.
2836                    */
2837                   error0 = IP4_ERROR_TIME_EXPIRED;
2838                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2839                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2840                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2841                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2842                 }
2843             }
2844
2845           if (rewrite_for_locally_received_packets)
2846             {
2847               /*
2848                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2849                * we end up here with a local adjacency in hand
2850                * The local adj rewrite data is 0xfefe on purpose.
2851                * Bad engineer, no donut for you.
2852                */
2853               if (PREDICT_FALSE(adj0->lookup_next_index 
2854                                 == IP_LOOKUP_NEXT_LOCAL))
2855                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2856               /* 
2857                * We have to override the next_index in ARP adjacencies,
2858                * because they're set up for ip4-arp, not this node...
2859                */
2860               if (PREDICT_FALSE(adj0->lookup_next_index
2861                                 == IP_LOOKUP_NEXT_ARP))
2862                 next0_override = IP4_REWRITE_NEXT_ARP;
2863             }
2864
2865           /* Guess we are only writing on simple Ethernet header. */
2866           vnet_rewrite_one_header (adj0[0], ip0, 
2867                                    sizeof (ethernet_header_t));
2868           
2869           /* Update packet buffer attributes/set output interface. */
2870           rw_len0 = adj0[0].rewrite_header.data_bytes;
2871           
2872           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2873               vlib_increment_combined_counter 
2874                   (&lm->adjacency_counters,
2875                    cpu_index, adj_index0, 
2876                    /* packet increment */ 0,
2877                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2878           
2879           /* Check MTU of outgoing interface. */
2880           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2881                     > adj0[0].rewrite_header.max_l3_packet_bytes
2882                     ? IP4_ERROR_MTU_EXCEEDED
2883                     : error0);
2884
2885           p0->error = error_node->errors[error0];
2886
2887           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2888            * to see the IP headerr */
2889           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2890             {
2891               p0->current_data -= rw_len0;
2892               p0->current_length += rw_len0;
2893
2894               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2895                   adj0[0].rewrite_header.sw_if_index;
2896               next0 = adj0[0].rewrite_header.next_index;
2897             }
2898
2899           if (rewrite_for_locally_received_packets)
2900               next0 = next0 && next0_override ? next0_override : next0;
2901
2902           from += 1;
2903           n_left_from -= 1;
2904           to_next += 1;
2905           n_left_to_next -= 1;
2906       
2907           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2908                                            to_next, n_left_to_next,
2909                                            pi0, next0);
2910         }
2911   
2912       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2913     }
2914
2915   /* Need to do trace after rewrites to pick up new packet data. */
2916   if (node->flags & VLIB_NODE_FLAG_TRACE)
2917     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2918
2919   return frame->n_vectors;
2920 }
2921
2922
2923 /** \brief IPv4 transit rewrite node.
2924     @node ip4-rewrite-transit
2925
2926     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2927     header checksum, fetch the ip adjacency, check the outbound mtu,
2928     apply the adjacency rewrite, and send pkts to the adjacency
2929     rewrite header's rewrite_next_index.
2930
2931     @param vm vlib_main_t corresponding to the current thread
2932     @param node vlib_node_runtime_t
2933     @param frame vlib_frame_t whose contents should be dispatched
2934
2935     @par Graph mechanics: buffer metadata, next index usage
2936
2937     @em Uses:
2938     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2939         - the rewrite adjacency index
2940     - <code>adj->lookup_next_index</code>
2941         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2942           the packet will be dropped. 
2943     - <code>adj->rewrite_header</code>
2944         - Rewrite string length, rewrite string, next_index
2945
2946     @em Sets:
2947     - <code>b->current_data, b->current_length</code>
2948         - Updated net of applying the rewrite string
2949
2950     <em>Next Indices:</em>
2951     - <code> adj->rewrite_header.next_index </code>
2952       or @c error-drop 
2953 */
2954 static uword
2955 ip4_rewrite_transit (vlib_main_t * vm,
2956                      vlib_node_runtime_t * node,
2957                      vlib_frame_t * frame)
2958 {
2959   return ip4_rewrite_inline (vm, node, frame,
2960                              /* rewrite_for_locally_received_packets */ 0);
2961 }
2962
2963 /** \brief IPv4 local rewrite node.
2964     @node ip4-rewrite-local
2965
2966     This is the IPv4 local rewrite node. Fetch the ip adjacency, check
2967     the outbound interface mtu, apply the adjacency rewrite, and send
2968     pkts to the adjacency rewrite header's rewrite_next_index. Deal
2969     with hemorrhoids of the form "some clown sends an icmp4 w/ src =
2970     dst = interface addr."
2971
2972     @param vm vlib_main_t corresponding to the current thread
2973     @param node vlib_node_runtime_t
2974     @param frame vlib_frame_t whose contents should be dispatched
2975
2976     @par Graph mechanics: buffer metadata, next index usage
2977
2978     @em Uses:
2979     - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
2980         - the rewrite adjacency index
2981     - <code>adj->lookup_next_index</code>
2982         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2983           the packet will be dropped. 
2984     - <code>adj->rewrite_header</code>
2985         - Rewrite string length, rewrite string, next_index
2986
2987     @em Sets:
2988     - <code>b->current_data, b->current_length</code>
2989         - Updated net of applying the rewrite string
2990
2991     <em>Next Indices:</em>
2992     - <code> adj->rewrite_header.next_index </code>
2993       or @c error-drop 
2994 */
2995
2996 static uword
2997 ip4_rewrite_local (vlib_main_t * vm,
2998                    vlib_node_runtime_t * node,
2999                    vlib_frame_t * frame)
3000 {
3001   return ip4_rewrite_inline (vm, node, frame,
3002                              /* rewrite_for_locally_received_packets */ 1);
3003 }
3004
3005 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
3006   .function = ip4_rewrite_transit,
3007   .name = "ip4-rewrite-transit",
3008   .vector_size = sizeof (u32),
3009
3010   .format_trace = format_ip4_rewrite_trace,
3011
3012   .n_next_nodes = 3,
3013   .next_nodes = {
3014     [IP4_REWRITE_NEXT_DROP] = "error-drop",
3015     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
3016     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3017   },
3018 };
3019
3020 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
3021
3022 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
3023   .function = ip4_rewrite_local,
3024   .name = "ip4-rewrite-local",
3025   .vector_size = sizeof (u32),
3026
3027   .sibling_of = "ip4-rewrite-transit",
3028
3029   .format_trace = format_ip4_rewrite_trace,
3030
3031   .n_next_nodes = 0,
3032 };
3033
3034 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
3035
3036 static clib_error_t *
3037 add_del_interface_table (vlib_main_t * vm,
3038                          unformat_input_t * input,
3039                          vlib_cli_command_t * cmd)
3040 {
3041   vnet_main_t * vnm = vnet_get_main();
3042   clib_error_t * error = 0;
3043   u32 sw_if_index, table_id;
3044
3045   sw_if_index = ~0;
3046
3047   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
3048     {
3049       error = clib_error_return (0, "unknown interface `%U'",
3050                                  format_unformat_error, input);
3051       goto done;
3052     }
3053
3054   if (unformat (input, "%d", &table_id))
3055     ;
3056   else
3057     {
3058       error = clib_error_return (0, "expected table id `%U'",
3059                                  format_unformat_error, input);
3060       goto done;
3061     }
3062
3063   {
3064     ip4_main_t * im = &ip4_main;
3065     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
3066
3067     if (fib) 
3068       {
3069         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
3070         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
3071     }
3072   }
3073
3074  done:
3075   return error;
3076 }
3077
3078 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
3079   .path = "set interface ip table",
3080   .function = add_del_interface_table,
3081   .short_help = "Add/delete FIB table id for interface",
3082 };
3083
3084
3085 static uword
3086 ip4_lookup_multicast (vlib_main_t * vm,
3087                       vlib_node_runtime_t * node,
3088                       vlib_frame_t * frame)
3089 {
3090   ip4_main_t * im = &ip4_main;
3091   ip_lookup_main_t * lm = &im->lookup_main;
3092   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
3093   u32 n_left_from, n_left_to_next, * from, * to_next;
3094   ip_lookup_next_t next;
3095   u32 cpu_index = os_get_cpu_number();
3096
3097   from = vlib_frame_vector_args (frame);
3098   n_left_from = frame->n_vectors;
3099   next = node->cached_next_index;
3100
3101   while (n_left_from > 0)
3102     {
3103       vlib_get_next_frame (vm, node, next,
3104                            to_next, n_left_to_next);
3105
3106       while (n_left_from >= 4 && n_left_to_next >= 2)
3107         {
3108           vlib_buffer_t * p0, * p1;
3109           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
3110           ip_lookup_next_t next0, next1;
3111           ip4_header_t * ip0, * ip1;
3112           ip_adjacency_t * adj0, * adj1;
3113           u32 fib_index0, fib_index1;
3114           u32 flow_hash_config0, flow_hash_config1;
3115
3116           /* Prefetch next iteration. */
3117           {
3118             vlib_buffer_t * p2, * p3;
3119
3120             p2 = vlib_get_buffer (vm, from[2]);
3121             p3 = vlib_get_buffer (vm, from[3]);
3122
3123             vlib_prefetch_buffer_header (p2, LOAD);
3124             vlib_prefetch_buffer_header (p3, LOAD);
3125
3126             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
3127             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
3128           }
3129
3130           pi0 = to_next[0] = from[0];
3131           pi1 = to_next[1] = from[1];
3132
3133           p0 = vlib_get_buffer (vm, pi0);
3134           p1 = vlib_get_buffer (vm, pi1);
3135
3136           ip0 = vlib_buffer_get_current (p0);
3137           ip1 = vlib_buffer_get_current (p1);
3138
3139           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3140           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
3141           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3142             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3143           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
3144             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
3145
3146           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3147                                               &ip0->dst_address, p0);
3148           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
3149                                               &ip1->dst_address, p1);
3150
3151           adj0 = ip_get_adjacency (lm, adj_index0);
3152           adj1 = ip_get_adjacency (lm, adj_index1);
3153
3154           next0 = adj0->lookup_next_index;
3155           next1 = adj1->lookup_next_index;
3156
3157           flow_hash_config0 = 
3158               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3159
3160           flow_hash_config1 = 
3161               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
3162
3163           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
3164               (ip0, flow_hash_config0);
3165                                                                   
3166           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
3167               (ip1, flow_hash_config1);
3168
3169           ASSERT (adj0->n_adj > 0);
3170           ASSERT (adj1->n_adj > 0);
3171           ASSERT (is_pow2 (adj0->n_adj));
3172           ASSERT (is_pow2 (adj1->n_adj));
3173           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3174           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
3175
3176           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3177           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
3178
3179           if (1) /* $$$$$$ HACK FIXME */
3180           vlib_increment_combined_counter 
3181               (cm, cpu_index, adj_index0, 1,
3182                vlib_buffer_length_in_chain (vm, p0));
3183           if (1) /* $$$$$$ HACK FIXME */
3184           vlib_increment_combined_counter 
3185               (cm, cpu_index, adj_index1, 1,
3186                vlib_buffer_length_in_chain (vm, p1));
3187
3188           from += 2;
3189           to_next += 2;
3190           n_left_to_next -= 2;
3191           n_left_from -= 2;
3192
3193           wrong_next = (next0 != next) + 2*(next1 != next);
3194           if (PREDICT_FALSE (wrong_next != 0))
3195             {
3196               switch (wrong_next)
3197                 {
3198                 case 1:
3199                   /* A B A */
3200                   to_next[-2] = pi1;
3201                   to_next -= 1;
3202                   n_left_to_next += 1;
3203                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3204                   break;
3205
3206                 case 2:
3207                   /* A A B */
3208                   to_next -= 1;
3209                   n_left_to_next += 1;
3210                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3211                   break;
3212
3213                 case 3:
3214                   /* A B C */
3215                   to_next -= 2;
3216                   n_left_to_next += 2;
3217                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3218                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3219                   if (next0 == next1)
3220                     {
3221                       /* A B B */
3222                       vlib_put_next_frame (vm, node, next, n_left_to_next);
3223                       next = next1;
3224                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
3225                     }
3226                 }
3227             }
3228         }
3229     
3230       while (n_left_from > 0 && n_left_to_next > 0)
3231         {
3232           vlib_buffer_t * p0;
3233           ip4_header_t * ip0;
3234           u32 pi0, adj_index0;
3235           ip_lookup_next_t next0;
3236           ip_adjacency_t * adj0;
3237           u32 fib_index0;
3238           u32 flow_hash_config0;
3239
3240           pi0 = from[0];
3241           to_next[0] = pi0;
3242
3243           p0 = vlib_get_buffer (vm, pi0);
3244
3245           ip0 = vlib_buffer_get_current (p0);
3246
3247           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
3248                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3249           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3250               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3251           
3252           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3253                                               &ip0->dst_address, p0);
3254
3255           adj0 = ip_get_adjacency (lm, adj_index0);
3256
3257           next0 = adj0->lookup_next_index;
3258
3259           flow_hash_config0 = 
3260               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3261
3262           vnet_buffer (p0)->ip.flow_hash = 
3263             ip4_compute_flow_hash (ip0, flow_hash_config0);
3264
3265           ASSERT (adj0->n_adj > 0);
3266           ASSERT (is_pow2 (adj0->n_adj));
3267           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3268
3269           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3270
3271           if (1) /* $$$$$$ HACK FIXME */
3272               vlib_increment_combined_counter 
3273                   (cm, cpu_index, adj_index0, 1,
3274                    vlib_buffer_length_in_chain (vm, p0));
3275
3276           from += 1;
3277           to_next += 1;
3278           n_left_to_next -= 1;
3279           n_left_from -= 1;
3280
3281           if (PREDICT_FALSE (next0 != next))
3282             {
3283               n_left_to_next += 1;
3284               vlib_put_next_frame (vm, node, next, n_left_to_next);
3285               next = next0;
3286               vlib_get_next_frame (vm, node, next,
3287                                    to_next, n_left_to_next);
3288               to_next[0] = pi0;
3289               to_next += 1;
3290               n_left_to_next -= 1;
3291             }
3292         }
3293
3294       vlib_put_next_frame (vm, node, next, n_left_to_next);
3295     }
3296
3297   if (node->flags & VLIB_NODE_FLAG_TRACE)
3298       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
3299
3300   return frame->n_vectors;
3301 }
3302
3303 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3304   .function = ip4_lookup_multicast,
3305   .name = "ip4-lookup-multicast",
3306   .vector_size = sizeof (u32),
3307   .sibling_of = "ip4-lookup",
3308   .format_trace = format_ip4_lookup_trace,
3309
3310   .n_next_nodes = 0,
3311 };
3312
3313 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
3314
3315 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3316   .function = ip4_drop,
3317   .name = "ip4-multicast",
3318   .vector_size = sizeof (u32),
3319
3320   .format_trace = format_ip4_forward_next_trace,
3321
3322   .n_next_nodes = 1,
3323   .next_nodes = {
3324     [0] = "error-drop",
3325   },
3326 };
3327
3328 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3329 {
3330   ip4_main_t * im = &ip4_main;
3331   ip4_fib_mtrie_t * mtrie0;
3332   ip4_fib_mtrie_leaf_t leaf0;
3333   u32 adj_index0;
3334     
3335   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3336
3337   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3338   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3339   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3340   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3341   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3342   
3343   /* Handle default route. */
3344   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3345   
3346   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3347   
3348   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3349                                                   a, 
3350                                                   /* no_default_route */ 0);
3351 }
3352  
3353 static clib_error_t *
3354 test_lookup_command_fn (vlib_main_t * vm,
3355                         unformat_input_t * input,
3356                         vlib_cli_command_t * cmd)
3357 {
3358   u32 table_id = 0;
3359   f64 count = 1;
3360   u32 n;
3361   int i;
3362   ip4_address_t ip4_base_address;
3363   u64 errors = 0;
3364
3365   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3366       if (unformat (input, "table %d", &table_id))
3367         ;
3368       else if (unformat (input, "count %f", &count))
3369         ;
3370
3371       else if (unformat (input, "%U",
3372                          unformat_ip4_address, &ip4_base_address))
3373         ;
3374       else
3375         return clib_error_return (0, "unknown input `%U'",
3376                                   format_unformat_error, input);
3377   }
3378
3379   n = count;
3380
3381   for (i = 0; i < n; i++)
3382     {
3383       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3384         errors++;
3385
3386       ip4_base_address.as_u32 = 
3387         clib_host_to_net_u32 (1 + 
3388                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3389     }
3390
3391   if (errors) 
3392     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3393   else
3394     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3395
3396   return 0;
3397 }
3398
3399 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3400     .path = "test lookup",
3401     .short_help = "test lookup",
3402     .function = test_lookup_command_fn,
3403 };
3404
3405 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3406 {
3407   ip4_main_t * im4 = &ip4_main;
3408   ip4_fib_t * fib;
3409   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3410
3411   if (p == 0)
3412     return VNET_API_ERROR_NO_SUCH_FIB;
3413
3414   fib = vec_elt_at_index (im4->fibs, p[0]);
3415
3416   fib->flow_hash_config = flow_hash_config;
3417   return 0;
3418 }
3419  
3420 static clib_error_t *
3421 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3422                              unformat_input_t * input,
3423                              vlib_cli_command_t * cmd)
3424 {
3425   int matched = 0;
3426   u32 table_id = 0;
3427   u32 flow_hash_config = 0;
3428   int rv;
3429
3430   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3431     if (unformat (input, "table %d", &table_id))
3432       matched = 1;
3433 #define _(a,v) \
3434     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3435     foreach_flow_hash_bit
3436 #undef _
3437     else break;
3438   }
3439   
3440   if (matched == 0)
3441     return clib_error_return (0, "unknown input `%U'",
3442                               format_unformat_error, input);
3443   
3444   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3445   switch (rv)
3446     {
3447     case 0:
3448       break;
3449       
3450     case VNET_API_ERROR_NO_SUCH_FIB:
3451       return clib_error_return (0, "no such FIB table %d", table_id);
3452       
3453     default:
3454       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3455       break;
3456     }
3457   
3458   return 0;
3459 }
3460  
3461 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3462   .path = "set ip flow-hash",
3463   .short_help = 
3464   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3465   .function = set_ip_flow_hash_command_fn,
3466 };
3467  
3468 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3469                                  u32 table_index)
3470 {
3471   vnet_main_t * vnm = vnet_get_main();
3472   vnet_interface_main_t * im = &vnm->interface_main;
3473   ip4_main_t * ipm = &ip4_main;
3474   ip_lookup_main_t * lm = &ipm->lookup_main;
3475   vnet_classify_main_t * cm = &vnet_classify_main;
3476
3477   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3478     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3479
3480   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3481     return VNET_API_ERROR_NO_SUCH_ENTRY;
3482
3483   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3484   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3485
3486   return 0;
3487 }
3488
3489 static clib_error_t *
3490 set_ip_classify_command_fn (vlib_main_t * vm,
3491                             unformat_input_t * input,
3492                             vlib_cli_command_t * cmd)
3493 {
3494   u32 table_index = ~0;
3495   int table_index_set = 0;
3496   u32 sw_if_index = ~0;
3497   int rv;
3498   
3499   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3500     if (unformat (input, "table-index %d", &table_index))
3501       table_index_set = 1;
3502     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3503                        vnet_get_main(), &sw_if_index))
3504       ;
3505     else
3506       break;
3507   }
3508       
3509   if (table_index_set == 0)
3510     return clib_error_return (0, "classify table-index must be specified");
3511
3512   if (sw_if_index == ~0)
3513     return clib_error_return (0, "interface / subif must be specified");
3514
3515   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3516
3517   switch (rv)
3518     {
3519     case 0:
3520       break;
3521
3522     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3523       return clib_error_return (0, "No such interface");
3524
3525     case VNET_API_ERROR_NO_SUCH_ENTRY:
3526       return clib_error_return (0, "No such classifier table");
3527     }
3528   return 0;
3529 }
3530
3531 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3532     .path = "set ip classify",
3533     .short_help = 
3534     "set ip classify intfc <int> table-index <index>",
3535     .function = set_ip_classify_command_fn,
3536 };
3537