Add some doxygen tags
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47
48 /** \file
49     vnet ip4 forwarding 
50 */
51
52 /* This is really, really simple but stupid fib. */
53 u32
54 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
55                            ip4_address_t * dst,
56                            u32 disable_default_route)
57 {
58   ip_lookup_main_t * lm = &im->lookup_main;
59   ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
60   uword * p, * hash, key;
61   i32 i, i_min, dst_address, ai;
62
63   i_min = disable_default_route ? 1 : 0;
64   dst_address = clib_mem_unaligned (&dst->data_u32, u32);
65   for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
66     {
67       hash = fib->adj_index_by_dst_address[i];
68       if (! hash)
69         continue;
70
71       key = dst_address & im->fib_masks[i];
72       if ((p = hash_get (hash, key)) != 0)
73         {
74           ai = p[0];
75           goto done;
76         }
77     }
78     
79   /* Nothing matches in table. */
80   ai = lm->miss_adj_index;
81
82  done:
83   return ai;
84 }
85
86 static ip4_fib_t *
87 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
88 {
89   ip4_fib_t * fib;
90   hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
91   vec_add2 (im->fibs, fib, 1);
92   fib->table_id = table_id;
93   fib->index = fib - im->fibs;
94   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
95   fib->fwd_classify_table_index = ~0;
96   fib->rev_classify_table_index = ~0;
97   ip4_mtrie_init (&fib->mtrie);
98   return fib;
99 }
100
101 ip4_fib_t *
102 find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
103                                    u32 table_index_or_id, u32 flags)
104 {
105   uword * p, fib_index;
106
107   fib_index = table_index_or_id;
108   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
109     {
110       if (table_index_or_id == ~0) {
111         table_index_or_id = 0;
112         while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
113           table_index_or_id++;
114         }
115         return create_fib_with_table_id (im, table_index_or_id);
116       }
117
118       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
119       if (! p)
120         return create_fib_with_table_id (im, table_index_or_id);
121       fib_index = p[0];
122     }
123   return vec_elt_at_index (im->fibs, fib_index);
124 }
125
126 static void
127 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
128                                        ip4_fib_t * fib,
129                                        u32 address_length)
130 {
131   hash_t * h;
132   uword max_index;
133
134   ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
135   lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
136
137   fib->adj_index_by_dst_address[address_length] =
138     hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
139
140   hash_set_flags (fib->adj_index_by_dst_address[address_length],
141                   HASH_FLAG_NO_AUTO_SHRINK);
142
143   h = hash_header (fib->adj_index_by_dst_address[address_length]);
144   max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
145
146   /* Initialize new/old hash value vectors. */
147   vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
148   vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
149 }
150
151 static void
152 ip4_fib_set_adj_index (ip4_main_t * im,
153                        ip4_fib_t * fib,
154                        u32 flags,
155                        u32 dst_address_u32,
156                        u32 dst_address_length,
157                        u32 adj_index)
158 {
159   ip_lookup_main_t * lm = &im->lookup_main;
160   uword * hash;
161
162   if (vec_bytes(fib->old_hash_values))
163     memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
164   if (vec_bytes(fib->new_hash_values))
165     memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
166   fib->new_hash_values[0] = adj_index;
167
168   /* Make sure adj index is valid. */
169   if (CLIB_DEBUG > 0)
170     (void) ip_get_adjacency (lm, adj_index);
171
172   hash = fib->adj_index_by_dst_address[dst_address_length];
173
174   hash = _hash_set3 (hash, dst_address_u32,
175                      fib->new_hash_values,
176                      fib->old_hash_values);
177
178   fib->adj_index_by_dst_address[dst_address_length] = hash;
179
180   if (vec_len (im->add_del_route_callbacks) > 0)
181     {
182       ip4_add_del_route_callback_t * cb;
183       ip4_address_t d;
184       uword * p;
185
186       d.data_u32 = dst_address_u32;
187       vec_foreach (cb, im->add_del_route_callbacks)
188         if ((flags & cb->required_flags) == cb->required_flags)
189           cb->function (im, cb->function_opaque,
190                         fib, flags,
191                         &d, dst_address_length,
192                         fib->old_hash_values,
193                         fib->new_hash_values);
194
195       p = hash_get (hash, dst_address_u32);
196       clib_memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
197     }
198 }
199
200 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
201 {
202   ip_lookup_main_t * lm = &im->lookup_main;
203   ip4_fib_t * fib;
204   u32 dst_address, dst_address_length, adj_index, old_adj_index;
205   uword * hash, is_del;
206   ip4_add_del_route_callback_t * cb;
207
208   /* Either create new adjacency or use given one depending on arguments. */
209   if (a->n_add_adj > 0)
210     {
211       ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
212       ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
213     }
214   else
215     adj_index = a->adj_index;
216
217   dst_address = a->dst_address.data_u32;
218   dst_address_length = a->dst_address_length;
219   fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
220
221   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
222   dst_address &= im->fib_masks[dst_address_length];
223
224   if (! fib->adj_index_by_dst_address[dst_address_length])
225     ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
226
227   hash = fib->adj_index_by_dst_address[dst_address_length];
228
229   is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
230
231   if (is_del)
232     {
233       fib->old_hash_values[0] = ~0;
234       hash = _hash_unset (hash, dst_address, fib->old_hash_values);
235       fib->adj_index_by_dst_address[dst_address_length] = hash;
236
237       if (vec_len (im->add_del_route_callbacks) > 0
238           && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
239         {
240           fib->new_hash_values[0] = ~0;
241           vec_foreach (cb, im->add_del_route_callbacks)
242             if ((a->flags & cb->required_flags) == cb->required_flags)
243               cb->function (im, cb->function_opaque,
244                             fib, a->flags,
245                             &a->dst_address, dst_address_length,
246                             fib->old_hash_values,
247                             fib->new_hash_values);
248         }
249     }
250   else
251     ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
252                            adj_index);
253
254   old_adj_index = fib->old_hash_values[0];
255
256   /* Avoid spurious reference count increments */
257   if (old_adj_index == adj_index
258       && adj_index != ~0
259       && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
260     {
261       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
262       if (adj->share_count > 0)
263         adj->share_count --;
264     }
265
266   ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
267                                is_del ? old_adj_index : adj_index,
268                                is_del);
269
270   /* Delete old adjacency index if present and changed. */
271   if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
272       && old_adj_index != ~0
273       && old_adj_index != adj_index)
274     ip_del_adjacency (lm, old_adj_index);
275 }
276
277 void
278 ip4_add_del_route_next_hop (ip4_main_t * im,
279                             u32 flags,
280                             ip4_address_t * dst_address,
281                             u32 dst_address_length,
282                             ip4_address_t * next_hop,
283                             u32 next_hop_sw_if_index,
284                             u32 next_hop_weight, u32 adj_index, 
285                             u32 explicit_fib_index)
286 {
287   vnet_main_t * vnm = vnet_get_main();
288   ip_lookup_main_t * lm = &im->lookup_main;
289   u32 fib_index;
290   ip4_fib_t * fib;
291   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
292   u32 dst_adj_index, nh_adj_index;
293   uword * dst_hash, * dst_result;
294   uword * nh_hash, * nh_result;
295   ip_adjacency_t * dst_adj;
296   ip_multipath_adjacency_t * old_mp, * new_mp;
297   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
298   int is_interface_next_hop;
299   clib_error_t * error = 0;
300
301   if (explicit_fib_index == (u32)~0)
302       fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
303   else
304       fib_index = explicit_fib_index;
305
306   fib = vec_elt_at_index (im->fibs, fib_index);
307   
308   /* Lookup next hop to be added or deleted. */
309   is_interface_next_hop = next_hop->data_u32 == 0;
310   if (adj_index == (u32)~0)
311     {
312       if (is_interface_next_hop)
313         {
314           nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
315           if (nh_result)
316             nh_adj_index = *nh_result;
317           else
318             {
319               ip_adjacency_t * adj;
320               adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
321                                       &nh_adj_index);
322               ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
323               ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
324               hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
325             }
326         }
327       else
328         {
329           nh_hash = fib->adj_index_by_dst_address[32];
330           nh_result = hash_get (nh_hash, next_hop->data_u32);
331           
332           /* Next hop must be known. */
333           if (! nh_result)
334             {
335               ip_adjacency_t * adj;
336
337               nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
338                                                         next_hop, 0);
339               adj = ip_get_adjacency (lm, nh_adj_index);
340               /* if ARP interface adjacencty is present, we need to
341                  install ARP adjaceny for specific next hop */
342               if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
343                   adj->arp.next_hop.ip4.as_u32 == 0)
344                 {
345                   nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
346                 }
347               else
348                 {
349                   /* Next hop is not known, so create indirect adj */
350                   ip_adjacency_t add_adj;
351                   memset (&add_adj, 0, sizeof(add_adj));
352                   add_adj.n_adj = 1;
353                   add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
354                   add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
355                   add_adj.explicit_fib_index = explicit_fib_index;
356                   ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
357                 }
358             }
359           else
360             nh_adj_index = *nh_result;
361         }
362     }
363   else
364     {
365       nh_adj_index = adj_index;
366     }
367   ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
368   dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
369
370   dst_hash = fib->adj_index_by_dst_address[dst_address_length];
371   dst_result = hash_get (dst_hash, dst_address_u32);
372   if (dst_result)
373     {
374       dst_adj_index = dst_result[0];
375       dst_adj = ip_get_adjacency (lm, dst_adj_index);
376     }
377   else
378     {
379       /* For deletes destination must be known. */
380       if (is_del)
381         {
382           vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
383           error = clib_error_return (0, "unknown destination %U/%d",
384                                      format_ip4_address, dst_address,
385                                      dst_address_length);
386           goto done;
387         }
388
389       dst_adj_index = ~0;
390       dst_adj = 0;
391     }
392
393   /* Ignore adds of X/32 with next hop of X. */
394   if (! is_del
395       && dst_address_length == 32
396       && dst_address->data_u32 == next_hop->data_u32 
397       && adj_index != (u32)~0)
398     {
399       vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
400       error = clib_error_return (0, "prefix matches next hop %U/%d",
401                                  format_ip4_address, dst_address,
402                                  dst_address_length);
403       goto done;
404     }
405
406   /* Destination is not known and default weight is set so add route
407      to existing non-multipath adjacency */
408   if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
409     {
410       /* create / delete additional mapping of existing adjacency */
411       ip4_add_del_route_args_t a;
412       ip_adjacency_t * nh_adj = ip_get_adjacency (lm, nh_adj_index);
413
414       a.table_index_or_table_id = fib_index;
415       a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
416                  | IP4_ROUTE_FLAG_FIB_INDEX
417                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
418                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
419                              | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
420       a.dst_address = dst_address[0];
421       a.dst_address_length = dst_address_length;
422       a.adj_index = nh_adj_index;
423       a.add_adj = 0;
424       a.n_add_adj = 0;
425
426       ip4_add_del_route (im, &a);
427
428       /* adjust share count. This cannot be the only use of the adjacency */
429       nh_adj->share_count += is_del ? -1 : 1;
430         
431       goto done;
432     }
433
434   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
435
436   if (! ip_multipath_adjacency_add_del_next_hop
437       (lm, is_del,
438        old_mp_adj_index,
439        nh_adj_index,
440        next_hop_weight,
441        &new_mp_adj_index))
442     {
443       vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
444       error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
445                                  format_ip4_address, next_hop);
446       goto done;
447     }
448   
449   old_mp = new_mp = 0;
450   if (old_mp_adj_index != ~0)
451     old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
452   if (new_mp_adj_index != ~0)
453     new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
454
455   if (old_mp != new_mp)
456     {
457       ip4_add_del_route_args_t a;
458       ip_adjacency_t * adj;
459
460       a.table_index_or_table_id = fib_index;
461       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
462                  | IP4_ROUTE_FLAG_FIB_INDEX
463                  | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
464                  | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
465       a.dst_address = dst_address[0];
466       a.dst_address_length = dst_address_length;
467       a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
468       a.add_adj = 0;
469       a.n_add_adj = 0;
470
471       ip4_add_del_route (im, &a);
472
473       adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index);
474       if (adj->n_adj == 1)
475         adj->share_count += is_del ? -1 : 1;
476     }
477
478  done:
479   if (error)
480     clib_error_report (error);
481 }
482
483 void *
484 ip4_get_route (ip4_main_t * im,
485                u32 table_index_or_table_id,
486                u32 flags,
487                u8 * address,
488                u32 address_length)
489 {
490   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
491   u32 dst_address = * (u32 *) address;
492   uword * hash, * p;
493
494   ASSERT (address_length < ARRAY_LEN (im->fib_masks));
495   dst_address &= im->fib_masks[address_length];
496
497   hash = fib->adj_index_by_dst_address[address_length];
498   p = hash_get (hash, dst_address);
499   return (void *) p;
500 }
501
502 void
503 ip4_foreach_matching_route (ip4_main_t * im,
504                             u32 table_index_or_table_id,
505                             u32 flags,
506                             ip4_address_t * address,
507                             u32 address_length,
508                             ip4_address_t ** results,
509                             u8 ** result_lengths)
510 {
511   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
512   u32 dst_address = address->data_u32;
513   u32 this_length = address_length;
514   
515   if (*results)
516     _vec_len (*results) = 0;
517   if (*result_lengths)
518     _vec_len (*result_lengths) = 0;
519
520   while (this_length <= 32 && vec_len (results) == 0)
521     {
522       uword k, v;
523       hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
524         if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
525           {
526             ip4_address_t a;
527             a.data_u32 = k;
528             vec_add1 (*results, a);
529             vec_add1 (*result_lengths, this_length);
530           }
531       }));
532
533       this_length++;
534     }
535 }
536
537 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
538                                   u32 table_index_or_table_id,
539                                   u32 flags)
540 {
541   ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
542   ip_lookup_main_t * lm = &im->lookup_main;
543   u32 i, l;
544   ip4_address_t a;
545   ip4_add_del_route_callback_t * cb;
546   static ip4_address_t * to_delete;
547
548   if (lm->n_adjacency_remaps == 0)
549     return;
550
551   for (l = 0; l <= 32; l++)
552     {
553       hash_pair_t * p;
554       uword * hash = fib->adj_index_by_dst_address[l];
555
556       if (hash_elts (hash) == 0)
557         continue;
558
559       if (to_delete)
560         _vec_len (to_delete) = 0;
561
562       hash_foreach_pair (p, hash, ({
563         u32 adj_index = p->value[0];
564         u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
565
566         if (m)
567           {
568             /* Record destination address from hash key. */
569             a.data_u32 = p->key;
570
571             /* New adjacency points to nothing: so delete prefix. */
572             if (m == ~0)
573               vec_add1 (to_delete, a);
574             else
575               {
576                 /* Remap to new adjacency. */
577                 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
578
579                 /* Set new adjacency value. */
580                 fib->new_hash_values[0] = p->value[0] = m - 1;
581
582                 vec_foreach (cb, im->add_del_route_callbacks)
583                   if ((flags & cb->required_flags) == cb->required_flags)
584                     cb->function (im, cb->function_opaque,
585                                   fib, flags | IP4_ROUTE_FLAG_ADD,
586                                   &a, l,
587                                   fib->old_hash_values,
588                                   fib->new_hash_values);
589               }
590           }
591       }));
592
593       fib->new_hash_values[0] = ~0;
594       for (i = 0; i < vec_len (to_delete); i++)
595         {
596           hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
597           vec_foreach (cb, im->add_del_route_callbacks)
598             if ((flags & cb->required_flags) == cb->required_flags)
599               cb->function (im, cb->function_opaque,
600                             fib, flags | IP4_ROUTE_FLAG_DEL,
601                             &a, l,
602                             fib->old_hash_values,
603                             fib->new_hash_values);
604         }
605     }
606
607   /* Also remap adjacencies in mtrie. */
608   ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
609
610   /* Reset mapping table. */
611   vec_zero (lm->adjacency_remap_table);
612
613   /* All remaps have been performed. */
614   lm->n_adjacency_remaps = 0;
615 }
616
617 void ip4_delete_matching_routes (ip4_main_t * im,
618                                  u32 table_index_or_table_id,
619                                  u32 flags,
620                                  ip4_address_t * address,
621                                  u32 address_length)
622 {
623   static ip4_address_t * matching_addresses;
624   static u8 * matching_address_lengths;
625   u32 l, i;
626   ip4_add_del_route_args_t a;
627
628   a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
629   a.table_index_or_table_id = table_index_or_table_id;
630   a.adj_index = ~0;
631   a.add_adj = 0;
632   a.n_add_adj = 0;
633
634   for (l = address_length + 1; l <= 32; l++)
635     {
636       ip4_foreach_matching_route (im, table_index_or_table_id, flags,
637                                   address,
638                                   l,
639                                   &matching_addresses,
640                                   &matching_address_lengths);
641       for (i = 0; i < vec_len (matching_addresses); i++)
642         {
643           a.dst_address = matching_addresses[i];
644           a.dst_address_length = matching_address_lengths[i];
645           ip4_add_del_route (im, &a);
646         }
647     }
648
649   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
650 }
651
652 void
653 ip4_forward_next_trace (vlib_main_t * vm,
654                         vlib_node_runtime_t * node,
655                         vlib_frame_t * frame,
656                         vlib_rx_or_tx_t which_adj_index);
657
658 always_inline uword
659 ip4_lookup_inline (vlib_main_t * vm,
660                    vlib_node_runtime_t * node,
661                    vlib_frame_t * frame,
662                    int lookup_for_responses_to_locally_received_packets,
663                    int is_indirect)
664 {
665   ip4_main_t * im = &ip4_main;
666   ip_lookup_main_t * lm = &im->lookup_main;
667   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
668   u32 n_left_from, n_left_to_next, * from, * to_next;
669   ip_lookup_next_t next;
670   u32 cpu_index = os_get_cpu_number();
671
672   from = vlib_frame_vector_args (frame);
673   n_left_from = frame->n_vectors;
674   next = node->cached_next_index;
675
676   while (n_left_from > 0)
677     {
678       vlib_get_next_frame (vm, node, next,
679                            to_next, n_left_to_next);
680
681       while (n_left_from >= 4 && n_left_to_next >= 2)
682         {
683           vlib_buffer_t * p0, * p1;
684           ip4_header_t * ip0, * ip1;
685           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
686           ip_lookup_next_t next0, next1;
687           ip_adjacency_t * adj0, * adj1;
688           ip4_fib_mtrie_t * mtrie0, * mtrie1;
689           ip4_fib_mtrie_leaf_t leaf0, leaf1;
690           ip4_address_t * dst_addr0, *dst_addr1;
691           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
692           __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
693           u32 flow_hash_config0, flow_hash_config1;
694           u32 hash_c0, hash_c1;
695           u32 wrong_next;
696
697           /* Prefetch next iteration. */
698           {
699             vlib_buffer_t * p2, * p3;
700
701             p2 = vlib_get_buffer (vm, from[2]);
702             p3 = vlib_get_buffer (vm, from[3]);
703
704             vlib_prefetch_buffer_header (p2, LOAD);
705             vlib_prefetch_buffer_header (p3, LOAD);
706
707             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
708             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
709           }
710
711           pi0 = to_next[0] = from[0];
712           pi1 = to_next[1] = from[1];
713
714           p0 = vlib_get_buffer (vm, pi0);
715           p1 = vlib_get_buffer (vm, pi1);
716
717           ip0 = vlib_buffer_get_current (p0);
718           ip1 = vlib_buffer_get_current (p1);
719
720           if (is_indirect)
721             {
722               ip_adjacency_t * iadj0, * iadj1;
723               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
724               iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
725               dst_addr0 = &iadj0->indirect.next_hop.ip4;
726               dst_addr1 = &iadj1->indirect.next_hop.ip4;
727             }
728           else
729             {
730               dst_addr0 = &ip0->dst_address;
731               dst_addr1 = &ip1->dst_address;
732             }
733
734           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
735           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
736           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
737             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
738           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
739             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
740
741
742           if (! lookup_for_responses_to_locally_received_packets)
743             {
744               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
745               mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
746
747               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
748
749               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
750               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
751             }
752
753           tcp0 = (void *) (ip0 + 1);
754           tcp1 = (void *) (ip1 + 1);
755
756           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
757                          || ip0->protocol == IP_PROTOCOL_UDP);
758           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
759                          || ip1->protocol == IP_PROTOCOL_UDP);
760
761           if (! lookup_for_responses_to_locally_received_packets)
762             {
763               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
764               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
765             }
766
767           if (! lookup_for_responses_to_locally_received_packets)
768             {
769               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
770               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
771             }
772
773           if (! lookup_for_responses_to_locally_received_packets)
774             {
775               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
776               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
777             }
778
779           if (lookup_for_responses_to_locally_received_packets)
780             {
781               adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
782               adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
783             }
784           else
785             {
786               /* Handle default route. */
787               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
788               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
789
790               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
791               adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
792             }
793
794           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
795                                                            dst_addr0,
796                                                            /* no_default_route */ 0));
797           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
798                                                            dst_addr1,
799                                                            /* no_default_route */ 0));
800           adj0 = ip_get_adjacency (lm, adj_index0);
801           adj1 = ip_get_adjacency (lm, adj_index1);
802
803           next0 = adj0->lookup_next_index;
804           next1 = adj1->lookup_next_index;
805
806           /* Use flow hash to compute multipath adjacency. */
807           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
808           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
809           if (PREDICT_FALSE (adj0->n_adj > 1))
810             {
811               flow_hash_config0 = 
812                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
813               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
814                 ip4_compute_flow_hash (ip0, flow_hash_config0);
815             }
816           if (PREDICT_FALSE(adj1->n_adj > 1))
817             {
818               flow_hash_config1 = 
819                 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
820               hash_c1 = vnet_buffer (p1)->ip.flow_hash = 
821                 ip4_compute_flow_hash (ip1, flow_hash_config1);
822             }
823
824           ASSERT (adj0->n_adj > 0);
825           ASSERT (adj1->n_adj > 0);
826           ASSERT (is_pow2 (adj0->n_adj));
827           ASSERT (is_pow2 (adj1->n_adj));
828           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
829           adj_index1 += (hash_c1 & (adj1->n_adj - 1));
830
831           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
832           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
833
834           vlib_increment_combined_counter 
835               (cm, cpu_index, adj_index0, 1,
836                vlib_buffer_length_in_chain (vm, p0) 
837                + sizeof(ethernet_header_t));
838           vlib_increment_combined_counter 
839               (cm, cpu_index, adj_index1, 1,
840                vlib_buffer_length_in_chain (vm, p1)
841                + sizeof(ethernet_header_t));
842
843           from += 2;
844           to_next += 2;
845           n_left_to_next -= 2;
846           n_left_from -= 2;
847
848           wrong_next = (next0 != next) + 2*(next1 != next);
849           if (PREDICT_FALSE (wrong_next != 0))
850             {
851               switch (wrong_next)
852                 {
853                 case 1:
854                   /* A B A */
855                   to_next[-2] = pi1;
856                   to_next -= 1;
857                   n_left_to_next += 1;
858                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
859                   break;
860
861                 case 2:
862                   /* A A B */
863                   to_next -= 1;
864                   n_left_to_next += 1;
865                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
866                   break;
867
868                 case 3:
869                   /* A B C */
870                   to_next -= 2;
871                   n_left_to_next += 2;
872                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
873                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
874                   if (next0 == next1)
875                     {
876                       /* A B B */
877                       vlib_put_next_frame (vm, node, next, n_left_to_next);
878                       next = next1;
879                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
880                     }
881                 }
882             }
883         }
884     
885       while (n_left_from > 0 && n_left_to_next > 0)
886         {
887           vlib_buffer_t * p0;
888           ip4_header_t * ip0;
889           __attribute__((unused)) tcp_header_t * tcp0;
890           ip_lookup_next_t next0;
891           ip_adjacency_t * adj0;
892           ip4_fib_mtrie_t * mtrie0;
893           ip4_fib_mtrie_leaf_t leaf0;
894           ip4_address_t * dst_addr0;
895           __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
896           u32 flow_hash_config0, hash_c0;
897
898           pi0 = from[0];
899           to_next[0] = pi0;
900
901           p0 = vlib_get_buffer (vm, pi0);
902
903           ip0 = vlib_buffer_get_current (p0);
904
905           if (is_indirect)
906             {
907               ip_adjacency_t * iadj0;
908               iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
909               dst_addr0 = &iadj0->indirect.next_hop.ip4;
910             }
911           else
912             {
913               dst_addr0 = &ip0->dst_address;
914             }
915
916           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
917           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
918             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
919
920           if (! lookup_for_responses_to_locally_received_packets)
921             {
922               mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
923
924               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
925
926               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
927             }
928
929           tcp0 = (void *) (ip0 + 1);
930
931           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
932                          || ip0->protocol == IP_PROTOCOL_UDP);
933
934           if (! lookup_for_responses_to_locally_received_packets)
935             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
936
937           if (! lookup_for_responses_to_locally_received_packets)
938             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
939
940           if (! lookup_for_responses_to_locally_received_packets)
941             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
942
943           if (lookup_for_responses_to_locally_received_packets)
944             adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
945           else
946             {
947               /* Handle default route. */
948               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
949               adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
950             }
951
952           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
953                                                            dst_addr0,
954                                                            /* no_default_route */ 0));
955
956           adj0 = ip_get_adjacency (lm, adj_index0);
957
958           next0 = adj0->lookup_next_index;
959
960           /* Use flow hash to compute multipath adjacency. */
961           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
962           if (PREDICT_FALSE(adj0->n_adj > 1))
963             {
964               flow_hash_config0 = 
965                 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
966
967               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
968                 ip4_compute_flow_hash (ip0, flow_hash_config0);
969             }
970
971           ASSERT (adj0->n_adj > 0);
972           ASSERT (is_pow2 (adj0->n_adj));
973           adj_index0 += (hash_c0 & (adj0->n_adj - 1));
974
975           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
976
977           vlib_increment_combined_counter 
978               (cm, cpu_index, adj_index0, 1,
979                vlib_buffer_length_in_chain (vm, p0)
980                + sizeof(ethernet_header_t));
981
982           from += 1;
983           to_next += 1;
984           n_left_to_next -= 1;
985           n_left_from -= 1;
986
987           if (PREDICT_FALSE (next0 != next))
988             {
989               n_left_to_next += 1;
990               vlib_put_next_frame (vm, node, next, n_left_to_next);
991               next = next0;
992               vlib_get_next_frame (vm, node, next,
993                                    to_next, n_left_to_next);
994               to_next[0] = pi0;
995               to_next += 1;
996               n_left_to_next -= 1;
997             }
998         }
999
1000       vlib_put_next_frame (vm, node, next, n_left_to_next);
1001     }
1002
1003   if (node->flags & VLIB_NODE_FLAG_TRACE)
1004     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
1005
1006   return frame->n_vectors;
1007 }
1008
1009 /** \brief IPv4 lookup node.
1010     @node ip4-lookup
1011
1012     This is the main IPv4 lookup dispatch node.
1013
1014     @param vm vlib_main_t corresponding to the current thread
1015     @param node vlib_node_runtime_t
1016     @param frame vlib_frame_t whose contents should be dispatched
1017
1018     @par Graph mechanics: buffer metadata, next index usage
1019
1020     @em Uses:
1021     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
1022         - Indicates the @c sw_if_index value of the interface that the
1023           packet was received on.
1024     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
1025         - When the value is @c ~0 then the node performs a longest prefix
1026           match (LPM) for the packet destination address in the FIB attached
1027           to the receive interface.
1028         - Otherwise perform LPM for the packet destination address in the
1029           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
1030           value (0, 1, ...) and not a VRF id.
1031
1032     @em Sets:
1033     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
1034         - The lookup result adjacency index.
1035
1036     <em>Next Index:</em>
1037     - Dispatches the packet to the node index found in
1038       ip_adjacency_t @c adj->lookup_next_index
1039       (where @c adj is the lookup result adjacency).
1040 */
1041 static uword
1042 ip4_lookup (vlib_main_t * vm,
1043             vlib_node_runtime_t * node,
1044             vlib_frame_t * frame)
1045 {
1046   return ip4_lookup_inline (vm, node, frame,
1047                             /* lookup_for_responses_to_locally_received_packets */ 0,
1048                             /* is_indirect */ 0);
1049
1050 }
1051
1052 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
1053                                         ip_adjacency_t * adj,
1054                                         u32 sw_if_index,
1055                                         u32 if_address_index)
1056 {
1057   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1058   ip_lookup_next_t n;
1059   vnet_l3_packet_type_t packet_type;
1060   u32 node_index;
1061
1062   if (hw->hw_class_index == ethernet_hw_interface_class.index
1063       || hw->hw_class_index == srp_hw_interface_class.index)
1064     {
1065       /* 
1066        * We have a bit of a problem in this case. ip4-arp uses
1067        * the rewrite_header.next_index to hand pkts to the
1068        * indicated inteface output node. We can end up in
1069        * ip4_rewrite_local, too, which also pays attention to 
1070        * rewrite_header.next index. Net result: a hack in
1071        * ip4_rewrite_local...
1072        */
1073       n = IP_LOOKUP_NEXT_ARP;
1074       node_index = ip4_arp_node.index;
1075       adj->if_address_index = if_address_index;
1076       adj->arp.next_hop.ip4.as_u32 = 0;
1077       ip46_address_reset(&adj->arp.next_hop);
1078       packet_type = VNET_L3_PACKET_TYPE_ARP;
1079     }
1080   else
1081     {
1082       n = IP_LOOKUP_NEXT_REWRITE;
1083       node_index = ip4_rewrite_node.index;
1084       packet_type = VNET_L3_PACKET_TYPE_IP4;
1085     }
1086
1087   adj->lookup_next_index = n;
1088   vnet_rewrite_for_sw_interface
1089     (vnm,
1090      packet_type,
1091      sw_if_index,
1092      node_index,
1093      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1094      &adj->rewrite_header,
1095      sizeof (adj->rewrite_data));
1096 }
1097
1098 static void
1099 ip4_add_interface_routes (u32 sw_if_index,
1100                           ip4_main_t * im, u32 fib_index,
1101                           ip_interface_address_t * a)
1102 {
1103   vnet_main_t * vnm = vnet_get_main();
1104   ip_lookup_main_t * lm = &im->lookup_main;
1105   ip_adjacency_t * adj;
1106   ip4_address_t * address = ip_interface_address_get_address (lm, a);
1107   ip4_add_del_route_args_t x;
1108   vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1109   u32 classify_table_index;
1110
1111   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1112   x.table_index_or_table_id = fib_index;
1113   x.flags = (IP4_ROUTE_FLAG_ADD
1114              | IP4_ROUTE_FLAG_FIB_INDEX
1115              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1116   x.dst_address = address[0];
1117   x.dst_address_length = a->address_length;
1118   x.n_add_adj = 0;
1119   x.add_adj = 0;
1120
1121   a->neighbor_probe_adj_index = ~0;
1122   if (a->address_length < 32)
1123     {
1124       adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1125                               &x.adj_index);
1126       ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1127       ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1128       ip4_add_del_route (im, &x);
1129       a->neighbor_probe_adj_index = x.adj_index;
1130     }
1131   
1132   /* Add e.g. 1.1.1.1/32 as local to this host. */
1133   adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
1134                           &x.adj_index);
1135   
1136   classify_table_index = ~0;
1137   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1138     classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1139   if (classify_table_index != (u32) ~0)
1140     {
1141       adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1142       adj->classify.table_index = classify_table_index;
1143     }
1144   else
1145     adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1146   
1147   adj->if_address_index = a - lm->if_address_pool;
1148   adj->rewrite_header.sw_if_index = sw_if_index;
1149   adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1150   /* 
1151    * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1152    * fail an RPF-ish check, but still go thru the rewrite code...
1153    */
1154   adj->rewrite_header.data_bytes = 0;
1155
1156   ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1157   x.dst_address_length = 32;
1158   ip4_add_del_route (im, &x);
1159 }
1160
1161 static void
1162 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1163 {
1164   ip4_add_del_route_args_t x;
1165
1166   /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1167   x.table_index_or_table_id = fib_index;
1168   x.flags = (IP4_ROUTE_FLAG_DEL
1169              | IP4_ROUTE_FLAG_FIB_INDEX
1170              | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1171   x.dst_address = address[0];
1172   x.dst_address_length = address_length;
1173   x.adj_index = ~0;
1174   x.n_add_adj = 0;
1175   x.add_adj = 0;
1176
1177   if (address_length < 32)
1178     ip4_add_del_route (im, &x);
1179
1180   x.dst_address_length = 32;
1181   ip4_add_del_route (im, &x);
1182
1183   ip4_delete_matching_routes (im,
1184                               fib_index,
1185                               IP4_ROUTE_FLAG_FIB_INDEX,
1186                               address,
1187                               address_length);
1188 }
1189
1190 typedef struct {
1191     u32 sw_if_index;
1192     ip4_address_t address;
1193     u32 length;
1194 } ip4_interface_address_t;
1195
1196 static clib_error_t *
1197 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1198                                         u32 sw_if_index,
1199                                         ip4_address_t * new_address,
1200                                         u32 new_length,
1201                                         u32 redistribute,
1202                                         u32 insert_routes,
1203                                         u32 is_del);
1204
1205 static clib_error_t *
1206 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1207                                         u32 sw_if_index,
1208                                         ip4_address_t * address,
1209                                         u32 address_length,
1210                                         u32 redistribute,
1211                                         u32 insert_routes,
1212                                         u32 is_del)
1213 {
1214   vnet_main_t * vnm = vnet_get_main();
1215   ip4_main_t * im = &ip4_main;
1216   ip_lookup_main_t * lm = &im->lookup_main;
1217   clib_error_t * error = 0;
1218   u32 if_address_index, elts_before;
1219   ip4_address_fib_t ip4_af, * addr_fib = 0;
1220
1221   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1222   ip4_addr_fib_init (&ip4_af, address,
1223                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1224   vec_add1 (addr_fib, ip4_af);
1225
1226   /* When adding an address check that it does not conflict with an existing address. */
1227   if (! is_del)
1228     {
1229       ip_interface_address_t * ia;
1230       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1231                                     0 /* honor unnumbered */,
1232       ({
1233         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
1234
1235         if (ip4_destination_matches_route (im, address, x, ia->address_length)
1236             || ip4_destination_matches_route (im, x, address, address_length))
1237           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1238                                     format_ip4_address_and_length, address, address_length,
1239                                     format_ip4_address_and_length, x, ia->address_length,
1240                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
1241       }));
1242     }
1243
1244   elts_before = pool_elts (lm->if_address_pool);
1245
1246   error = ip_interface_address_add_del
1247     (lm,
1248      sw_if_index,
1249      addr_fib,
1250      address_length,
1251      is_del,
1252      &if_address_index);
1253   if (error)
1254     goto done;
1255   
1256   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1257     {
1258       if (is_del)
1259         ip4_del_interface_routes (im, ip4_af.fib_index, address,
1260                                   address_length);
1261       
1262       else
1263           ip4_add_interface_routes (sw_if_index,
1264                                     im, ip4_af.fib_index,
1265                                     pool_elt_at_index 
1266                                     (lm->if_address_pool, if_address_index));
1267     }
1268
1269   /* If pool did not grow/shrink: add duplicate address. */
1270   if (elts_before != pool_elts (lm->if_address_pool))
1271     {
1272       ip4_add_del_interface_address_callback_t * cb;
1273       vec_foreach (cb, im->add_del_interface_address_callbacks)
1274         cb->function (im, cb->function_opaque, sw_if_index,
1275                       address, address_length,
1276                       if_address_index,
1277                       is_del);
1278     }
1279
1280  done:
1281   vec_free (addr_fib);
1282   return error;
1283 }
1284
1285 clib_error_t *
1286 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1287                                ip4_address_t * address, u32 address_length,
1288                                u32 is_del)
1289 {
1290   return ip4_add_del_interface_address_internal
1291     (vm, sw_if_index, address, address_length,
1292      /* redistribute */ 1,
1293      /* insert_routes */ 1,
1294      is_del);
1295 }
1296
1297 static clib_error_t *
1298 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1299                                 u32 sw_if_index,
1300                                 u32 flags)
1301 {
1302   ip4_main_t * im = &ip4_main;
1303   ip_interface_address_t * ia;
1304   ip4_address_t * a;
1305   u32 is_admin_up, fib_index;
1306   
1307   /* Fill in lookup tables with default table (0). */
1308   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1309   
1310   vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1311   
1312   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1313   
1314   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1315
1316   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
1317                                 0 /* honor unnumbered */,
1318   ({
1319     a = ip_interface_address_get_address (&im->lookup_main, ia);
1320     if (is_admin_up)
1321       ip4_add_interface_routes (sw_if_index,
1322                                 im, fib_index,
1323                                 ia);
1324     else
1325       ip4_del_interface_routes (im, fib_index,
1326                                 a, ia->address_length);
1327   }));
1328
1329   return 0;
1330 }
1331  
1332 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1333
1334 /* Built-in ip4 unicast rx feature path definition */
1335 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
1336   .node_name = "ip4-inacl", 
1337   .runs_before = {"ip4-source-check-via-rx", 0}, 
1338   .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
1339 };
1340
1341 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
1342   .node_name = "ip4-source-check-via-rx",
1343   .runs_before = {"ip4-source-check-via-any", 0},
1344   .feature_index = 
1345   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
1346 };
1347
1348 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
1349   .node_name = "ip4-source-check-via-any",
1350   .runs_before = {"ipsec-input-ip4", 0},
1351   .feature_index = 
1352   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
1353 };
1354
1355 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
1356   .node_name = "ipsec-input-ip4",
1357   .runs_before = {"vpath-input-ip4", 0},
1358   .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
1359 };
1360
1361 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
1362   .node_name = "vpath-input-ip4",
1363   .runs_before = {"ip4-lookup", 0},
1364   .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
1365 };
1366
1367 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
1368   .node_name = "ip4-lookup",
1369   .runs_before = {0}, /* not before any other features */
1370   .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
1371 };
1372
1373 /* Built-in ip4 multicast rx feature path definition */
1374 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
1375   .node_name = "vpath-input-ip4",
1376   .runs_before = {"ip4-lookup-multicast", 0},
1377   .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
1378 };
1379
1380 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
1381   .node_name = "ip4-lookup-multicast",
1382   .runs_before = {0}, /* not before any other features */
1383   .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
1384 };
1385
/* Names of the nodes handed to ip_feature_init_cast as feature start nodes. */
static char * feature_start_nodes[] = {
  "ip4-input",
  "ip4-input-no-checksum",
};
1388
1389 static clib_error_t *
1390 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
1391 {
1392   ip_lookup_main_t * lm = &im->lookup_main;
1393   clib_error_t * error;
1394   vnet_cast_t cast;
1395
1396   for (cast = 0; cast < VNET_N_CAST; cast++)
1397     {
1398       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1399       vnet_config_main_t * vcm = &cm->config_main;
1400
1401       if ((error = ip_feature_init_cast (vm, cm, vcm, 
1402                                          feature_start_nodes,
1403                                          ARRAY_LEN(feature_start_nodes),
1404                                          cast,
1405                                          1 /* is_ip4 */)))
1406         return error;
1407     }
1408   return 0;
1409 }
1410
1411 static clib_error_t *
1412 ip4_sw_interface_add_del (vnet_main_t * vnm,
1413                           u32 sw_if_index,
1414                           u32 is_add)
1415 {
1416   vlib_main_t * vm = vnm->vlib_main;
1417   ip4_main_t * im = &ip4_main;
1418   ip_lookup_main_t * lm = &im->lookup_main;
1419   u32 ci, cast;
1420   u32 feature_index;
1421
1422   for (cast = 0; cast < VNET_N_CAST; cast++)
1423     {
1424       ip_config_main_t * cm = &lm->rx_config_mains[cast];
1425       vnet_config_main_t * vcm = &cm->config_main;
1426
1427       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1428       ci = cm->config_index_by_sw_if_index[sw_if_index];
1429
1430       if (cast == VNET_UNICAST)
1431         feature_index = im->ip4_unicast_rx_feature_lookup;
1432       else
1433         feature_index = im->ip4_multicast_rx_feature_lookup;
1434
1435       if (is_add)
1436         ci = vnet_config_add_feature (vm, vcm,
1437                                       ci,
1438                                       feature_index,
1439                                       /* config data */ 0,
1440                                       /* # bytes of config data */ 0);
1441       else
1442         ci = vnet_config_del_feature (vm, vcm,
1443                                       ci,
1444                                       feature_index,
1445                                       /* config data */ 0,
1446                                       /* # bytes of config data */ 0);
1447
1448       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1449     }
1450
1451   return /* no error */ 0;
1452 }
1453
1454 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1455
1456 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
1457
1458 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1459   .function = ip4_lookup,
1460   .name = "ip4-lookup",
1461   .vector_size = sizeof (u32),
1462
1463   .format_trace = format_ip4_lookup_trace,
1464
1465   .n_next_nodes = IP4_LOOKUP_N_NEXT,
1466   .next_nodes = IP4_LOOKUP_NEXT_NODES,
1467 };
1468
1469 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
1470
1471 static uword
1472 ip4_indirect (vlib_main_t * vm,
1473                vlib_node_runtime_t * node,
1474                vlib_frame_t * frame)
1475 {
1476   return ip4_lookup_inline (vm, node, frame,
1477                             /* lookup_for_responses_to_locally_received_packets */ 0,
1478                             /* is_indirect */ 1);
1479 }
1480
1481 VLIB_REGISTER_NODE (ip4_indirect_node) = {
1482   .function = ip4_indirect,
1483   .name = "ip4-indirect",
1484   .vector_size = sizeof (u32),
1485   .sibling_of = "ip4-lookup",
1486   .format_trace = format_ip4_lookup_trace,
1487
1488   .n_next_nodes = 0,
1489 };
1490
1491 VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect)
1492
1493
1494 /* Global IP4 main. */
1495 ip4_main_t ip4_main;
1496
1497 clib_error_t *
1498 ip4_lookup_init (vlib_main_t * vm)
1499 {
1500   ip4_main_t * im = &ip4_main;
1501   clib_error_t * error;
1502   uword i;
1503
1504   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1505     {
1506       u32 m;
1507
1508       if (i < 32)
1509         m = pow2_mask (i) << (32 - i);
1510       else 
1511         m = ~0;
1512       im->fib_masks[i] = clib_host_to_net_u32 (m);
1513     }
1514
1515   /* Create FIB with index 0 and table id of 0. */
1516   find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1517
1518   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1519
1520   {
1521     pg_node_t * pn;
1522     pn = pg_get_node (ip4_lookup_node.index);
1523     pn->unformat_edit = unformat_pg_ip4_header;
1524   }
1525
1526   {
1527     ethernet_arp_header_t h;
1528
1529     memset (&h, 0, sizeof (h));
1530
1531     /* Set target ethernet address to all zeros. */
1532     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1533
1534 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1535 #define _8(f,v) h.f = v;
1536     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1537     _16 (l3_type, ETHERNET_TYPE_IP4);
1538     _8 (n_l2_address_bytes, 6);
1539     _8 (n_l3_address_bytes, 4);
1540     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1541 #undef _16
1542 #undef _8
1543
1544     vlib_packet_template_init (vm,
1545                                &im->ip4_arp_request_packet_template,
1546                                /* data */ &h,
1547                                sizeof (h),
1548                                /* alloc chunk size */ 8,
1549                                "ip4 arp");
1550   }
1551
1552   error = ip4_feature_init (vm, im);
1553
1554   return error;
1555 }
1556
1557 VLIB_INIT_FUNCTION (ip4_lookup_init);
1558
1559 typedef struct {
1560   /* Adjacency taken. */
1561   u32 adj_index;
1562   u32 flow_hash;
1563   u32 fib_index;
1564
1565   /* Packet data, possibly *after* rewrite. */
1566   u8 packet_data[64 - 1*sizeof(u32)];
1567 } ip4_forward_next_trace_t;
1568
1569 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1570 {
1571   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1572   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1573   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1574   uword indent = format_get_indent (s);
1575   s = format (s, "%U%U",
1576                 format_white_space, indent,
1577                 format_ip4_header, t->packet_data);
1578   return s;
1579 }
1580
1581 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1582 {
1583   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1584   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1585   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1586   vnet_main_t * vnm = vnet_get_main();
1587   ip4_main_t * im = &ip4_main;
1588   uword indent = format_get_indent (s);
1589
1590   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1591               t->fib_index, t->adj_index, format_ip_adjacency,
1592               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1593   s = format (s, "\n%U%U",
1594               format_white_space, indent,
1595               format_ip4_header, t->packet_data);
1596   return s;
1597 }
1598
1599 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1600 {
1601   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1602   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1603   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1604   vnet_main_t * vnm = vnet_get_main();
1605   ip4_main_t * im = &ip4_main;
1606   uword indent = format_get_indent (s);
1607
1608   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
1609               t->fib_index, t->adj_index, format_ip_adjacency,
1610               vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1611   s = format (s, "\n%U%U",
1612               format_white_space, indent,
1613               format_ip_adjacency_packet_data,
1614               vnm, &im->lookup_main, t->adj_index,
1615               t->packet_data, sizeof (t->packet_data));
1616   return s;
1617 }
1618
1619 /* Common trace function for all ip4-forward next nodes. */
1620 void
1621 ip4_forward_next_trace (vlib_main_t * vm,
1622                         vlib_node_runtime_t * node,
1623                         vlib_frame_t * frame,
1624                         vlib_rx_or_tx_t which_adj_index)
1625 {
1626   u32 * from, n_left;
1627   ip4_main_t * im = &ip4_main;
1628
1629   n_left = frame->n_vectors;
1630   from = vlib_frame_vector_args (frame);
1631   
1632   while (n_left >= 4)
1633     {
1634       u32 bi0, bi1;
1635       vlib_buffer_t * b0, * b1;
1636       ip4_forward_next_trace_t * t0, * t1;
1637
1638       /* Prefetch next iteration. */
1639       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1640       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1641
1642       bi0 = from[0];
1643       bi1 = from[1];
1644
1645       b0 = vlib_get_buffer (vm, bi0);
1646       b1 = vlib_get_buffer (vm, bi1);
1647
1648       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1649         {
1650           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1651           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1652           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1653           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1654               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1655               vec_elt (im->fib_index_by_sw_if_index,
1656                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1657
1658           clib_memcpy (t0->packet_data,
1659                   vlib_buffer_get_current (b0),
1660                   sizeof (t0->packet_data));
1661         }
1662       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1663         {
1664           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1665           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1666           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1667           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1668               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1669               vec_elt (im->fib_index_by_sw_if_index,
1670                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1671           clib_memcpy (t1->packet_data,
1672                   vlib_buffer_get_current (b1),
1673                   sizeof (t1->packet_data));
1674         }
1675       from += 2;
1676       n_left -= 2;
1677     }
1678
1679   while (n_left >= 1)
1680     {
1681       u32 bi0;
1682       vlib_buffer_t * b0;
1683       ip4_forward_next_trace_t * t0;
1684
1685       bi0 = from[0];
1686
1687       b0 = vlib_get_buffer (vm, bi0);
1688
1689       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1690         {
1691           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1692           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1693           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1694           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1695               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1696               vec_elt (im->fib_index_by_sw_if_index,
1697                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1698           clib_memcpy (t0->packet_data,
1699                   vlib_buffer_get_current (b0),
1700                   sizeof (t0->packet_data));
1701         }
1702       from += 1;
1703       n_left -= 1;
1704     }
1705 }
1706
1707 static uword
1708 ip4_drop_or_punt (vlib_main_t * vm,
1709                   vlib_node_runtime_t * node,
1710                   vlib_frame_t * frame,
1711                   ip4_error_t error_code)
1712 {
1713   u32 * buffers = vlib_frame_vector_args (frame);
1714   uword n_packets = frame->n_vectors;
1715
1716   vlib_error_drop_buffers (vm, node,
1717                            buffers,
1718                            /* stride */ 1,
1719                            n_packets,
1720                            /* next */ 0,
1721                            ip4_input_node.index,
1722                            error_code);
1723
1724   if (node->flags & VLIB_NODE_FLAG_TRACE)
1725     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1726
1727   return n_packets;
1728 }
1729
1730 static uword
1731 ip4_drop (vlib_main_t * vm,
1732           vlib_node_runtime_t * node,
1733           vlib_frame_t * frame)
1734 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1735
1736 static uword
1737 ip4_punt (vlib_main_t * vm,
1738           vlib_node_runtime_t * node,
1739           vlib_frame_t * frame)
1740 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1741
1742 static uword
1743 ip4_miss (vlib_main_t * vm,
1744           vlib_node_runtime_t * node,
1745           vlib_frame_t * frame)
1746 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1747
/**
 * @brief Graph node "ip4-drop".
 *
 * Runs ip4_drop() on vectors of u32 buffer indices; its single next
 * node (index 0) is "error-drop".  Traces are rendered with
 * format_ip4_forward_next_trace.
 */
VLIB_REGISTER_NODE (ip4_drop_node,static) = {
  .function = ip4_drop,
  .name = "ip4-drop",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,

  .n_next_nodes = 1,
  .next_nodes = {
    [0] = "error-drop",
  },
};

VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1762
/**
 * @brief Graph node "ip4-punt".
 *
 * Runs ip4_punt() on vectors of u32 buffer indices; its single next
 * node (index 0) is "error-punt".  Traces are rendered with
 * format_ip4_forward_next_trace.
 */
VLIB_REGISTER_NODE (ip4_punt_node,static) = {
  .function = ip4_punt,
  .name = "ip4-punt",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,

  .n_next_nodes = 1,
  .next_nodes = {
    [0] = "error-punt",
  },
};

VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1777
/**
 * @brief Graph node "ip4-miss".
 *
 * Runs ip4_miss() on vectors of u32 buffer indices; its single next
 * node (index 0) is "error-drop".  Traces are rendered with
 * format_ip4_forward_next_trace.
 */
VLIB_REGISTER_NODE (ip4_miss_node,static) = {
  .function = ip4_miss,
  .name = "ip4-miss",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,

  .n_next_nodes = 1,
  .next_nodes = {
    [0] = "error-drop",
  },
};

VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss)
1792
1793 /* Compute TCP/UDP/ICMP4 checksum in software. */
1794 u16
1795 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1796                               ip4_header_t * ip0)
1797 {
1798   ip_csum_t sum0;
1799   u32 ip_header_length, payload_length_host_byte_order;
1800   u32 n_this_buffer, n_bytes_left;
1801   u16 sum16;
1802   void * data_this_buffer;
1803   
1804   /* Initialize checksum with ip header. */
1805   ip_header_length = ip4_header_bytes (ip0);
1806   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1807   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1808
1809   if (BITS (uword) == 32)
1810     {
1811       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1812       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1813     }
1814   else
1815     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1816
1817   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1818   data_this_buffer = (void *) ip0 + ip_header_length;
1819   if (n_this_buffer + ip_header_length > p0->current_length)
1820     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1821   while (1)
1822     {
1823       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1824       n_bytes_left -= n_this_buffer;
1825       if (n_bytes_left == 0)
1826         break;
1827
1828       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1829       p0 = vlib_get_buffer (vm, p0->next_buffer);
1830       data_this_buffer = vlib_buffer_get_current (p0);
1831       n_this_buffer = p0->current_length;
1832     }
1833
1834   sum16 = ~ ip_csum_fold (sum0);
1835
1836   return sum16;
1837 }
1838
1839 static u32
1840 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1841 {
1842   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1843   udp_header_t * udp0;
1844   u16 sum16;
1845
1846   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1847           || ip0->protocol == IP_PROTOCOL_UDP);
1848
1849   udp0 = (void *) (ip0 + 1);
1850   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1851     {
1852       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1853                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1854       return p0->flags;
1855     }
1856
1857   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1858
1859   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1860                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1861
1862   return p0->flags;
1863 }
1864
1865 static uword
1866 ip4_local (vlib_main_t * vm,
1867            vlib_node_runtime_t * node,
1868            vlib_frame_t * frame)
1869 {
1870   ip4_main_t * im = &ip4_main;
1871   ip_lookup_main_t * lm = &im->lookup_main;
1872   ip_local_next_t next_index;
1873   u32 * from, * to_next, n_left_from, n_left_to_next;
1874   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1875
1876   from = vlib_frame_vector_args (frame);
1877   n_left_from = frame->n_vectors;
1878   next_index = node->cached_next_index;
1879   
1880   if (node->flags & VLIB_NODE_FLAG_TRACE)
1881     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1882
1883   while (n_left_from > 0)
1884     {
1885       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1886
1887       while (n_left_from >= 4 && n_left_to_next >= 2)
1888         {
1889           vlib_buffer_t * p0, * p1;
1890           ip4_header_t * ip0, * ip1;
1891           udp_header_t * udp0, * udp1;
1892           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1893           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1894           ip_adjacency_t * adj0, * adj1;
1895           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
1896           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
1897           i32 len_diff0, len_diff1;
1898           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1899           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1900           u8 enqueue_code;
1901       
1902           pi0 = to_next[0] = from[0];
1903           pi1 = to_next[1] = from[1];
1904           from += 2;
1905           n_left_from -= 2;
1906           to_next += 2;
1907           n_left_to_next -= 2;
1908       
1909           p0 = vlib_get_buffer (vm, pi0);
1910           p1 = vlib_get_buffer (vm, pi1);
1911
1912           ip0 = vlib_buffer_get_current (p0);
1913           ip1 = vlib_buffer_get_current (p1);
1914
1915           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1916                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1917           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1918                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1919
1920           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1921           mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
1922
1923           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1924
1925           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1926           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1927
1928           /* Treat IP frag packets as "experimental" protocol for now
1929              until support of IP frag reassembly is implemented */
1930           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1931           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1932           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1933           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1934           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1935           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1936
1937           flags0 = p0->flags;
1938           flags1 = p1->flags;
1939
1940           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1941           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1942
1943           udp0 = ip4_next_header (ip0);
1944           udp1 = ip4_next_header (ip1);
1945
1946           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1947           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1948           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1949
1950           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1951           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1952
1953           /* Verify UDP length. */
1954           ip_len0 = clib_net_to_host_u16 (ip0->length);
1955           ip_len1 = clib_net_to_host_u16 (ip1->length);
1956           udp_len0 = clib_net_to_host_u16 (udp0->length);
1957           udp_len1 = clib_net_to_host_u16 (udp1->length);
1958
1959           len_diff0 = ip_len0 - udp_len0;
1960           len_diff1 = ip_len1 - udp_len1;
1961
1962           len_diff0 = is_udp0 ? len_diff0 : 0;
1963           len_diff1 = is_udp1 ? len_diff1 : 0;
1964
1965           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1966                                 & good_tcp_udp0 & good_tcp_udp1)))
1967             {
1968               if (is_tcp_udp0)
1969                 {
1970                   if (is_tcp_udp0
1971                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1972                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1973                   good_tcp_udp0 =
1974                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1975                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1976                 }
1977               if (is_tcp_udp1)
1978                 {
1979                   if (is_tcp_udp1
1980                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1981                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1982                   good_tcp_udp1 =
1983                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1984                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1985                 }
1986             }
1987
1988           good_tcp_udp0 &= len_diff0 >= 0;
1989           good_tcp_udp1 &= len_diff1 >= 0;
1990
1991           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1992           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1993
1994           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1995
1996           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1997           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1998
1999           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2000           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2001                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2002                     : error0);
2003           error1 = (is_tcp_udp1 && ! good_tcp_udp1
2004                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
2005                     : error1);
2006
2007           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2008           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
2009
2010           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2011           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2012
2013           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
2014           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2015
2016           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2017                                                            &ip0->src_address,
2018                                                            /* no_default_route */ 1));
2019           ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
2020                                                            &ip1->src_address,
2021                                                            /* no_default_route */ 1));
2022
2023           adj0 = ip_get_adjacency (lm, adj_index0);
2024           adj1 = ip_get_adjacency (lm, adj_index1);
2025
2026           /* 
2027            * Must have a route to source otherwise we drop the packet.
2028            * ip4 broadcasts are accepted, e.g. to make dhcp client work
2029            */
2030           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2031                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2032                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2033                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2034                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2035                     ? IP4_ERROR_SRC_LOOKUP_MISS
2036                     : error0);
2037           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
2038                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2039                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
2040                     && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2041                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2042                     ? IP4_ERROR_SRC_LOOKUP_MISS
2043                     : error1);
2044
2045           next0 = lm->local_next_by_ip_protocol[proto0];
2046           next1 = lm->local_next_by_ip_protocol[proto1];
2047
2048           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2049           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
2050
2051           p0->error = error0 ? error_node->errors[error0] : 0;
2052           p1->error = error1 ? error_node->errors[error1] : 0;
2053
2054           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
2055
2056           if (PREDICT_FALSE (enqueue_code != 0))
2057             {
2058               switch (enqueue_code)
2059                 {
2060                 case 1:
2061                   /* A B A */
2062                   to_next[-2] = pi1;
2063                   to_next -= 1;
2064                   n_left_to_next += 1;
2065                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2066                   break;
2067
2068                 case 2:
2069                   /* A A B */
2070                   to_next -= 1;
2071                   n_left_to_next += 1;
2072                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2073                   break;
2074
2075                 case 3:
2076                   /* A B B or A B C */
2077                   to_next -= 2;
2078                   n_left_to_next += 2;
2079                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2080                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2081                   if (next0 == next1)
2082                     {
2083                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2084                       next_index = next1;
2085                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2086                     }
2087                   break;
2088                 }
2089             }
2090         }
2091
2092       while (n_left_from > 0 && n_left_to_next > 0)
2093         {
2094           vlib_buffer_t * p0;
2095           ip4_header_t * ip0;
2096           udp_header_t * udp0;
2097           ip4_fib_mtrie_t * mtrie0;
2098           ip4_fib_mtrie_leaf_t leaf0;
2099           ip_adjacency_t * adj0;
2100           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
2101           i32 len_diff0;
2102           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2103       
2104           pi0 = to_next[0] = from[0];
2105           from += 1;
2106           n_left_from -= 1;
2107           to_next += 1;
2108           n_left_to_next -= 1;
2109       
2110           p0 = vlib_get_buffer (vm, pi0);
2111
2112           ip0 = vlib_buffer_get_current (p0);
2113
2114           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2115                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2116
2117           mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2118
2119           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2120
2121           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2122
2123           /* Treat IP frag packets as "experimental" protocol for now
2124              until support of IP frag reassembly is implemented */
2125           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
2126           is_udp0 = proto0 == IP_PROTOCOL_UDP;
2127           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2128
2129           flags0 = p0->flags;
2130
2131           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2132
2133           udp0 = ip4_next_header (ip0);
2134
2135           /* Don't verify UDP checksum for packets with explicit zero checksum. */
2136           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2137
2138           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2139
2140           /* Verify UDP length. */
2141           ip_len0 = clib_net_to_host_u16 (ip0->length);
2142           udp_len0 = clib_net_to_host_u16 (udp0->length);
2143
2144           len_diff0 = ip_len0 - udp_len0;
2145
2146           len_diff0 = is_udp0 ? len_diff0 : 0;
2147
2148           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
2149             {
2150               if (is_tcp_udp0)
2151                 {
2152                   if (is_tcp_udp0
2153                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2154                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2155                   good_tcp_udp0 =
2156                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2157                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2158                 }
2159             }
2160
2161           good_tcp_udp0 &= len_diff0 >= 0;
2162
2163           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2164
2165           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2166
2167           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2168
2169           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2170           error0 = (is_tcp_udp0 && ! good_tcp_udp0
2171                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2172                     : error0);
2173
2174           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2175
2176           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2177           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2178
2179           ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2180                                                            &ip0->src_address,
2181                                                            /* no_default_route */ 1));
2182
2183           adj0 = ip_get_adjacency (lm, adj_index0);
2184
2185           /* Must have a route to source otherwise we drop the packet. */
2186           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2187                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2188                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2189                     && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2190                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
2191                     ? IP4_ERROR_SRC_LOOKUP_MISS
2192                     : error0);
2193
2194           next0 = lm->local_next_by_ip_protocol[proto0];
2195
2196           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2197
2198           p0->error = error0? error_node->errors[error0] : 0;
2199
2200           if (PREDICT_FALSE (next0 != next_index))
2201             {
2202               n_left_to_next += 1;
2203               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2204
2205               next_index = next0;
2206               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2207               to_next[0] = pi0;
2208               to_next += 1;
2209               n_left_to_next -= 1;
2210             }
2211         }
2212   
2213       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2214     }
2215
2216   return frame->n_vectors;
2217 }
2218
/**
 * @brief Graph node "ip4-local".
 *
 * Runs ip4_local() on vectors of u32 buffer indices.  The statically
 * declared next nodes are listed below; ip4_register_protocol() adds
 * further next nodes to this node at run time.
 */
VLIB_REGISTER_NODE (ip4_local_node,static) = {
  .function = ip4_local,
  .name = "ip4-local",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,

  .n_next_nodes = IP_LOCAL_N_NEXT,
  .next_nodes = {
    [IP_LOCAL_NEXT_DROP] = "error-drop",
    [IP_LOCAL_NEXT_PUNT] = "error-punt",
    [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
    [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
  },
};

VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
2236
2237 void ip4_register_protocol (u32 protocol, u32 node_index)
2238 {
2239   vlib_main_t * vm = vlib_get_main();
2240   ip4_main_t * im = &ip4_main;
2241   ip_lookup_main_t * lm = &im->lookup_main;
2242
2243   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2244   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
2245 }
2246
2247 static clib_error_t *
2248 show_ip_local_command_fn (vlib_main_t * vm,
2249                           unformat_input_t * input,
2250                          vlib_cli_command_t * cmd)
2251 {
2252   ip4_main_t * im = &ip4_main;
2253   ip_lookup_main_t * lm = &im->lookup_main;
2254   int i;
2255
2256   vlib_cli_output (vm, "Protocols handled by ip4_local");
2257   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2258     {
2259       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2260         vlib_cli_output (vm, "%d", i);
2261     }
2262   return 0;
2263 }
2264
2265
2266
/**
 * @brief CLI command "show ip local", implemented by
 *        show_ip_local_command_fn().
 */
VLIB_CLI_COMMAND (show_ip_local, static) = {
  .path = "show ip local",
  .function = show_ip_local_command_fn,
  .short_help = "Show ip local protocol table",
};
2272
2273 static uword
2274 ip4_arp (vlib_main_t * vm,
2275          vlib_node_runtime_t * node,
2276          vlib_frame_t * frame)
2277 {
2278   vnet_main_t * vnm = vnet_get_main();
2279   ip4_main_t * im = &ip4_main;
2280   ip_lookup_main_t * lm = &im->lookup_main;
2281   u32 * from, * to_next_drop;
2282   uword n_left_from, n_left_to_next_drop, next_index;
2283   static f64 time_last_seed_change = -1e100;
2284   static u32 hash_seeds[3];
2285   static uword hash_bitmap[256 / BITS (uword)]; 
2286   f64 time_now;
2287
2288   if (node->flags & VLIB_NODE_FLAG_TRACE)
2289     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2290
2291   time_now = vlib_time_now (vm);
2292   if (time_now - time_last_seed_change > 1e-3)
2293     {
2294       uword i;
2295       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2296                                              sizeof (hash_seeds));
2297       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2298         hash_seeds[i] = r[i];
2299
2300       /* Mark all hash keys as been no-seen before. */
2301       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2302         hash_bitmap[i] = 0;
2303
2304       time_last_seed_change = time_now;
2305     }
2306
2307   from = vlib_frame_vector_args (frame);
2308   n_left_from = frame->n_vectors;
2309   next_index = node->cached_next_index;
2310   if (next_index == IP4_ARP_NEXT_DROP)
2311     next_index = IP4_ARP_N_NEXT; /* point to first interface */
2312
2313   while (n_left_from > 0)
2314     {
2315       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2316                            to_next_drop, n_left_to_next_drop);
2317
2318       while (n_left_from > 0 && n_left_to_next_drop > 0)
2319         {
2320           vlib_buffer_t * p0;
2321           ip4_header_t * ip0;
2322           ethernet_header_t * eh0;
2323           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2324           uword bm0;
2325           ip_adjacency_t * adj0;
2326
2327           pi0 = from[0];
2328
2329           p0 = vlib_get_buffer (vm, pi0);
2330
2331           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2332           adj0 = ip_get_adjacency (lm, adj_index0);
2333           ip0 = vlib_buffer_get_current (p0);
2334
2335           /* If packet destination is not local, send ARP to next hop */
2336           if (adj0->arp.next_hop.ip4.as_u32)
2337             ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2338
2339           /* 
2340            * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2341            * rewrite to this packet, we need to skip it here.
2342            * Note, to distinguish from src IP addr *.8.6.*, we
2343            * check for a bcast eth dest instead of IPv4 version.
2344            */
2345           eh0 = (ethernet_header_t*)ip0;
2346           if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2347             {
2348               u32 vlan_num = 0;
2349               u16 * etype = &eh0->type;
2350               while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q 
2351                   || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad 
2352                 {
2353                   vlan_num += 1;
2354                   etype += 2; //vlan tag also 16 bits, same as etype
2355                 }
2356               if (*etype == clib_host_to_net_u16 (0x0806))     //arp
2357                 {
2358                   vlib_buffer_advance (
2359                       p0, sizeof(ethernet_header_t) + (4*vlan_num));
2360                   ip0 = vlib_buffer_get_current (p0);
2361                 }
2362             }
2363
2364           a0 = hash_seeds[0];
2365           b0 = hash_seeds[1];
2366           c0 = hash_seeds[2];
2367
2368           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2369           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2370
2371           a0 ^= ip0->dst_address.data_u32;
2372           b0 ^= sw_if_index0;
2373
2374           hash_v3_finalize32 (a0, b0, c0);
2375
2376           c0 &= BITS (hash_bitmap) - 1;
2377           c0 = c0 / BITS (uword);
2378           m0 = (uword) 1 << (c0 % BITS (uword));
2379
2380           bm0 = hash_bitmap[c0];
2381           drop0 = (bm0 & m0) != 0;
2382
2383           /* Mark it as seen. */
2384           hash_bitmap[c0] = bm0 | m0;
2385
2386           from += 1;
2387           n_left_from -= 1;
2388           to_next_drop[0] = pi0;
2389           to_next_drop += 1;
2390           n_left_to_next_drop -= 1;
2391
2392           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2393
2394           if (drop0)
2395             continue;
2396
2397           /* 
2398            * Can happen if the control-plane is programming tables
2399            * with traffic flowing; at least that's today's lame excuse.
2400            */
2401           if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) 
2402             {
2403               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2404             }
2405           else
2406           /* Send ARP request. */
2407           {
2408             u32 bi0 = 0;
2409             vlib_buffer_t * b0;
2410             ethernet_arp_header_t * h0;
2411             vnet_hw_interface_t * hw_if0;
2412
2413             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2414
2415             /* Add rewrite/encap string for ARP packet. */
2416             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2417
2418             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2419
2420             /* Src ethernet address in ARP header. */
2421             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2422                     sizeof (h0->ip4_over_ethernet[0].ethernet));
2423
2424             if (ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0)) {
2425                 //No source address available
2426                 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2427                 vlib_buffer_free(vm, &bi0, 1);
2428                 continue;
2429             }
2430
2431             /* Copy in destination address we are requesting. */
2432             h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2433
2434             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2435             b0 = vlib_get_buffer (vm, bi0);
2436             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2437
2438             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2439
2440             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2441           }
2442         }
2443
2444       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2445     }
2446
2447   return frame->n_vectors;
2448 }
2449
/** Counter descriptions for the ip4-arp node, indexed by IP4_ARP_ERROR_*. */
static char * ip4_arp_error_strings[] = {
  [IP4_ARP_ERROR_DROP] = "address overflow drops",
  [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
  [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
  [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
  [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
  [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
};
2458
2459 VLIB_REGISTER_NODE (ip4_arp_node) = {
2460   .function = ip4_arp,
2461   .name = "ip4-arp",
2462   .vector_size = sizeof (u32),
2463
2464   .format_trace = format_ip4_forward_next_trace,
2465
2466   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2467   .error_strings = ip4_arp_error_strings,
2468
2469   .n_next_nodes = IP4_ARP_N_NEXT,
2470   .next_nodes = {
2471     [IP4_ARP_NEXT_DROP] = "error-drop",
2472   },
2473 };
2474
/* X-macro listing the IP4_ARP_ERROR_ suffixes that arp_notrace_init()
   hands to the pcap drop-trace filter.  The user defines _() before
   expanding the list. */
#define foreach_notrace_ip4_arp_error \
  _(DROP)                             \
  _(REQUEST_SENT)                     \
  _(REPLICATE_DROP)                   \
  _(REPLICATE_FAIL)
2480
2481 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2482 {
2483   vlib_node_runtime_t *rt = 
2484     vlib_node_get_runtime (vm, ip4_arp_node.index);
2485
2486   /* don't trace ARP request packets */
2487 #define _(a)                                    \
2488     vnet_pcap_drop_trace_filter_add_del         \
2489         (rt->errors[IP4_ARP_ERROR_##a],         \
2490          1 /* is_add */);
2491     foreach_notrace_ip4_arp_error;
2492 #undef _
2493   return 0;
2494 }
2495
2496 VLIB_INIT_FUNCTION(arp_notrace_init);
2497
2498
2499 /* Send an ARP request to see if given destination is reachable on given interface. */
2500 clib_error_t *
2501 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2502 {
2503   vnet_main_t * vnm = vnet_get_main();
2504   ip4_main_t * im = &ip4_main;
2505   ethernet_arp_header_t * h;
2506   ip4_address_t * src;
2507   ip_interface_address_t * ia;
2508   ip_adjacency_t * adj;
2509   vnet_hw_interface_t * hi;
2510   vnet_sw_interface_t * si;
2511   vlib_buffer_t * b;
2512   u32 bi = 0;
2513
2514   si = vnet_get_sw_interface (vnm, sw_if_index);
2515
2516   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2517     {
2518       return clib_error_return (0, "%U: interface %U down",
2519                                 format_ip4_address, dst, 
2520                                 format_vnet_sw_if_index_name, vnm, 
2521                                 sw_if_index);
2522     }
2523
2524   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2525   if (! src)
2526     {
2527       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2528       return clib_error_return 
2529         (0, "no matching interface address for destination %U (interface %U)",
2530          format_ip4_address, dst,
2531          format_vnet_sw_if_index_name, vnm, sw_if_index);
2532     }
2533
2534   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2535
2536   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2537
2538   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2539
2540   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2541
2542   h->ip4_over_ethernet[0].ip4 = src[0];
2543   h->ip4_over_ethernet[1].ip4 = dst[0];
2544
2545   b = vlib_get_buffer (vm, bi);
2546   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2547
2548   /* Add encapsulation string for software interface (e.g. ethernet header). */
2549   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2550   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2551
2552   {
2553     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2554     u32 * to_next = vlib_frame_vector_args (f);
2555     to_next[0] = bi;
2556     f->n_vectors = 1;
2557     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2558   }
2559
2560   return /* no error */ 0;
2561 }
2562
/* Next-node slots of the ip4-rewrite nodes; the values index the
   .next_nodes table of the ip4-rewrite-transit registration. */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,        /* "error-drop" */
  IP4_REWRITE_NEXT_ARP = 1,         /* "ip4-arp" */
  IP4_REWRITE_NEXT_ICMP_ERROR = 2,  /* "ip4-icmp-error" */
} ip4_rewrite_next_t;
2568
2569 always_inline uword
2570 ip4_rewrite_inline (vlib_main_t * vm,
2571                     vlib_node_runtime_t * node,
2572                     vlib_frame_t * frame,
2573                     int rewrite_for_locally_received_packets)
2574 {
2575   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2576   u32 * from = vlib_frame_vector_args (frame);
2577   u32 n_left_from, n_left_to_next, * to_next, next_index;
2578   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2579   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2580
2581   n_left_from = frame->n_vectors;
2582   next_index = node->cached_next_index;
2583   u32 cpu_index = os_get_cpu_number();
2584   
2585   while (n_left_from > 0)
2586     {
2587       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2588
2589       while (n_left_from >= 4 && n_left_to_next >= 2)
2590         {
2591           ip_adjacency_t * adj0, * adj1;
2592           vlib_buffer_t * p0, * p1;
2593           ip4_header_t * ip0, * ip1;
2594           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2595           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2596           u32 next0_override, next1_override;
2597       
2598           if (rewrite_for_locally_received_packets)
2599               next0_override = next1_override = 0;
2600
2601           /* Prefetch next iteration. */
2602           {
2603             vlib_buffer_t * p2, * p3;
2604
2605             p2 = vlib_get_buffer (vm, from[2]);
2606             p3 = vlib_get_buffer (vm, from[3]);
2607
2608             vlib_prefetch_buffer_header (p2, STORE);
2609             vlib_prefetch_buffer_header (p3, STORE);
2610
2611             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2612             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2613           }
2614
2615           pi0 = to_next[0] = from[0];
2616           pi1 = to_next[1] = from[1];
2617
2618           from += 2;
2619           n_left_from -= 2;
2620           to_next += 2;
2621           n_left_to_next -= 2;
2622       
2623           p0 = vlib_get_buffer (vm, pi0);
2624           p1 = vlib_get_buffer (vm, pi1);
2625
2626           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2627           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2628
2629           /* We should never rewrite a pkt using the MISS adjacency */
2630           ASSERT(adj_index0 && adj_index1);
2631
2632           ip0 = vlib_buffer_get_current (p0);
2633           ip1 = vlib_buffer_get_current (p1);
2634
2635           error0 = error1 = IP4_ERROR_NONE;
2636           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2637
2638           /* Decrement TTL & update checksum.
2639              Works either endian, so no need for byte swap. */
2640           if (! rewrite_for_locally_received_packets)
2641             {
2642               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2643
2644               /* Input node should have reject packets with ttl 0. */
2645               ASSERT (ip0->ttl > 0);
2646               ASSERT (ip1->ttl > 0);
2647
2648               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2649               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2650
2651               checksum0 += checksum0 >= 0xffff;
2652               checksum1 += checksum1 >= 0xffff;
2653
2654               ip0->checksum = checksum0;
2655               ip1->checksum = checksum1;
2656
2657               ttl0 -= 1;
2658               ttl1 -= 1;
2659
2660               ip0->ttl = ttl0;
2661               ip1->ttl = ttl1;
2662
2663               /*
2664                * If the ttl drops below 1 when forwarding, generate
2665                * an ICMP response.
2666                */
2667               if (PREDICT_FALSE(ttl0 <= 0))
2668                 {
2669                   error0 = IP4_ERROR_TIME_EXPIRED;
2670                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2671                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2672                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2673                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2674                 }
2675               if (PREDICT_FALSE(ttl1 <= 0))
2676                 {
2677                   error1 = IP4_ERROR_TIME_EXPIRED;
2678                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2679                   icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2680                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2681                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2682                 }
2683
2684               /* Verify checksum. */
2685               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2686               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2687             }
2688
2689           /* Rewrite packet header and updates lengths. */
2690           adj0 = ip_get_adjacency (lm, adj_index0);
2691           adj1 = ip_get_adjacency (lm, adj_index1);
2692       
2693           if (rewrite_for_locally_received_packets)
2694             {
2695               /*
2696                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2697                * we end up here with a local adjacency in hand
2698                * The local adj rewrite data is 0xfefe on purpose.
2699                * Bad engineer, no donut for you.
2700                */
2701               if (PREDICT_FALSE(adj0->lookup_next_index 
2702                                 == IP_LOOKUP_NEXT_LOCAL))
2703                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2704               if (PREDICT_FALSE(adj0->lookup_next_index
2705                                 == IP_LOOKUP_NEXT_ARP))
2706                 next0_override = IP4_REWRITE_NEXT_ARP;
2707               if (PREDICT_FALSE(adj1->lookup_next_index 
2708                                 == IP_LOOKUP_NEXT_LOCAL))
2709                 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2710               if (PREDICT_FALSE(adj1->lookup_next_index
2711                                 == IP_LOOKUP_NEXT_ARP))
2712                 next1_override = IP4_REWRITE_NEXT_ARP;
2713             }
2714
2715           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2716           rw_len0 = adj0[0].rewrite_header.data_bytes;
2717           rw_len1 = adj1[0].rewrite_header.data_bytes;
2718
2719           /* Check MTU of outgoing interface. */
2720           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2721                     ? IP4_ERROR_MTU_EXCEEDED
2722                     : error0);
2723           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2724                     ? IP4_ERROR_MTU_EXCEEDED
2725                     : error1);
2726
2727           next0 = (error0 == IP4_ERROR_NONE)
2728             ? adj0[0].rewrite_header.next_index : next0;
2729
2730           if (rewrite_for_locally_received_packets)
2731               next0 = next0 && next0_override ? next0_override : next0;
2732
2733           next1 = (error1 == IP4_ERROR_NONE)
2734             ? adj1[0].rewrite_header.next_index : next1;
2735
2736           if (rewrite_for_locally_received_packets)
2737               next1 = next1 && next1_override ? next1_override : next1;
2738
2739           /* 
2740            * We've already accounted for an ethernet_header_t elsewhere
2741            */
2742           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2743               vlib_increment_combined_counter 
2744                   (&lm->adjacency_counters,
2745                    cpu_index, adj_index0, 
2746                    /* packet increment */ 0,
2747                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2748
2749           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2750               vlib_increment_combined_counter 
2751                   (&lm->adjacency_counters,
2752                    cpu_index, adj_index1, 
2753                    /* packet increment */ 0,
2754                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2755
2756           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2757            * to see the IP headerr */
2758           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2759             {
2760               p0->current_data -= rw_len0;
2761               p0->current_length += rw_len0;
2762               p0->error = error_node->errors[error0];
2763               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2764                   adj0[0].rewrite_header.sw_if_index;
2765             }
2766           if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2767             {
2768               p1->current_data -= rw_len1;
2769               p1->current_length += rw_len1;
2770               p1->error = error_node->errors[error1];
2771               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2772                   adj1[0].rewrite_header.sw_if_index;
2773             }
2774
2775           /* Guess we are only writing on simple Ethernet header. */
2776           vnet_rewrite_two_headers (adj0[0], adj1[0],
2777                                     ip0, ip1,
2778                                     sizeof (ethernet_header_t));
2779       
2780           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2781                                            to_next, n_left_to_next,
2782                                            pi0, pi1, next0, next1);
2783         }
2784
2785       while (n_left_from > 0 && n_left_to_next > 0)
2786         {
2787           ip_adjacency_t * adj0;
2788           vlib_buffer_t * p0;
2789           ip4_header_t * ip0;
2790           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2791           u32 next0_override;
2792       
2793           if (rewrite_for_locally_received_packets)
2794               next0_override = 0;
2795
2796           pi0 = to_next[0] = from[0];
2797
2798           p0 = vlib_get_buffer (vm, pi0);
2799
2800           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2801
2802           /* We should never rewrite a pkt using the MISS adjacency */
2803           ASSERT(adj_index0);
2804
2805           adj0 = ip_get_adjacency (lm, adj_index0);
2806       
2807           ip0 = vlib_buffer_get_current (p0);
2808
2809           error0 = IP4_ERROR_NONE;
2810           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
2811
2812           /* Decrement TTL & update checksum. */
2813           if (! rewrite_for_locally_received_packets)
2814             {
2815               i32 ttl0 = ip0->ttl;
2816
2817               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2818
2819               checksum0 += checksum0 >= 0xffff;
2820
2821               ip0->checksum = checksum0;
2822
2823               ASSERT (ip0->ttl > 0);
2824
2825               ttl0 -= 1;
2826
2827               ip0->ttl = ttl0;
2828
2829               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2830
2831               if (PREDICT_FALSE(ttl0 <= 0))
2832                 {
2833                   /*
2834                    * If the ttl drops below 1 when forwarding, generate
2835                    * an ICMP response.
2836                    */
2837                   error0 = IP4_ERROR_TIME_EXPIRED;
2838                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2839                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2840                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2841                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2842                 }
2843             }
2844
2845           if (rewrite_for_locally_received_packets)
2846             {
2847               /*
2848                * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2849                * we end up here with a local adjacency in hand
2850                * The local adj rewrite data is 0xfefe on purpose.
2851                * Bad engineer, no donut for you.
2852                */
2853               if (PREDICT_FALSE(adj0->lookup_next_index 
2854                                 == IP_LOOKUP_NEXT_LOCAL))
2855                 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2856               /* 
2857                * We have to override the next_index in ARP adjacencies,
2858                * because they're set up for ip4-arp, not this node...
2859                */
2860               if (PREDICT_FALSE(adj0->lookup_next_index
2861                                 == IP_LOOKUP_NEXT_ARP))
2862                 next0_override = IP4_REWRITE_NEXT_ARP;
2863             }
2864
2865           /* Guess we are only writing on simple Ethernet header. */
2866           vnet_rewrite_one_header (adj0[0], ip0, 
2867                                    sizeof (ethernet_header_t));
2868           
2869           /* Update packet buffer attributes/set output interface. */
2870           rw_len0 = adj0[0].rewrite_header.data_bytes;
2871           
2872           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2873               vlib_increment_combined_counter 
2874                   (&lm->adjacency_counters,
2875                    cpu_index, adj_index0, 
2876                    /* packet increment */ 0,
2877                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2878           
2879           /* Check MTU of outgoing interface. */
2880           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2881                     > adj0[0].rewrite_header.max_l3_packet_bytes
2882                     ? IP4_ERROR_MTU_EXCEEDED
2883                     : error0);
2884
2885           p0->error = error_node->errors[error0];
2886
2887           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2888            * to see the IP headerr */
2889           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2890             {
2891               p0->current_data -= rw_len0;
2892               p0->current_length += rw_len0;
2893
2894               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2895                   adj0[0].rewrite_header.sw_if_index;
2896               next0 = adj0[0].rewrite_header.next_index;
2897             }
2898
2899           if (rewrite_for_locally_received_packets)
2900               next0 = next0 && next0_override ? next0_override : next0;
2901
2902           from += 1;
2903           n_left_from -= 1;
2904           to_next += 1;
2905           n_left_to_next -= 1;
2906       
2907           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2908                                            to_next, n_left_to_next,
2909                                            pi0, next0);
2910         }
2911   
2912       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2913     }
2914
2915   /* Need to do trace after rewrites to pick up new packet data. */
2916   if (node->flags & VLIB_NODE_FLAG_TRACE)
2917     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2918
2919   return frame->n_vectors;
2920 }
2921
2922 static uword
2923 ip4_rewrite_transit (vlib_main_t * vm,
2924                      vlib_node_runtime_t * node,
2925                      vlib_frame_t * frame)
2926 {
2927   return ip4_rewrite_inline (vm, node, frame,
2928                              /* rewrite_for_locally_received_packets */ 0);
2929 }
2930
2931 static uword
2932 ip4_rewrite_local (vlib_main_t * vm,
2933                    vlib_node_runtime_t * node,
2934                    vlib_frame_t * frame)
2935 {
2936   return ip4_rewrite_inline (vm, node, frame,
2937                              /* rewrite_for_locally_received_packets */ 1);
2938 }
2939
2940 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2941   .function = ip4_rewrite_transit,
2942   .name = "ip4-rewrite-transit",
2943   .vector_size = sizeof (u32),
2944
2945   .format_trace = format_ip4_rewrite_trace,
2946
2947   .n_next_nodes = 3,
2948   .next_nodes = {
2949     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2950     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2951     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2952   },
2953 };
2954
2955 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
2956
2957 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
2958   .function = ip4_rewrite_local,
2959   .name = "ip4-rewrite-local",
2960   .vector_size = sizeof (u32),
2961
2962   .sibling_of = "ip4-rewrite-transit",
2963
2964   .format_trace = format_ip4_rewrite_trace,
2965
2966   .n_next_nodes = 0,
2967 };
2968
2969 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
2970
2971 static clib_error_t *
2972 add_del_interface_table (vlib_main_t * vm,
2973                          unformat_input_t * input,
2974                          vlib_cli_command_t * cmd)
2975 {
2976   vnet_main_t * vnm = vnet_get_main();
2977   clib_error_t * error = 0;
2978   u32 sw_if_index, table_id;
2979
2980   sw_if_index = ~0;
2981
2982   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2983     {
2984       error = clib_error_return (0, "unknown interface `%U'",
2985                                  format_unformat_error, input);
2986       goto done;
2987     }
2988
2989   if (unformat (input, "%d", &table_id))
2990     ;
2991   else
2992     {
2993       error = clib_error_return (0, "expected table id `%U'",
2994                                  format_unformat_error, input);
2995       goto done;
2996     }
2997
2998   {
2999     ip4_main_t * im = &ip4_main;
3000     ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
3001
3002     if (fib) 
3003       {
3004         vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
3005         im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
3006     }
3007   }
3008
3009  done:
3010   return error;
3011 }
3012
3013 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
3014   .path = "set interface ip table",
3015   .function = add_del_interface_table,
3016   .short_help = "Add/delete FIB table id for interface",
3017 };
3018
3019
3020 static uword
3021 ip4_lookup_multicast (vlib_main_t * vm,
3022                       vlib_node_runtime_t * node,
3023                       vlib_frame_t * frame)
3024 {
3025   ip4_main_t * im = &ip4_main;
3026   ip_lookup_main_t * lm = &im->lookup_main;
3027   vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
3028   u32 n_left_from, n_left_to_next, * from, * to_next;
3029   ip_lookup_next_t next;
3030   u32 cpu_index = os_get_cpu_number();
3031
3032   from = vlib_frame_vector_args (frame);
3033   n_left_from = frame->n_vectors;
3034   next = node->cached_next_index;
3035
3036   while (n_left_from > 0)
3037     {
3038       vlib_get_next_frame (vm, node, next,
3039                            to_next, n_left_to_next);
3040
3041       while (n_left_from >= 4 && n_left_to_next >= 2)
3042         {
3043           vlib_buffer_t * p0, * p1;
3044           u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
3045           ip_lookup_next_t next0, next1;
3046           ip4_header_t * ip0, * ip1;
3047           ip_adjacency_t * adj0, * adj1;
3048           u32 fib_index0, fib_index1;
3049           u32 flow_hash_config0, flow_hash_config1;
3050
3051           /* Prefetch next iteration. */
3052           {
3053             vlib_buffer_t * p2, * p3;
3054
3055             p2 = vlib_get_buffer (vm, from[2]);
3056             p3 = vlib_get_buffer (vm, from[3]);
3057
3058             vlib_prefetch_buffer_header (p2, LOAD);
3059             vlib_prefetch_buffer_header (p3, LOAD);
3060
3061             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
3062             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
3063           }
3064
3065           pi0 = to_next[0] = from[0];
3066           pi1 = to_next[1] = from[1];
3067
3068           p0 = vlib_get_buffer (vm, pi0);
3069           p1 = vlib_get_buffer (vm, pi1);
3070
3071           ip0 = vlib_buffer_get_current (p0);
3072           ip1 = vlib_buffer_get_current (p1);
3073
3074           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3075           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
3076           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3077             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3078           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
3079             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
3080
3081           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3082                                               &ip0->dst_address, p0);
3083           adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, 
3084                                               &ip1->dst_address, p1);
3085
3086           adj0 = ip_get_adjacency (lm, adj_index0);
3087           adj1 = ip_get_adjacency (lm, adj_index1);
3088
3089           next0 = adj0->lookup_next_index;
3090           next1 = adj1->lookup_next_index;
3091
3092           flow_hash_config0 = 
3093               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3094
3095           flow_hash_config1 = 
3096               vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
3097
3098           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
3099               (ip0, flow_hash_config0);
3100                                                                   
3101           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
3102               (ip1, flow_hash_config1);
3103
3104           ASSERT (adj0->n_adj > 0);
3105           ASSERT (adj1->n_adj > 0);
3106           ASSERT (is_pow2 (adj0->n_adj));
3107           ASSERT (is_pow2 (adj1->n_adj));
3108           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3109           adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
3110
3111           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3112           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
3113
3114           if (1) /* $$$$$$ HACK FIXME */
3115           vlib_increment_combined_counter 
3116               (cm, cpu_index, adj_index0, 1,
3117                vlib_buffer_length_in_chain (vm, p0));
3118           if (1) /* $$$$$$ HACK FIXME */
3119           vlib_increment_combined_counter 
3120               (cm, cpu_index, adj_index1, 1,
3121                vlib_buffer_length_in_chain (vm, p1));
3122
3123           from += 2;
3124           to_next += 2;
3125           n_left_to_next -= 2;
3126           n_left_from -= 2;
3127
3128           wrong_next = (next0 != next) + 2*(next1 != next);
3129           if (PREDICT_FALSE (wrong_next != 0))
3130             {
3131               switch (wrong_next)
3132                 {
3133                 case 1:
3134                   /* A B A */
3135                   to_next[-2] = pi1;
3136                   to_next -= 1;
3137                   n_left_to_next += 1;
3138                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3139                   break;
3140
3141                 case 2:
3142                   /* A A B */
3143                   to_next -= 1;
3144                   n_left_to_next += 1;
3145                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3146                   break;
3147
3148                 case 3:
3149                   /* A B C */
3150                   to_next -= 2;
3151                   n_left_to_next += 2;
3152                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
3153                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
3154                   if (next0 == next1)
3155                     {
3156                       /* A B B */
3157                       vlib_put_next_frame (vm, node, next, n_left_to_next);
3158                       next = next1;
3159                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
3160                     }
3161                 }
3162             }
3163         }
3164     
3165       while (n_left_from > 0 && n_left_to_next > 0)
3166         {
3167           vlib_buffer_t * p0;
3168           ip4_header_t * ip0;
3169           u32 pi0, adj_index0;
3170           ip_lookup_next_t next0;
3171           ip_adjacency_t * adj0;
3172           u32 fib_index0;
3173           u32 flow_hash_config0;
3174
3175           pi0 = from[0];
3176           to_next[0] = pi0;
3177
3178           p0 = vlib_get_buffer (vm, pi0);
3179
3180           ip0 = vlib_buffer_get_current (p0);
3181
3182           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
3183                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3184           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3185               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3186           
3187           adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, 
3188                                               &ip0->dst_address, p0);
3189
3190           adj0 = ip_get_adjacency (lm, adj_index0);
3191
3192           next0 = adj0->lookup_next_index;
3193
3194           flow_hash_config0 = 
3195               vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3196
3197           vnet_buffer (p0)->ip.flow_hash = 
3198             ip4_compute_flow_hash (ip0, flow_hash_config0);
3199
3200           ASSERT (adj0->n_adj > 0);
3201           ASSERT (is_pow2 (adj0->n_adj));
3202           adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3203
3204           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3205
3206           if (1) /* $$$$$$ HACK FIXME */
3207               vlib_increment_combined_counter 
3208                   (cm, cpu_index, adj_index0, 1,
3209                    vlib_buffer_length_in_chain (vm, p0));
3210
3211           from += 1;
3212           to_next += 1;
3213           n_left_to_next -= 1;
3214           n_left_from -= 1;
3215
3216           if (PREDICT_FALSE (next0 != next))
3217             {
3218               n_left_to_next += 1;
3219               vlib_put_next_frame (vm, node, next, n_left_to_next);
3220               next = next0;
3221               vlib_get_next_frame (vm, node, next,
3222                                    to_next, n_left_to_next);
3223               to_next[0] = pi0;
3224               to_next += 1;
3225               n_left_to_next -= 1;
3226             }
3227         }
3228
3229       vlib_put_next_frame (vm, node, next, n_left_to_next);
3230     }
3231
3232   if (node->flags & VLIB_NODE_FLAG_TRACE)
3233       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
3234
3235   return frame->n_vectors;
3236 }
3237
3238 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3239   .function = ip4_lookup_multicast,
3240   .name = "ip4-lookup-multicast",
3241   .vector_size = sizeof (u32),
3242   .sibling_of = "ip4-lookup",
3243   .format_trace = format_ip4_lookup_trace,
3244
3245   .n_next_nodes = 0,
3246 };
3247
3248 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
3249
3250 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3251   .function = ip4_drop,
3252   .name = "ip4-multicast",
3253   .vector_size = sizeof (u32),
3254
3255   .format_trace = format_ip4_forward_next_trace,
3256
3257   .n_next_nodes = 1,
3258   .next_nodes = {
3259     [0] = "error-drop",
3260   },
3261 };
3262
3263 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3264 {
3265   ip4_main_t * im = &ip4_main;
3266   ip4_fib_mtrie_t * mtrie0;
3267   ip4_fib_mtrie_leaf_t leaf0;
3268   u32 adj_index0;
3269     
3270   mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3271
3272   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3273   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3274   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3275   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3276   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3277   
3278   /* Handle default route. */
3279   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3280   
3281   adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3282   
3283   return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3284                                                   a, 
3285                                                   /* no_default_route */ 0);
3286 }
3287  
3288 static clib_error_t *
3289 test_lookup_command_fn (vlib_main_t * vm,
3290                         unformat_input_t * input,
3291                         vlib_cli_command_t * cmd)
3292 {
3293   u32 table_id = 0;
3294   f64 count = 1;
3295   u32 n;
3296   int i;
3297   ip4_address_t ip4_base_address;
3298   u64 errors = 0;
3299
3300   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3301       if (unformat (input, "table %d", &table_id))
3302         ;
3303       else if (unformat (input, "count %f", &count))
3304         ;
3305
3306       else if (unformat (input, "%U",
3307                          unformat_ip4_address, &ip4_base_address))
3308         ;
3309       else
3310         return clib_error_return (0, "unknown input `%U'",
3311                                   format_unformat_error, input);
3312   }
3313
3314   n = count;
3315
3316   for (i = 0; i < n; i++)
3317     {
3318       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3319         errors++;
3320
3321       ip4_base_address.as_u32 = 
3322         clib_host_to_net_u32 (1 + 
3323                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3324     }
3325
3326   if (errors) 
3327     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3328   else
3329     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3330
3331   return 0;
3332 }
3333
3334 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3335     .path = "test lookup",
3336     .short_help = "test lookup",
3337     .function = test_lookup_command_fn,
3338 };
3339
3340 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3341 {
3342   ip4_main_t * im4 = &ip4_main;
3343   ip4_fib_t * fib;
3344   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3345
3346   if (p == 0)
3347     return VNET_API_ERROR_NO_SUCH_FIB;
3348
3349   fib = vec_elt_at_index (im4->fibs, p[0]);
3350
3351   fib->flow_hash_config = flow_hash_config;
3352   return 0;
3353 }
3354  
3355 static clib_error_t *
3356 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3357                              unformat_input_t * input,
3358                              vlib_cli_command_t * cmd)
3359 {
3360   int matched = 0;
3361   u32 table_id = 0;
3362   u32 flow_hash_config = 0;
3363   int rv;
3364
3365   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3366     if (unformat (input, "table %d", &table_id))
3367       matched = 1;
3368 #define _(a,v) \
3369     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3370     foreach_flow_hash_bit
3371 #undef _
3372     else break;
3373   }
3374   
3375   if (matched == 0)
3376     return clib_error_return (0, "unknown input `%U'",
3377                               format_unformat_error, input);
3378   
3379   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3380   switch (rv)
3381     {
3382     case 0:
3383       break;
3384       
3385     case VNET_API_ERROR_NO_SUCH_FIB:
3386       return clib_error_return (0, "no such FIB table %d", table_id);
3387       
3388     default:
3389       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3390       break;
3391     }
3392   
3393   return 0;
3394 }
3395  
3396 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3397   .path = "set ip flow-hash",
3398   .short_help = 
3399   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3400   .function = set_ip_flow_hash_command_fn,
3401 };
3402  
3403 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3404                                  u32 table_index)
3405 {
3406   vnet_main_t * vnm = vnet_get_main();
3407   vnet_interface_main_t * im = &vnm->interface_main;
3408   ip4_main_t * ipm = &ip4_main;
3409   ip_lookup_main_t * lm = &ipm->lookup_main;
3410   vnet_classify_main_t * cm = &vnet_classify_main;
3411
3412   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3413     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3414
3415   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3416     return VNET_API_ERROR_NO_SUCH_ENTRY;
3417
3418   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3419   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3420
3421   return 0;
3422 }
3423
3424 static clib_error_t *
3425 set_ip_classify_command_fn (vlib_main_t * vm,
3426                             unformat_input_t * input,
3427                             vlib_cli_command_t * cmd)
3428 {
3429   u32 table_index = ~0;
3430   int table_index_set = 0;
3431   u32 sw_if_index = ~0;
3432   int rv;
3433   
3434   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3435     if (unformat (input, "table-index %d", &table_index))
3436       table_index_set = 1;
3437     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3438                        vnet_get_main(), &sw_if_index))
3439       ;
3440     else
3441       break;
3442   }
3443       
3444   if (table_index_set == 0)
3445     return clib_error_return (0, "classify table-index must be specified");
3446
3447   if (sw_if_index == ~0)
3448     return clib_error_return (0, "interface / subif must be specified");
3449
3450   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3451
3452   switch (rv)
3453     {
3454     case 0:
3455       break;
3456
3457     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3458       return clib_error_return (0, "No such interface");
3459
3460     case VNET_API_ERROR_NO_SUCH_ENTRY:
3461       return clib_error_return (0, "No such classifier table");
3462     }
3463   return 0;
3464 }
3465
3466 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3467     .path = "set ip classify",
3468     .short_help = 
3469     "set ip classify intfc <int> table-index <index>",
3470     .function = set_ip_classify_command_fn,
3471 };
3472