urpf: Unicast reverse Path Forwarding (plugin)
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/classify_dpo.h>
56 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
57
58 #include <vnet/ip/ip4_forward.h>
59 #include <vnet/interface_output.h>
60 #include <vnet/classify/vnet_classify.h>
61
62 /** @brief IPv4 lookup node.
63     @node ip4-lookup
64
65     This is the main IPv4 lookup dispatch node.
66
67     @param vm vlib_main_t corresponding to the current thread
68     @param node vlib_node_runtime_t
69     @param frame vlib_frame_t whose contents should be dispatched
70
71     @par Graph mechanics: buffer metadata, next index usage
72
73     @em Uses:
74     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
75         - Indicates the @c sw_if_index value of the interface that the
76           packet was received on.
77     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
78         - When the value is @c ~0 then the node performs a longest prefix
79           match (LPM) for the packet destination address in the FIB attached
80           to the receive interface.
81         - Otherwise perform LPM for the packet destination address in the
82           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
83           value (0, 1, ...) and not a VRF id.
84
85     @em Sets:
86     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
87         - The lookup result adjacency index.
88
89     <em>Next Index:</em>
90     - Dispatches the packet to the node index found in
91       ip_adjacency_t @c adj->lookup_next_index
92       (where @c adj is the lookup result adjacency).
93 */
94 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
95                                 vlib_frame_t * frame)
96 {
97   return ip4_lookup_inline (vm, node, frame);
98 }
99
100 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
101
102 /* *INDENT-OFF* */
103 VLIB_REGISTER_NODE (ip4_lookup_node) =
104 {
105   .name = "ip4-lookup",
106   .vector_size = sizeof (u32),
107   .format_trace = format_ip4_lookup_trace,
108   .n_next_nodes = IP_LOOKUP_N_NEXT,
109   .next_nodes = IP4_LOOKUP_NEXT_NODES,
110 };
111 /* *INDENT-ON* */
112
113 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
114                                       vlib_node_runtime_t * node,
115                                       vlib_frame_t * frame)
116 {
117   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
118   u32 n_left, *from;
119   u32 thread_index = vm->thread_index;
120   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
121   u16 nexts[VLIB_FRAME_SIZE], *next;
122
123   from = vlib_frame_vector_args (frame);
124   n_left = frame->n_vectors;
125   next = nexts;
126
127   vlib_get_buffers (vm, from, bufs, n_left);
128
129   while (n_left >= 4)
130     {
131       const load_balance_t *lb0, *lb1;
132       const ip4_header_t *ip0, *ip1;
133       u32 lbi0, hc0, lbi1, hc1;
134       const dpo_id_t *dpo0, *dpo1;
135
136       /* Prefetch next iteration. */
137       {
138         vlib_prefetch_buffer_header (b[2], LOAD);
139         vlib_prefetch_buffer_header (b[3], LOAD);
140
141         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
142         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
143       }
144
145       ip0 = vlib_buffer_get_current (b[0]);
146       ip1 = vlib_buffer_get_current (b[1]);
147       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
148       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
149
150       lb0 = load_balance_get (lbi0);
151       lb1 = load_balance_get (lbi1);
152
153       /*
154        * this node is for via FIBs we can re-use the hash value from the
155        * to node if present.
156        * We don't want to use the same hash value at each level in the recursion
157        * graph as that would lead to polarisation
158        */
159       hc0 = hc1 = 0;
160
161       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
162         {
163           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
164             {
165               hc0 = vnet_buffer (b[0])->ip.flow_hash =
166                 vnet_buffer (b[0])->ip.flow_hash >> 1;
167             }
168           else
169             {
170               hc0 = vnet_buffer (b[0])->ip.flow_hash =
171                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
172             }
173           dpo0 = load_balance_get_fwd_bucket
174             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
175         }
176       else
177         {
178           dpo0 = load_balance_get_bucket_i (lb0, 0);
179         }
180       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
181         {
182           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
183             {
184               hc1 = vnet_buffer (b[1])->ip.flow_hash =
185                 vnet_buffer (b[1])->ip.flow_hash >> 1;
186             }
187           else
188             {
189               hc1 = vnet_buffer (b[1])->ip.flow_hash =
190                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
191             }
192           dpo1 = load_balance_get_fwd_bucket
193             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
194         }
195       else
196         {
197           dpo1 = load_balance_get_bucket_i (lb1, 0);
198         }
199
200       next[0] = dpo0->dpoi_next_node;
201       next[1] = dpo1->dpoi_next_node;
202
203       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
204       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
205
206       vlib_increment_combined_counter
207         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
208       vlib_increment_combined_counter
209         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
210
211       b += 2;
212       next += 2;
213       n_left -= 2;
214     }
215
216   while (n_left > 0)
217     {
218       const load_balance_t *lb0;
219       const ip4_header_t *ip0;
220       const dpo_id_t *dpo0;
221       u32 lbi0, hc0;
222
223       ip0 = vlib_buffer_get_current (b[0]);
224       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
225
226       lb0 = load_balance_get (lbi0);
227
228       hc0 = 0;
229       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
230         {
231           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
232             {
233               hc0 = vnet_buffer (b[0])->ip.flow_hash =
234                 vnet_buffer (b[0])->ip.flow_hash >> 1;
235             }
236           else
237             {
238               hc0 = vnet_buffer (b[0])->ip.flow_hash =
239                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
240             }
241           dpo0 = load_balance_get_fwd_bucket
242             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
243         }
244       else
245         {
246           dpo0 = load_balance_get_bucket_i (lb0, 0);
247         }
248
249       next[0] = dpo0->dpoi_next_node;
250       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
251
252       vlib_increment_combined_counter
253         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
254
255       b += 1;
256       next += 1;
257       n_left -= 1;
258     }
259
260   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
261   if (node->flags & VLIB_NODE_FLAG_TRACE)
262     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
263
264   return frame->n_vectors;
265 }
266
267 /* *INDENT-OFF* */
268 VLIB_REGISTER_NODE (ip4_load_balance_node) =
269 {
270   .name = "ip4-load-balance",
271   .vector_size = sizeof (u32),
272   .sibling_of = "ip4-lookup",
273   .format_trace = format_ip4_lookup_trace,
274 };
275 /* *INDENT-ON* */
276
277 #ifndef CLIB_MARCH_VARIANT
278 /* get first interface address */
279 ip4_address_t *
280 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
281                              ip_interface_address_t ** result_ia)
282 {
283   ip_lookup_main_t *lm = &im->lookup_main;
284   ip_interface_address_t *ia = 0;
285   ip4_address_t *result = 0;
286
287   /* *INDENT-OFF* */
288   foreach_ip_interface_address
289     (lm, ia, sw_if_index,
290      1 /* honor unnumbered */ ,
291      ({
292        ip4_address_t * a =
293          ip_interface_address_get_address (lm, ia);
294        result = a;
295        break;
296      }));
297   /* *INDENT-OFF* */
298   if (result_ia)
299     *result_ia = result ? ia : 0;
300   return result;
301 }
302 #endif
303
304 static void
305 ip4_add_subnet_bcast_route (u32 fib_index,
306                             fib_prefix_t *pfx,
307                             u32 sw_if_index)
308 {
309   vnet_sw_interface_flags_t iflags;
310
311   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
312
313   fib_table_entry_special_remove(fib_index,
314                                  pfx,
315                                  FIB_SOURCE_INTERFACE);
316
317   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
318     {
319       fib_table_entry_update_one_path (fib_index, pfx,
320                                        FIB_SOURCE_INTERFACE,
321                                        FIB_ENTRY_FLAG_NONE,
322                                        DPO_PROTO_IP4,
323                                        /* No next-hop address */
324                                        &ADJ_BCAST_ADDR,
325                                        sw_if_index,
326                                        // invalid FIB index
327                                        ~0,
328                                        1,
329                                        // no out-label stack
330                                        NULL,
331                                        FIB_ROUTE_PATH_FLAG_NONE);
332     }
333   else
334     {
335         fib_table_entry_special_add(fib_index,
336                                     pfx,
337                                     FIB_SOURCE_INTERFACE,
338                                     (FIB_ENTRY_FLAG_DROP |
339                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
340     }
341 }
342
343 static void
344 ip4_add_interface_prefix_routes (ip4_main_t *im,
345                                  u32 sw_if_index,
346                                  u32 fib_index,
347                                  ip_interface_address_t * a)
348 {
349   ip_lookup_main_t *lm = &im->lookup_main;
350   ip_interface_prefix_t *if_prefix;
351   ip4_address_t *address = ip_interface_address_get_address (lm, a);
352
353   ip_interface_prefix_key_t key = {
354     .prefix = {
355       .fp_len = a->address_length,
356       .fp_proto = FIB_PROTOCOL_IP4,
357       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
358     },
359     .sw_if_index = sw_if_index,
360   };
361
362   fib_prefix_t pfx_special = {
363     .fp_proto = FIB_PROTOCOL_IP4,
364   };
365
366   /* If prefix already set on interface, just increment ref count & return */
367   if_prefix = ip_get_interface_prefix (lm, &key);
368   if (if_prefix)
369     {
370       if_prefix->ref_count += 1;
371       return;
372     }
373
374   /* New prefix - allocate a pool entry, initialize it, add to the hash */
375   pool_get (lm->if_prefix_pool, if_prefix);
376   if_prefix->ref_count = 1;
377   if_prefix->src_ia_index = a - lm->if_address_pool;
378   clib_memcpy (&if_prefix->key, &key, sizeof (key));
379   mhash_set (&lm->prefix_to_if_prefix_index, &key,
380              if_prefix - lm->if_prefix_pool, 0 /* old value */);
381
382   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
383   if (a->address_length <= 30)
384     {
385       pfx_special.fp_len = a->address_length;
386       pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
387
388       /* set the glean route for the prefix */
389       fib_table_entry_update_one_path (fib_index, &pfx_special,
390                                        FIB_SOURCE_INTERFACE,
391                                        (FIB_ENTRY_FLAG_CONNECTED |
392                                         FIB_ENTRY_FLAG_ATTACHED),
393                                        DPO_PROTO_IP4,
394                                        /* No next-hop address */
395                                        NULL,
396                                        sw_if_index,
397                                        /* invalid FIB index */
398                                        ~0,
399                                        1,
400                                        /* no out-label stack */
401                                        NULL,
402                                        FIB_ROUTE_PATH_FLAG_NONE);
403
404       /* set a drop route for the base address of the prefix */
405       pfx_special.fp_len = 32;
406       pfx_special.fp_addr.ip4.as_u32 =
407         address->as_u32 & im->fib_masks[a->address_length];
408
409       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
410         fib_table_entry_special_add (fib_index, &pfx_special,
411                                      FIB_SOURCE_INTERFACE,
412                                      (FIB_ENTRY_FLAG_DROP |
413                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
414
415       /* set a route for the broadcast address of the prefix */
416       pfx_special.fp_len = 32;
417       pfx_special.fp_addr.ip4.as_u32 =
418         address->as_u32 | ~im->fib_masks[a->address_length];
419       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
420         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
421
422
423     }
424   /* length == 31 - add an attached route for the other address */
425   else if (a->address_length == 31)
426     {
427       pfx_special.fp_len = 32;
428       pfx_special.fp_addr.ip4.as_u32 =
429         address->as_u32 ^ clib_host_to_net_u32(1);
430
431       fib_table_entry_update_one_path (fib_index, &pfx_special,
432                                        FIB_SOURCE_INTERFACE,
433                                        (FIB_ENTRY_FLAG_ATTACHED),
434                                        DPO_PROTO_IP4,
435                                        &pfx_special.fp_addr,
436                                        sw_if_index,
437                                        /* invalid FIB index */
438                                        ~0,
439                                        1,
440                                        NULL,
441                                        FIB_ROUTE_PATH_FLAG_NONE);
442     }
443 }
444
445 static void
446 ip4_add_interface_routes (u32 sw_if_index,
447                           ip4_main_t * im, u32 fib_index,
448                           ip_interface_address_t * a)
449 {
450   ip_lookup_main_t *lm = &im->lookup_main;
451   ip4_address_t *address = ip_interface_address_get_address (lm, a);
452   fib_prefix_t pfx = {
453     .fp_len = 32,
454     .fp_proto = FIB_PROTOCOL_IP4,
455     .fp_addr.ip4 = *address,
456   };
457
458   /* set special routes for the prefix if needed */
459   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
460
461   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
462     {
463       u32 classify_table_index =
464         lm->classify_table_index_by_sw_if_index[sw_if_index];
465       if (classify_table_index != (u32) ~ 0)
466         {
467           dpo_id_t dpo = DPO_INVALID;
468
469           dpo_set (&dpo,
470                    DPO_CLASSIFY,
471                    DPO_PROTO_IP4,
472                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
473
474           fib_table_entry_special_dpo_add (fib_index,
475                                            &pfx,
476                                            FIB_SOURCE_CLASSIFY,
477                                            FIB_ENTRY_FLAG_NONE, &dpo);
478           dpo_reset (&dpo);
479         }
480     }
481
482   fib_table_entry_update_one_path (fib_index, &pfx,
483                                    FIB_SOURCE_INTERFACE,
484                                    (FIB_ENTRY_FLAG_CONNECTED |
485                                     FIB_ENTRY_FLAG_LOCAL),
486                                    DPO_PROTO_IP4,
487                                    &pfx.fp_addr,
488                                    sw_if_index,
489                                    // invalid FIB index
490                                    ~0,
491                                    1, NULL,
492                                    FIB_ROUTE_PATH_FLAG_NONE);
493 }
494
495 static void
496 ip4_del_interface_prefix_routes (ip4_main_t * im,
497                                  u32 sw_if_index,
498                                  u32 fib_index,
499                                  ip4_address_t * address,
500                                  u32 address_length)
501 {
502   ip_lookup_main_t *lm = &im->lookup_main;
503   ip_interface_prefix_t *if_prefix;
504
505   ip_interface_prefix_key_t key = {
506     .prefix = {
507       .fp_len = address_length,
508       .fp_proto = FIB_PROTOCOL_IP4,
509       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
510     },
511     .sw_if_index = sw_if_index,
512   };
513
514   fib_prefix_t pfx_special = {
515     .fp_len = 32,
516     .fp_proto = FIB_PROTOCOL_IP4,
517   };
518
519   if_prefix = ip_get_interface_prefix (lm, &key);
520   if (!if_prefix)
521     {
522       clib_warning ("Prefix not found while deleting %U",
523                     format_ip4_address_and_length, address, address_length);
524       return;
525     }
526
527   if_prefix->ref_count -= 1;
528
529   /*
530    * Routes need to be adjusted if:
531    * - deleting last intf addr in prefix
532    * - deleting intf addr used as default source address in glean adjacency
533    *
534    * We're done now otherwise
535    */
536   if ((if_prefix->ref_count > 0) &&
537       !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
538     return;
539
540   /* length <= 30, delete glean route, first address, last address */
541   if (address_length <= 30)
542     {
543
544       /* remove glean route for prefix */
545       pfx_special.fp_addr.ip4 = *address;
546       pfx_special.fp_len = address_length;
547       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
548
549       /* if no more intf addresses in prefix, remove other special routes */
550       if (!if_prefix->ref_count)
551         {
552           /* first address in prefix */
553           pfx_special.fp_addr.ip4.as_u32 =
554             address->as_u32 & im->fib_masks[address_length];
555           pfx_special.fp_len = 32;
556
557           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
558           fib_table_entry_special_remove (fib_index,
559                                           &pfx_special,
560                                           FIB_SOURCE_INTERFACE);
561
562           /* prefix broadcast address */
563           pfx_special.fp_addr.ip4.as_u32 =
564             address->as_u32 | ~im->fib_masks[address_length];
565           pfx_special.fp_len = 32;
566
567           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
568           fib_table_entry_special_remove (fib_index,
569                                           &pfx_special,
570                                           FIB_SOURCE_INTERFACE);
571         }
572       else
573         /* default source addr just got deleted, find another */
574         {
575           ip_interface_address_t *new_src_ia = NULL;
576           ip4_address_t *new_src_addr = NULL;
577
578           new_src_addr =
579             ip4_interface_address_matching_destination
580               (im, address, sw_if_index, &new_src_ia);
581
582           if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
583
584           pfx_special.fp_len = address_length;
585           pfx_special.fp_addr.ip4 = *new_src_addr;
586
587           /* set new glean route for the prefix */
588           fib_table_entry_update_one_path (fib_index, &pfx_special,
589                                            FIB_SOURCE_INTERFACE,
590                                            (FIB_ENTRY_FLAG_CONNECTED |
591                                             FIB_ENTRY_FLAG_ATTACHED),
592                                            DPO_PROTO_IP4,
593                                            /* No next-hop address */
594                                            NULL,
595                                            sw_if_index,
596                                            /* invalid FIB index */
597                                            ~0,
598                                            1,
599                                            /* no out-label stack */
600                                            NULL,
601                                            FIB_ROUTE_PATH_FLAG_NONE);
602           return;
603         }
604     }
605   /* length == 31, delete attached route for the other address */
606   else if (address_length == 31)
607     {
608       pfx_special.fp_addr.ip4.as_u32 =
609         address->as_u32 ^ clib_host_to_net_u32(1);
610
611       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
612     }
613
614   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
615   pool_put (lm->if_prefix_pool, if_prefix);
616 }
617
618 static void
619 ip4_del_interface_routes (u32 sw_if_index,
620                           ip4_main_t * im,
621                           u32 fib_index,
622                           ip4_address_t * address, u32 address_length)
623 {
624   fib_prefix_t pfx = {
625     .fp_len = address_length,
626     .fp_proto = FIB_PROTOCOL_IP4,
627     .fp_addr.ip4 = *address,
628   };
629
630   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
631                                    address, address_length);
632
633   pfx.fp_len = 32;
634   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
635 }
636
637 #ifndef CLIB_MARCH_VARIANT
638 void
639 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
640 {
641   ip4_main_t *im = &ip4_main;
642
643   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
644
645   /*
646    * enable/disable only on the 1<->0 transition
647    */
648   if (is_enable)
649     {
650       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
651         return;
652     }
653   else
654     {
655       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
656       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
657         return;
658     }
659   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
660                                !is_enable, 0, 0);
661
662
663   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
664                                sw_if_index, !is_enable, 0, 0);
665
666   {
667     ip4_enable_disable_interface_callback_t *cb;
668     vec_foreach (cb, im->enable_disable_interface_callbacks)
669       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
670   }
671 }
672
673 static clib_error_t *
674 ip4_add_del_interface_address_internal (vlib_main_t * vm,
675                                         u32 sw_if_index,
676                                         ip4_address_t * address,
677                                         u32 address_length, u32 is_del)
678 {
679   vnet_main_t *vnm = vnet_get_main ();
680   ip4_main_t *im = &ip4_main;
681   ip_lookup_main_t *lm = &im->lookup_main;
682   clib_error_t *error = 0;
683   u32 if_address_index, elts_before;
684   ip4_address_fib_t ip4_af, *addr_fib = 0;
685
686   /* local0 interface doesn't support IP addressing  */
687   if (sw_if_index == 0)
688     {
689       return
690        clib_error_create ("local0 interface doesn't support IP addressing");
691     }
692
693   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
694   ip4_addr_fib_init (&ip4_af, address,
695                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
696   vec_add1 (addr_fib, ip4_af);
697
698   /*
699    * there is no support for adj-fib handling in the presence of overlapping
700    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
701    * most routers do.
702    */
703   /* *INDENT-OFF* */
704   if (!is_del)
705     {
706       /* When adding an address check that it does not conflict
707          with an existing address on any interface in this table. */
708       ip_interface_address_t *ia;
709       vnet_sw_interface_t *sif;
710
711       pool_foreach(sif, vnm->interface_main.sw_interfaces,
712       ({
713           if (im->fib_index_by_sw_if_index[sw_if_index] ==
714               im->fib_index_by_sw_if_index[sif->sw_if_index])
715             {
716               foreach_ip_interface_address
717                 (&im->lookup_main, ia, sif->sw_if_index,
718                  0 /* honor unnumbered */ ,
719                  ({
720                    ip4_address_t * x =
721                      ip_interface_address_get_address
722                      (&im->lookup_main, ia);
723                    if (ip4_destination_matches_route
724                        (im, address, x, ia->address_length) ||
725                        ip4_destination_matches_route (im,
726                                                       x,
727                                                       address,
728                                                       address_length))
729                      {
730                        /* an intf may have >1 addr from the same prefix */
731                        if ((sw_if_index == sif->sw_if_index) &&
732                            (ia->address_length == address_length) &&
733                            (x->as_u32 != address->as_u32))
734                          continue;
735
736                        /* error if the length or intf was different */
737                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
738
739                        return
740                          clib_error_create
741                          ("failed to add %U on %U which conflicts with %U for interface %U",
742                           format_ip4_address_and_length, address,
743                           address_length,
744                           format_vnet_sw_if_index_name, vnm,
745                           sw_if_index,
746                           format_ip4_address_and_length, x,
747                           ia->address_length,
748                           format_vnet_sw_if_index_name, vnm,
749                           sif->sw_if_index);
750                      }
751                  }));
752             }
753       }));
754     }
755   /* *INDENT-ON* */
756
757   elts_before = pool_elts (lm->if_address_pool);
758
759   error = ip_interface_address_add_del
760     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
761   if (error)
762     goto done;
763
764   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
765   ip4_mfib_interface_enable_disable (sw_if_index, !is_del);
766
767   /* intf addr routes are added/deleted on admin up/down */
768   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
769     {
770       if (is_del)
771         ip4_del_interface_routes (sw_if_index,
772                                   im, ip4_af.fib_index, address,
773                                   address_length);
774       else
775         ip4_add_interface_routes (sw_if_index,
776                                   im, ip4_af.fib_index,
777                                   pool_elt_at_index
778                                   (lm->if_address_pool, if_address_index));
779     }
780
781   /* If pool did not grow/shrink: add duplicate address. */
782   if (elts_before != pool_elts (lm->if_address_pool))
783     {
784       ip4_add_del_interface_address_callback_t *cb;
785       vec_foreach (cb, im->add_del_interface_address_callbacks)
786         cb->function (im, cb->function_opaque, sw_if_index,
787                       address, address_length, if_address_index, is_del);
788     }
789
790 done:
791   vec_free (addr_fib);
792   return error;
793 }
794
795 clib_error_t *
796 ip4_add_del_interface_address (vlib_main_t * vm,
797                                u32 sw_if_index,
798                                ip4_address_t * address,
799                                u32 address_length, u32 is_del)
800 {
801   return ip4_add_del_interface_address_internal
802     (vm, sw_if_index, address, address_length, is_del);
803 }
804
805 void
806 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
807 {
808   ip_interface_address_t *ia;
809   ip4_main_t *im;
810
811   im = &ip4_main;
812
813   /*
814    * when directed broadcast is enabled, the subnet braodcast route will forward
815    * packets using an adjacency with a broadcast MAC. otherwise it drops
816    */
817   /* *INDENT-OFF* */
818   foreach_ip_interface_address(&im->lookup_main, ia,
819                                sw_if_index, 0,
820      ({
821        if (ia->address_length <= 30)
822          {
823            ip4_address_t *ipa;
824
825            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
826
827            fib_prefix_t pfx = {
828              .fp_len = 32,
829              .fp_proto = FIB_PROTOCOL_IP4,
830              .fp_addr = {
831                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
832              },
833            };
834
835            ip4_add_subnet_bcast_route
836              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
837                                                   sw_if_index),
838               &pfx, sw_if_index);
839          }
840      }));
841   /* *INDENT-ON* */
842 }
843 #endif
844
845 static clib_error_t *
846 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
847 {
848   ip4_main_t *im = &ip4_main;
849   ip_interface_address_t *ia;
850   ip4_address_t *a;
851   u32 is_admin_up, fib_index;
852
853   /* Fill in lookup tables with default table (0). */
854   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
855
856   vec_validate_init_empty (im->
857                            lookup_main.if_address_pool_index_by_sw_if_index,
858                            sw_if_index, ~0);
859
860   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
861
862   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
863
864   /* *INDENT-OFF* */
865   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
866                                 0 /* honor unnumbered */,
867   ({
868     a = ip_interface_address_get_address (&im->lookup_main, ia);
869     if (is_admin_up)
870       ip4_add_interface_routes (sw_if_index,
871                                 im, fib_index,
872                                 ia);
873     else
874       ip4_del_interface_routes (sw_if_index,
875                                 im, fib_index,
876                                 a, ia->address_length);
877   }));
878   /* *INDENT-ON* */
879
880   return 0;
881 }
882
883 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
884
885 /* Built-in ip4 unicast rx feature path definition */
/*
 * The "ip4-unicast" arc is entered from "ip4-input" and
 * "ip4-input-no-checksum" and ends at "ip4-lookup"; its index is stored in
 * ip4_main.lookup_main.ucast_feature_arc_index.
 *
 * The runs_before constraints declared below chain the features as:
 *   ip4-flow-classify -> ip4-inacl -> ip4-policer-classify ->
 *   ipsec4-input-feature -> vpath-input-ip4 -> ip4-vxlan-bypass ->
 *   ip4-lookup
 * with ip4-source-and-port-range-check-rx constrained only to run before
 * ip4-policer-classify, and ip4-not-enabled only before ip4-lookup.
 */
886 /* *INDENT-OFF* */
887 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
888 {
889   .arc_name = "ip4-unicast",
890   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
891   .last_in_arc = "ip4-lookup",
892   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
893 };
894
/* Flow classification: ordered ahead of the input ACL. */
895 VNET_FEATURE_INIT (ip4_flow_classify, static) =
896 {
897   .arc_name = "ip4-unicast",
898   .node_name = "ip4-flow-classify",
899   .runs_before = VNET_FEATURES ("ip4-inacl"),
900 };
901
/* Input ACL: ordered ahead of policer classification. */
902 VNET_FEATURE_INIT (ip4_inacl, static) =
903 {
904   .arc_name = "ip4-unicast",
905   .node_name = "ip4-inacl",
906   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
907 };
908
/* Rx source/port-range check: also ordered ahead of policer classification. */
909 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
910 {
911   .arc_name = "ip4-unicast",
912   .node_name = "ip4-source-and-port-range-check-rx",
913   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
914 };
915
/* Policer classification: ordered ahead of the IPsec input feature. */
916 VNET_FEATURE_INIT (ip4_policer_classify, static) =
917 {
918   .arc_name = "ip4-unicast",
919   .node_name = "ip4-policer-classify",
920   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
921 };
922
/* IPsec input: ordered ahead of vpath. */
923 VNET_FEATURE_INIT (ip4_ipsec, static) =
924 {
925   .arc_name = "ip4-unicast",
926   .node_name = "ipsec4-input-feature",
927   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
928 };
929
/* vpath input: ordered ahead of the VXLAN bypass. */
930 VNET_FEATURE_INIT (ip4_vpath, static) =
931 {
932   .arc_name = "ip4-unicast",
933   .node_name = "vpath-input-ip4",
934   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
935 };
936
/* VXLAN bypass: ordered ahead of the lookup node. */
937 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
938 {
939   .arc_name = "ip4-unicast",
940   .node_name = "ip4-vxlan-bypass",
941   .runs_before = VNET_FEATURES ("ip4-lookup"),
942 };
943
/*
 * "ip4-not-enabled": toggled per interface by ip4_sw_interface_add_del()
 * in this file; only constrained to run before the lookup node.
 */
944 VNET_FEATURE_INIT (ip4_not_enabled, static) =
945 {
946   .arc_name = "ip4-unicast",
947   .node_name = "ip4-not-enabled",
948   .runs_before = VNET_FEATURES ("ip4-lookup"),
949 };
950
/* Terminal node of the arc (matches .last_in_arc above). */
951 VNET_FEATURE_INIT (ip4_lookup, static) =
952 {
953   .arc_name = "ip4-unicast",
954   .node_name = "ip4-lookup",
955   .runs_before = 0,     /* not before any other features */
956 };
957
958 /* Built-in ip4 multicast rx feature path definition */
/*
 * The "ip4-multicast" arc shares its start nodes with the unicast arc
 * ("ip4-input", "ip4-input-no-checksum") and ends at
 * "ip4-mfib-forward-lookup"; its index is stored in
 * ip4_main.lookup_main.mcast_feature_arc_index.  Both non-terminal
 * features below are only constrained to run before the terminal node.
 */
959 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
960 {
961   .arc_name = "ip4-multicast",
962   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
963   .last_in_arc = "ip4-mfib-forward-lookup",
964   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
965 };
966
/* vpath input on the multicast arc. */
967 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
968 {
969   .arc_name = "ip4-multicast",
970   .node_name = "vpath-input-ip4",
971   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
972 };
973
/*
 * "ip4-not-enabled" on the multicast arc: toggled per interface by
 * ip4_sw_interface_add_del() in this file.
 */
974 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
975 {
976   .arc_name = "ip4-multicast",
977   .node_name = "ip4-not-enabled",
978   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
979 };
980
/* Terminal node of the arc (matches .last_in_arc above). */
981 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
982 {
983   .arc_name = "ip4-multicast",
984   .node_name = "ip4-mfib-forward-lookup",
985   .runs_before = 0,     /* last feature */
986 };
987
988 /* Built-in ip4 tx (output) feature arc definition */
/*
 * The "ip4-output" arc is entered from "ip4-rewrite", "ip4-midchain" and
 * "ip4-dvr-dpo" and ends at "interface-output"; its index is stored in
 * ip4_main.lookup_main.output_feature_arc_index.
 *
 * The runs_before constraints declared below chain the features as:
 *   ip4-source-and-port-range-check-tx -> ip4-outacl ->
 *   ipsec4-output-feature -> interface-output
 */
989 VNET_FEATURE_ARC_INIT (ip4_output, static) =
990 {
991   .arc_name = "ip4-output",
992   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
993   .last_in_arc = "interface-output",
994   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
995 };
996
/* Tx source/port-range check: ordered ahead of the output ACL. */
997 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
998 {
999   .arc_name = "ip4-output",
1000   .node_name = "ip4-source-and-port-range-check-tx",
1001   .runs_before = VNET_FEATURES ("ip4-outacl"),
1002 };
1003
/* Output ACL: ordered ahead of the IPsec output feature. */
1004 VNET_FEATURE_INIT (ip4_outacl, static) =
1005 {
1006   .arc_name = "ip4-output",
1007   .node_name = "ip4-outacl",
1008   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1009 };
1010
/* IPsec output: ordered ahead of the terminal node. */
1011 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1012 {
1013   .arc_name = "ip4-output",
1014   .node_name = "ipsec4-output-feature",
1015   .runs_before = VNET_FEATURES ("interface-output"),
1016 };
1017
1018 /* Terminal node of the ip4-output arc (matches .last_in_arc above) */
1019 VNET_FEATURE_INIT (ip4_interface_output, static) =
1020 {
1021   .arc_name = "ip4-output",
1022   .node_name = "interface-output",
1023   .runs_before = 0,     /* not before any other features */
1024 };
1025 /* *INDENT-ON* */
1026
1027 static clib_error_t *
1028 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1029 {
1030   ip4_main_t *im = &ip4_main;
1031
1032   /* Fill in lookup tables with default table (0). */
1033   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1034   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1035
1036   if (!is_add)
1037     {
1038       ip4_main_t *im4 = &ip4_main;
1039       ip_lookup_main_t *lm4 = &im4->lookup_main;
1040       ip_interface_address_t *ia = 0;
1041       ip4_address_t *address;
1042       vlib_main_t *vm = vlib_get_main ();
1043
1044       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1045       /* *INDENT-OFF* */
1046       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1047       ({
1048         address = ip_interface_address_get_address (lm4, ia);
1049         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1050       }));
1051       /* *INDENT-ON* */
1052       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1053     }
1054
1055   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1056                                is_add, 0, 0);
1057
1058   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1059                                sw_if_index, is_add, 0, 0);
1060
1061   return /* no error */ 0;
1062 }
1063
1064 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1065
1066 /* Global IP4 main. */
/*
 * The single ip4_main_t instance that the code in this file reaches through
 * &ip4_main.  The definition is emitted only when CLIB_MARCH_VARIANT is not
 * defined.
 * NOTE(review): presumably this keeps per-march-variant compilations of
 * this file from each defining the symbol - confirm against the build.
 */
1067 #ifndef CLIB_MARCH_VARIANT
1068 ip4_main_t ip4_main;
1069 #endif /* CLIB_MARCH_VARIANT */
1070
1071 static clib_error_t *
1072 ip4_lookup_init (vlib_main_t * vm)
1073 {
1074   ip4_main_t *im = &ip4_main;
1075   clib_error_t *error;
1076   uword i;
1077
1078   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1079     return error;
1080   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1081     return (error);
1082   if ((error = vlib_call_init_function (vm, fib_module_init)))
1083     return error;
1084   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1085     return error;
1086
1087   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1088     {
1089       u32 m;
1090
1091       if (i < 32)
1092         m = pow2_mask (i) << (32 - i);
1093       else
1094         m = ~0;
1095       im->fib_masks[i] = clib_host_to_net_u32 (m);
1096     }
1097
1098   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1099
1100   /* Create FIB with index 0 and table id of 0. */
1101   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1102                                      FIB_SOURCE_DEFAULT_ROUTE);
1103   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1104                                       MFIB_SOURCE_DEFAULT_ROUTE);
1105
1106   {
1107     pg_node_t *pn;
1108     pn = pg_get_node (ip4_lookup_node.index);
1109     pn->unformat_edit = unformat_pg_ip4_header;
1110   }
1111
1112   {
1113     ethernet_arp_header_t h;
1114
1115     clib_memset (&h, 0, sizeof (h));
1116
1117 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1118 #define _8(f,v) h.f = v;
1119     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1120     _16 (l3_type, ETHERNET_TYPE_IP4);
1121     _8 (n_l2_address_bytes, 6);
1122     _8 (n_l3_address_bytes, 4);
1123     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1124 #undef _16
1125 #undef _8
1126
1127     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1128                                /* data */ &h,
1129                                sizeof (h),
1130                                /* alloc chunk size */ 8,
1131                                "ip4 arp");
1132   }
1133
1134   return error;
1135 }
1136
1137 VLIB_INIT_FUNCTION (ip4_lookup_init);
1138
1139 typedef struct
1140 {
1141   /* Adjacency taken. */
1142   u32 dpo_index;
1143   u32 flow_hash;
1144   u32 fib_index;
1145
1146   /* Packet data, possibly *after* rewrite. */
1147   u8 packet_data[64 - 1 * sizeof (u32)];
1148 }
1149 ip4_forward_next_trace_t;
1150
1151 #ifndef CLIB_MARCH_VARIANT
1152 u8 *
1153 format_ip4_forward_next_trace (u8 * s, va_list * args)
1154 {
1155   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1156   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1157   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1158   u32 indent = format_get_indent (s);
1159   s = format (s, "%U%U",
1160               format_white_space, indent,
1161               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1162   return s;
1163 }
1164 #endif
1165
1166 static u8 *
1167 format_ip4_lookup_trace (u8 * s, va_list * args)
1168 {
1169   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1170   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1171   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1172   u32 indent = format_get_indent (s);
1173
1174   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1175               t->fib_index, t->dpo_index, t->flow_hash);
1176   s = format (s, "\n%U%U",
1177               format_white_space, indent,
1178               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1179   return s;
1180 }
1181
1182 static u8 *
1183 format_ip4_rewrite_trace (u8 * s, va_list * args)
1184 {
1185   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1186   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1187   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1188   u32 indent = format_get_indent (s);
1189
1190   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1191               t->fib_index, t->dpo_index, format_ip_adjacency,
1192               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1193   s = format (s, "\n%U%U",
1194               format_white_space, indent,
1195               format_ip_adjacency_packet_data,
1196               t->packet_data, sizeof (t->packet_data));
1197   return s;
1198 }
1199
1200 #ifndef CLIB_MARCH_VARIANT
1201 /* Common trace function for all ip4-forward next nodes. */
1202 void
1203 ip4_forward_next_trace (vlib_main_t * vm,
1204                         vlib_node_runtime_t * node,
1205                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1206 {
1207   u32 *from, n_left;
1208   ip4_main_t *im = &ip4_main;
1209
1210   n_left = frame->n_vectors;
1211   from = vlib_frame_vector_args (frame);
1212
1213   while (n_left >= 4)
1214     {
1215       u32 bi0, bi1;
1216       vlib_buffer_t *b0, *b1;
1217       ip4_forward_next_trace_t *t0, *t1;
1218
1219       /* Prefetch next iteration. */
1220       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1221       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1222
1223       bi0 = from[0];
1224       bi1 = from[1];
1225
1226       b0 = vlib_get_buffer (vm, bi0);
1227       b1 = vlib_get_buffer (vm, bi1);
1228
1229       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1230         {
1231           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1232           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1233           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1234           t0->fib_index =
1235             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1236              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1237             vec_elt (im->fib_index_by_sw_if_index,
1238                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1239
1240           clib_memcpy_fast (t0->packet_data,
1241                             vlib_buffer_get_current (b0),
1242                             sizeof (t0->packet_data));
1243         }
1244       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1245         {
1246           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1247           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1248           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1249           t1->fib_index =
1250             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1251              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1252             vec_elt (im->fib_index_by_sw_if_index,
1253                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1254           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1255                             sizeof (t1->packet_data));
1256         }
1257       from += 2;
1258       n_left -= 2;
1259     }
1260
1261   while (n_left >= 1)
1262     {
1263       u32 bi0;
1264       vlib_buffer_t *b0;
1265       ip4_forward_next_trace_t *t0;
1266
1267       bi0 = from[0];
1268
1269       b0 = vlib_get_buffer (vm, bi0);
1270
1271       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1272         {
1273           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1274           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1275           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1276           t0->fib_index =
1277             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1278              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1279             vec_elt (im->fib_index_by_sw_if_index,
1280                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1281           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1282                             sizeof (t0->packet_data));
1283         }
1284       from += 1;
1285       n_left -= 1;
1286     }
1287 }
1288
1289 /* Compute TCP/UDP/ICMP4 checksum in software. */
1290 u16
1291 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1292                               ip4_header_t * ip0)
1293 {
1294   ip_csum_t sum0;
1295   u32 ip_header_length, payload_length_host_byte_order;
1296
1297   /* Initialize checksum with ip header. */
1298   ip_header_length = ip4_header_bytes (ip0);
1299   payload_length_host_byte_order =
1300     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1301   sum0 =
1302     clib_host_to_net_u32 (payload_length_host_byte_order +
1303                           (ip0->protocol << 16));
1304
1305   if (BITS (uword) == 32)
1306     {
1307       sum0 =
1308         ip_csum_with_carry (sum0,
1309                             clib_mem_unaligned (&ip0->src_address, u32));
1310       sum0 =
1311         ip_csum_with_carry (sum0,
1312                             clib_mem_unaligned (&ip0->dst_address, u32));
1313     }
1314   else
1315     sum0 =
1316       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1317
1318   return ip_calculate_l4_checksum (vm, p0, sum0,
1319                                    payload_length_host_byte_order, (u8 *) ip0,
1320                                    ip_header_length, NULL);
1321 }
1322
1323 u32
1324 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1325 {
1326   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1327   udp_header_t *udp0;
1328   u16 sum16;
1329
1330   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1331           || ip0->protocol == IP_PROTOCOL_UDP);
1332
1333   udp0 = (void *) (ip0 + 1);
1334   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1335     {
1336       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1337                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1338       return p0->flags;
1339     }
1340
1341   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1342
1343   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1344                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1345
1346   return p0->flags;
1347 }
1348 #endif
1349
/*
 * Feature arc for locally destined IPv4 packets: entered from the
 * "ip4-local" node and terminated by "ip4-local-end-of-arc".
 * Unlike the arcs declared earlier in this file, this one is not declared
 * static and sets no arc_index_ptr; ip4_local_set_next_and_error() reads
 * the arc index from vnet_feat_arc_ip4_local.feature_arc_index.
 * NOTE(review): vnet_feat_arc_ip4_local is presumably the object this
 * macro invocation declares - confirm against the macro definition.
 */
1350 /* *INDENT-OFF* */
1351 VNET_FEATURE_ARC_INIT (ip4_local) =
1352 {
1353   .arc_name  = "ip4-local",
1354   .start_nodes = VNET_FEATURES ("ip4-local"),
1355   .last_in_arc = "ip4-local-end-of-arc",
1356 };
1357 /* *INDENT-ON* */
1358
1359 static inline void
1360 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1361                             ip4_header_t * ip, u8 is_udp, u8 * error,
1362                             u8 * good_tcp_udp)
1363 {
1364   u32 flags0;
1365   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1366   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1367   if (is_udp)
1368     {
1369       udp_header_t *udp;
1370       u32 ip_len, udp_len;
1371       i32 len_diff;
1372       udp = ip4_next_header (ip);
1373       /* Verify UDP length. */
1374       ip_len = clib_net_to_host_u16 (ip->length);
1375       udp_len = clib_net_to_host_u16 (udp->length);
1376
1377       len_diff = ip_len - udp_len;
1378       *good_tcp_udp &= len_diff >= 0;
1379       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1380     }
1381 }
1382
/*
 * Checksum-state predicates on a buffer pointer.  Every argument and every
 * expansion is fully parenthesized so the macros can be negated, combined
 * with other operators, or given a pointer expression (e.g. "b + 1")
 * without changing meaning.
 */

/* Non-zero when either OFFLOAD_{TCP,UDP}_CKSUM flag is set on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
     || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/*
 * Non-zero when a TCP/UDP packet still needs a software checksum check:
 * the checksum has not been computed yet and is not offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)            \
          || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
      || ip4_local_csum_is_offloaded (_b)) != 0)
1394
1395 static inline void
1396 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1397                          ip4_header_t * ih, u8 * error)
1398 {
1399   u8 is_udp, is_tcp_udp, good_tcp_udp;
1400
1401   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1402   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1403
1404   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1405     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1406   else
1407     good_tcp_udp = ip4_local_csum_is_valid (b);
1408
1409   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1410   *error = (is_tcp_udp && !good_tcp_udp
1411             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1412 }
1413
1414 static inline void
1415 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1416                             ip4_header_t ** ih, u8 * error)
1417 {
1418   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1419
1420   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1421   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1422
1423   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1424   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1425
1426   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1427   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1428
1429   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1430                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1431     {
1432       if (is_tcp_udp[0])
1433         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1434                                     &good_tcp_udp[0]);
1435       if (is_tcp_udp[1])
1436         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1437                                     &good_tcp_udp[1]);
1438     }
1439
1440   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1441               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1442   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1443               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1444 }
1445
1446 static inline void
1447 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1448                               vlib_buffer_t * b, u16 * next, u8 error,
1449                               u8 head_of_feature_arc)
1450 {
1451   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1452   u32 next_index;
1453
1454   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1455   b->error = error ? error_node->errors[error] : 0;
1456   if (head_of_feature_arc)
1457     {
1458       next_index = *next;
1459       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1460         {
1461           vnet_feature_arc_start (arc_index,
1462                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1463                                   &next_index, b);
1464           *next = next_index;
1465         }
1466     }
1467 }
1468
1469 typedef struct
1470 {
1471   ip4_address_t src;
1472   u32 lbi;
1473   u8 error;
1474   u8 first;
1475 } ip4_local_last_check_t;
1476
1477 static inline void
1478 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1479                      ip4_local_last_check_t * last_check, u8 * error0)
1480 {
1481   ip4_fib_mtrie_leaf_t leaf0;
1482   ip4_fib_mtrie_t *mtrie0;
1483   const dpo_id_t *dpo0;
1484   load_balance_t *lb0;
1485   u32 lbi0;
1486
1487   vnet_buffer (b)->ip.fib_index =
1488     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1489     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1490
1491   /*
1492    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1493    *  adjacency for the destination address (the local interface address).
1494    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1495    *  adjacency for the source address (the remote sender's address)
1496    */
1497   if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
1498       last_check->first)
1499     {
1500       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1501       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1502       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1503       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1504       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1505
1506       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1507         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1508       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1509
1510       lb0 = load_balance_get (lbi0);
1511       dpo0 = load_balance_get_bucket_i (lb0, 0);
1512
1513       /*
1514        * Must have a route to source otherwise we drop the packet.
1515        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1516        *
1517        * The checks are:
1518        *  - the source is a recieve => it's from us => bogus, do this
1519        *    first since it sets a different error code.
1520        *  - uRPF check for any route to source - accept if passes.
1521        *  - allow packets destined to the broadcast address from unknown sources
1522        */
1523
1524       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1525                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1526                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1527       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1528                   && !fib_urpf_check_size (lb0->lb_urpf)
1529                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1530                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1531
1532       last_check->src.as_u32 = ip0->src_address.as_u32;
1533       last_check->lbi = lbi0;
1534       last_check->error = *error0;
1535       last_check->first = 0;
1536     }
1537   else
1538     {
1539       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1540         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1541       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1542       *error0 = last_check->error;
1543     }
1544 }
1545
1546 static inline void
1547 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1548                         ip4_local_last_check_t * last_check, u8 * error)
1549 {
1550   ip4_fib_mtrie_leaf_t leaf[2];
1551   ip4_fib_mtrie_t *mtrie[2];
1552   const dpo_id_t *dpo[2];
1553   load_balance_t *lb[2];
1554   u32 not_last_hit;
1555   u32 lbi[2];
1556
1557   not_last_hit = last_check->first;
1558   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1559   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1560
1561   vnet_buffer (b[0])->ip.fib_index =
1562     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1563     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1564     vnet_buffer (b[0])->ip.fib_index;
1565
1566   vnet_buffer (b[1])->ip.fib_index =
1567     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1568     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1569     vnet_buffer (b[1])->ip.fib_index;
1570
1571   /*
1572    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1573    *  adjacency for the destination address (the local interface address).
1574    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1575    *  adjacency for the source address (the remote sender's address)
1576    */
1577   if (PREDICT_TRUE (not_last_hit))
1578     {
1579       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1580       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1581
1582       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1583       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1584
1585       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1586                                            &ip[0]->src_address, 2);
1587       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1588                                            &ip[1]->src_address, 2);
1589
1590       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1591                                            &ip[0]->src_address, 3);
1592       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1593                                            &ip[1]->src_address, 3);
1594
1595       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1596       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1597
1598       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1599         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1600       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1601
1602       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1603         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1604       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1605
1606       lb[0] = load_balance_get (lbi[0]);
1607       lb[1] = load_balance_get (lbi[1]);
1608
1609       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1610       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1611
1612       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1613                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1614                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1615       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1616                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1617                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1618                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1619
1620       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1621                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1622                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1623       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1624                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1625                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1626                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1627
1628       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1629       last_check->lbi = lbi[1];
1630       last_check->error = error[1];
1631       last_check->first = 0;
1632     }
1633   else
1634     {
1635       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1636         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1637       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1638
1639       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1640         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1641       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1642
1643       error[0] = last_check->error;
1644       error[1] = last_check->error;
1645     }
1646 }
1647
/**
 * Packet classes returned by ip4_local_classify().
 *
 * The numeric values matter: ip4_local_inline() treats a zero type as
 * "run the checksum and source checks" and XORs two types to detect a
 * mixed pair, so L4 must stay 0.
 */
enum ip_local_packet_type_e
{
  /** ordinary packet: subject to checksum and source checks */
  IP_LOCAL_PACKET_TYPE_L4 = 0,
  /** buffer carries VNET_BUFFER_F_IS_NATED */
  IP_LOCAL_PACKET_TYPE_NAT = 1,
  /** IPv4 fragment: sent to reassembly */
  IP_LOCAL_PACKET_TYPE_FRAG = 2,
};
1654
1655 /**
1656  * Determine packet type and next node.
1657  *
1658  * The expectation is that all packets that are not L4 will skip
1659  * checksums and source checks.
1660  */
1661 always_inline u8
1662 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1663 {
1664   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1665
1666   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1667     {
1668       *next = IP_LOCAL_NEXT_REASSEMBLY;
1669       return IP_LOCAL_PACKET_TYPE_FRAG;
1670     }
1671   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1672     {
1673       *next = lm->local_next_by_ip_protocol[ip->protocol];
1674       return IP_LOCAL_PACKET_TYPE_NAT;
1675     }
1676
1677   *next = lm->local_next_by_ip_protocol[ip->protocol];
1678   return IP_LOCAL_PACKET_TYPE_L4;
1679 }
1680
1681 static inline uword
1682 ip4_local_inline (vlib_main_t * vm,
1683                   vlib_node_runtime_t * node,
1684                   vlib_frame_t * frame, int head_of_feature_arc)
1685 {
1686   u32 *from, n_left_from;
1687   vlib_node_runtime_t *error_node =
1688     vlib_node_get_runtime (vm, ip4_local_node.index);
1689   u16 nexts[VLIB_FRAME_SIZE], *next;
1690   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1691   ip4_header_t *ip[2];
1692   u8 error[2], pt[2];
1693
1694   ip4_local_last_check_t last_check = {
1695     /*
1696      * 0.0.0.0 can appear as the source address of an IP packet,
1697      * as can any other address, hence the need to use the 'first'
1698      * member to make sure the .lbi is initialised for the first
1699      * packet.
1700      */
1701     .src = {.as_u32 = 0},
1702     .lbi = ~0,
1703     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1704     .first = 1,
1705   };
1706
1707   from = vlib_frame_vector_args (frame);
1708   n_left_from = frame->n_vectors;
1709
1710   if (node->flags & VLIB_NODE_FLAG_TRACE)
1711     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1712
1713   vlib_get_buffers (vm, from, bufs, n_left_from);
1714   b = bufs;
1715   next = nexts;
1716
1717   while (n_left_from >= 6)
1718     {
1719       u8 not_batch = 0;
1720
1721       /* Prefetch next iteration. */
1722       {
1723         vlib_prefetch_buffer_header (b[4], LOAD);
1724         vlib_prefetch_buffer_header (b[5], LOAD);
1725
1726         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1727         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1728       }
1729
1730       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1731
1732       ip[0] = vlib_buffer_get_current (b[0]);
1733       ip[1] = vlib_buffer_get_current (b[1]);
1734
1735       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1736       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1737
1738       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1739       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1740
1741       not_batch = pt[0] ^ pt[1];
1742
1743       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1744         goto skip_checks;
1745
1746       if (PREDICT_TRUE (not_batch == 0))
1747         {
1748           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1749           ip4_local_check_src_x2 (b, ip, &last_check, error);
1750         }
1751       else
1752         {
1753           if (!pt[0])
1754             {
1755               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1756               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1757             }
1758           if (!pt[1])
1759             {
1760               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1761               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1762             }
1763         }
1764
1765     skip_checks:
1766
1767       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1768                                     head_of_feature_arc);
1769       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1770                                     head_of_feature_arc);
1771
1772       b += 2;
1773       next += 2;
1774       n_left_from -= 2;
1775     }
1776
1777   while (n_left_from > 0)
1778     {
1779       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1780
1781       ip[0] = vlib_buffer_get_current (b[0]);
1782       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1783       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1784
1785       if (head_of_feature_arc == 0 || pt[0])
1786         goto skip_check;
1787
1788       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1789       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1790
1791     skip_check:
1792
1793       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1794                                     head_of_feature_arc);
1795
1796       b += 1;
1797       next += 1;
1798       n_left_from -= 1;
1799     }
1800
1801   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1802   return frame->n_vectors;
1803 }
1804
1805 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1806                                vlib_frame_t * frame)
1807 {
1808   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1809 }
1810
1811 /* *INDENT-OFF* */
1812 VLIB_REGISTER_NODE (ip4_local_node) =
1813 {
1814   .name = "ip4-local",
1815   .vector_size = sizeof (u32),
1816   .format_trace = format_ip4_forward_next_trace,
1817   .n_errors = IP4_N_ERROR,
1818   .error_strings = ip4_error_strings,
1819   .n_next_nodes = IP_LOCAL_N_NEXT,
1820   .next_nodes =
1821   {
1822     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1823     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1824     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1825     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1826     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1827   },
1828 };
1829 /* *INDENT-ON* */
1830
1831
1832 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1833                                           vlib_node_runtime_t * node,
1834                                           vlib_frame_t * frame)
1835 {
1836   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1837 }
1838
1839 /* *INDENT-OFF* */
1840 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1841   .name = "ip4-local-end-of-arc",
1842   .vector_size = sizeof (u32),
1843
1844   .format_trace = format_ip4_forward_next_trace,
1845   .sibling_of = "ip4-local",
1846 };
1847
1848 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1849   .arc_name = "ip4-local",
1850   .node_name = "ip4-local-end-of-arc",
1851   .runs_before = 0, /* not before any other features */
1852 };
1853 /* *INDENT-ON* */
1854
1855 #ifndef CLIB_MARCH_VARIANT
1856 void
1857 ip4_register_protocol (u32 protocol, u32 node_index)
1858 {
1859   vlib_main_t *vm = vlib_get_main ();
1860   ip4_main_t *im = &ip4_main;
1861   ip_lookup_main_t *lm = &im->lookup_main;
1862
1863   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1864   lm->local_next_by_ip_protocol[protocol] =
1865     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1866 }
1867
1868 void
1869 ip4_unregister_protocol (u32 protocol)
1870 {
1871   ip4_main_t *im = &ip4_main;
1872   ip_lookup_main_t *lm = &im->lookup_main;
1873
1874   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1875   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1876 }
1877 #endif
1878
1879 static clib_error_t *
1880 show_ip_local_command_fn (vlib_main_t * vm,
1881                           unformat_input_t * input, vlib_cli_command_t * cmd)
1882 {
1883   ip4_main_t *im = &ip4_main;
1884   ip_lookup_main_t *lm = &im->lookup_main;
1885   int i;
1886
1887   vlib_cli_output (vm, "Protocols handled by ip4_local");
1888   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1889     {
1890       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1891         {
1892           u32 node_index = vlib_get_node (vm,
1893                                           ip4_local_node.index)->
1894             next_nodes[lm->local_next_by_ip_protocol[i]];
1895           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1896                            format_vlib_node_name, vm, node_index);
1897         }
1898     }
1899   return 0;
1900 }
1901
1902
1903
1904 /*?
1905  * Display the set of protocols handled by the local IPv4 stack.
1906  *
1907  * @cliexpar
1908  * Example of how to display local protocol table:
1909  * @cliexstart{show ip local}
1910  * Protocols handled by ip4_local
1911  * 1
1912  * 17
1913  * 47
1914  * @cliexend
1915 ?*/
1916 /* *INDENT-OFF* */
1917 VLIB_CLI_COMMAND (show_ip_local, static) =
1918 {
1919   .path = "show ip local",
1920   .function = show_ip_local_command_fn,
1921   .short_help = "show ip local",
1922 };
1923 /* *INDENT-ON* */
1924
/** Fixed next nodes of the ip4-rewrite family of nodes. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  IP4_REWRITE_N_NEXT            /* Last: number of fixed next nodes */
} ip4_rewrite_next_t;
1932
/**
 * The bits of an IPv4 address to mask to construct a multicast
 * MAC address.  The value depends on host byte order.
 */
#if !CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
1942
1943 always_inline void
1944 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
1945                u16 adj_packet_bytes, bool df, u16 * next,
1946                u8 is_midchain, u32 * error)
1947 {
1948   if (packet_len > adj_packet_bytes)
1949     {
1950       *error = IP4_ERROR_MTU_EXCEEDED;
1951       if (df)
1952         {
1953           icmp4_error_set_vnet_buffer
1954             (b, ICMP4_destination_unreachable,
1955              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
1956              adj_packet_bytes);
1957           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
1958         }
1959       else
1960         {
1961           /* IP fragmentation */
1962           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
1963                                    (is_midchain ?
1964                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
1965                                     IP_FRAG_NEXT_IP_REWRITE), 0);
1966           *next = IP4_REWRITE_NEXT_FRAGMENT;
1967         }
1968     }
1969 }
1970
1971 /* increment TTL & update checksum.
1972    Works either endian, so no need for byte swap. */
1973 static_always_inline void
1974 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
1975 {
1976   i32 ttl;
1977   u32 checksum;
1978   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
1979     return;
1980
1981   ttl = ip->ttl;
1982
1983   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
1984   checksum += checksum >= 0xffff;
1985
1986   ip->checksum = checksum;
1987   ttl += 1;
1988   ip->ttl = ttl;
1989
1990   ASSERT (ip->checksum == ip4_header_checksum (ip));
1991 }
1992
1993 /* Decrement TTL & update checksum.
1994    Works either endian, so no need for byte swap. */
1995 static_always_inline void
1996 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
1997                             u32 * error)
1998 {
1999   i32 ttl;
2000   u32 checksum;
2001   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2002     return;
2003
2004   ttl = ip->ttl;
2005
2006   /* Input node should have reject packets with ttl 0. */
2007   ASSERT (ip->ttl > 0);
2008
2009   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2010   checksum += checksum >= 0xffff;
2011
2012   ip->checksum = checksum;
2013   ttl -= 1;
2014   ip->ttl = ttl;
2015
2016   /*
2017    * If the ttl drops below 1 when forwarding, generate
2018    * an ICMP response.
2019    */
2020   if (PREDICT_FALSE (ttl <= 0))
2021     {
2022       *error = IP4_ERROR_TIME_EXPIRED;
2023       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2024       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2025                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2026                                    0);
2027       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2028     }
2029
2030   /* Verify checksum. */
2031   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2032           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2033 }
2034
2035
2036 always_inline uword
2037 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2038                              vlib_node_runtime_t * node,
2039                              vlib_frame_t * frame,
2040                              int do_counters, int is_midchain, int is_mcast)
2041 {
2042   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2043   u32 *from = vlib_frame_vector_args (frame);
2044   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2045   u16 nexts[VLIB_FRAME_SIZE], *next;
2046   u32 n_left_from;
2047   vlib_node_runtime_t *error_node =
2048     vlib_node_get_runtime (vm, ip4_input_node.index);
2049
2050   n_left_from = frame->n_vectors;
2051   u32 thread_index = vm->thread_index;
2052
2053   vlib_get_buffers (vm, from, bufs, n_left_from);
2054   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2055
2056 #if (CLIB_N_PREFETCHES >= 8)
2057   if (n_left_from >= 6)
2058     {
2059       int i;
2060       for (i = 2; i < 6; i++)
2061         vlib_prefetch_buffer_header (bufs[i], LOAD);
2062     }
2063
2064   next = nexts;
2065   b = bufs;
2066   while (n_left_from >= 8)
2067     {
2068       const ip_adjacency_t *adj0, *adj1;
2069       ip4_header_t *ip0, *ip1;
2070       u32 rw_len0, error0, adj_index0;
2071       u32 rw_len1, error1, adj_index1;
2072       u32 tx_sw_if_index0, tx_sw_if_index1;
2073       u8 *p;
2074
2075       vlib_prefetch_buffer_header (b[6], LOAD);
2076       vlib_prefetch_buffer_header (b[7], LOAD);
2077
2078       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2079       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2080
2081       /*
2082        * pre-fetch the per-adjacency counters
2083        */
2084       if (do_counters)
2085         {
2086           vlib_prefetch_combined_counter (&adjacency_counters,
2087                                           thread_index, adj_index0);
2088           vlib_prefetch_combined_counter (&adjacency_counters,
2089                                           thread_index, adj_index1);
2090         }
2091
2092       ip0 = vlib_buffer_get_current (b[0]);
2093       ip1 = vlib_buffer_get_current (b[1]);
2094
2095       error0 = error1 = IP4_ERROR_NONE;
2096
2097       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2098       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2099
2100       /* Rewrite packet header and updates lengths. */
2101       adj0 = adj_get (adj_index0);
2102       adj1 = adj_get (adj_index1);
2103
2104       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2105       rw_len0 = adj0[0].rewrite_header.data_bytes;
2106       rw_len1 = adj1[0].rewrite_header.data_bytes;
2107       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2108       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2109
2110       p = vlib_buffer_get_current (b[2]);
2111       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2112       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2113
2114       p = vlib_buffer_get_current (b[3]);
2115       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2116       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2117
2118       /* Check MTU of outgoing interface. */
2119       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2120       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2121
2122       if (b[0]->flags & VNET_BUFFER_F_GSO)
2123         ip0_len = gso_mtu_sz (b[0]);
2124       if (b[1]->flags & VNET_BUFFER_F_GSO)
2125         ip1_len = gso_mtu_sz (b[1]);
2126
2127       ip4_mtu_check (b[0], ip0_len,
2128                      adj0[0].rewrite_header.max_l3_packet_bytes,
2129                      ip0->flags_and_fragment_offset &
2130                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2131                      next + 0, is_midchain, &error0);
2132       ip4_mtu_check (b[1], ip1_len,
2133                      adj1[0].rewrite_header.max_l3_packet_bytes,
2134                      ip1->flags_and_fragment_offset &
2135                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2136                      next + 1, is_midchain, &error1);
2137
2138       if (is_mcast)
2139         {
2140           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2141                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2142                     IP4_ERROR_SAME_INTERFACE : error0);
2143           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2144                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2145                     IP4_ERROR_SAME_INTERFACE : error1);
2146         }
2147
2148       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2149        * to see the IP header */
2150       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2151         {
2152           u32 next_index = adj0[0].rewrite_header.next_index;
2153           vlib_buffer_advance (b[0], -(word) rw_len0);
2154
2155           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2156           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2157
2158           if (PREDICT_FALSE
2159               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2160             vnet_feature_arc_start (lm->output_feature_arc_index,
2161                                     tx_sw_if_index0, &next_index, b[0]);
2162           next[0] = next_index;
2163           if (is_midchain)
2164             calc_checksums (vm, b[0]);
2165         }
2166       else
2167         {
2168           b[0]->error = error_node->errors[error0];
2169           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2170             ip4_ttl_inc (b[0], ip0);
2171         }
2172       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2173         {
2174           u32 next_index = adj1[0].rewrite_header.next_index;
2175           vlib_buffer_advance (b[1], -(word) rw_len1);
2176
2177           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2178           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2179
2180           if (PREDICT_FALSE
2181               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2182             vnet_feature_arc_start (lm->output_feature_arc_index,
2183                                     tx_sw_if_index1, &next_index, b[1]);
2184           next[1] = next_index;
2185           if (is_midchain)
2186             calc_checksums (vm, b[1]);
2187         }
2188       else
2189         {
2190           b[1]->error = error_node->errors[error1];
2191           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2192             ip4_ttl_inc (b[1], ip1);
2193         }
2194
2195       /* Guess we are only writing on simple Ethernet header. */
2196       vnet_rewrite_two_headers (adj0[0], adj1[0],
2197                                 ip0, ip1, sizeof (ethernet_header_t));
2198
2199       if (do_counters)
2200         {
2201           if (error0 == IP4_ERROR_NONE)
2202             vlib_increment_combined_counter
2203               (&adjacency_counters,
2204                thread_index,
2205                adj_index0, 1,
2206                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2207
2208           if (error1 == IP4_ERROR_NONE)
2209             vlib_increment_combined_counter
2210               (&adjacency_counters,
2211                thread_index,
2212                adj_index1, 1,
2213                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2214         }
2215
2216       if (is_midchain)
2217         {
2218           if (error0 == IP4_ERROR_NONE && adj0->sub_type.midchain.fixup_func)
2219             adj0->sub_type.midchain.fixup_func
2220               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2221           if (error1 == IP4_ERROR_NONE && adj1->sub_type.midchain.fixup_func)
2222             adj1->sub_type.midchain.fixup_func
2223               (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2224         }
2225
2226       if (is_mcast)
2227         {
2228           /* copy bytes from the IP address into the MAC rewrite */
2229           if (error0 == IP4_ERROR_NONE)
2230             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2231                                         adj0->rewrite_header.dst_mcast_offset,
2232                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2233           if (error1 == IP4_ERROR_NONE)
2234             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2235                                         adj1->rewrite_header.dst_mcast_offset,
2236                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2237         }
2238
2239       next += 2;
2240       b += 2;
2241       n_left_from -= 2;
2242     }
2243 #elif (CLIB_N_PREFETCHES >= 4)
2244   next = nexts;
2245   b = bufs;
2246   while (n_left_from >= 1)
2247     {
2248       ip_adjacency_t *adj0;
2249       ip4_header_t *ip0;
2250       u32 rw_len0, error0, adj_index0;
2251       u32 tx_sw_if_index0;
2252       u8 *p;
2253
2254       /* Prefetch next iteration */
2255       if (PREDICT_TRUE (n_left_from >= 4))
2256         {
2257           ip_adjacency_t *adj2;
2258           u32 adj_index2;
2259
2260           vlib_prefetch_buffer_header (b[3], LOAD);
2261           vlib_prefetch_buffer_data (b[2], LOAD);
2262
2263           /* Prefetch adj->rewrite_header */
2264           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2265           adj2 = adj_get (adj_index2);
2266           p = (u8 *) adj2;
2267           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2268                          LOAD);
2269         }
2270
2271       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2272
2273       /*
2274        * Prefetch the per-adjacency counters
2275        */
2276       if (do_counters)
2277         {
2278           vlib_prefetch_combined_counter (&adjacency_counters,
2279                                           thread_index, adj_index0);
2280         }
2281
2282       ip0 = vlib_buffer_get_current (b[0]);
2283
2284       error0 = IP4_ERROR_NONE;
2285
2286       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2287
2288       /* Rewrite packet header and updates lengths. */
2289       adj0 = adj_get (adj_index0);
2290
2291       /* Rewrite header was prefetched. */
2292       rw_len0 = adj0[0].rewrite_header.data_bytes;
2293       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2294
2295       /* Check MTU of outgoing interface. */
2296       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2297
2298       if (b[0]->flags & VNET_BUFFER_F_GSO)
2299         ip0_len = gso_mtu_sz (b[0]);
2300
2301       ip4_mtu_check (b[0], ip0_len,
2302                      adj0[0].rewrite_header.max_l3_packet_bytes,
2303                      ip0->flags_and_fragment_offset &
2304                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2305                      next + 0, is_midchain, &error0);
2306
2307       if (is_mcast)
2308         {
2309           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2310                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2311                     IP4_ERROR_SAME_INTERFACE : error0);
2312         }
2313
2314       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2315        * to see the IP header */
2316       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2317         {
2318           u32 next_index = adj0[0].rewrite_header.next_index;
2319           vlib_buffer_advance (b[0], -(word) rw_len0);
2320           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2321           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2322
2323           if (PREDICT_FALSE
2324               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2325             vnet_feature_arc_start (lm->output_feature_arc_index,
2326                                     tx_sw_if_index0, &next_index, b[0]);
2327           next[0] = next_index;
2328
2329           if (is_midchain)
2330             calc_checksums (vm, b[0]);
2331
2332           /* Guess we are only writing on simple Ethernet header. */
2333           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2334
2335           /*
2336            * Bump the per-adjacency counters
2337            */
2338           if (do_counters)
2339             vlib_increment_combined_counter
2340               (&adjacency_counters,
2341                thread_index,
2342                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2343                                                            b[0]) + rw_len0);
2344
2345           if (is_midchain && adj0->sub_type.midchain.fixup_func)
2346             adj0->sub_type.midchain.fixup_func
2347               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2348
2349           if (is_mcast)
2350             /* copy bytes from the IP address into the MAC rewrite */
2351             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2352                                         adj0->rewrite_header.dst_mcast_offset,
2353                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2354         }
2355       else
2356         {
2357           b[0]->error = error_node->errors[error0];
2358           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2359             ip4_ttl_inc (b[0], ip0);
2360         }
2361
2362       next += 1;
2363       b += 1;
2364       n_left_from -= 1;
2365     }
2366 #endif
2367
2368   while (n_left_from > 0)
2369     {
2370       ip_adjacency_t *adj0;
2371       ip4_header_t *ip0;
2372       u32 rw_len0, adj_index0, error0;
2373       u32 tx_sw_if_index0;
2374
2375       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2376
2377       adj0 = adj_get (adj_index0);
2378
2379       if (do_counters)
2380         vlib_prefetch_combined_counter (&adjacency_counters,
2381                                         thread_index, adj_index0);
2382
2383       ip0 = vlib_buffer_get_current (b[0]);
2384
2385       error0 = IP4_ERROR_NONE;
2386
2387       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2388
2389
2390       /* Update packet buffer attributes/set output interface. */
2391       rw_len0 = adj0[0].rewrite_header.data_bytes;
2392       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2393
2394       /* Check MTU of outgoing interface. */
2395       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2396       if (b[0]->flags & VNET_BUFFER_F_GSO)
2397         ip0_len = gso_mtu_sz (b[0]);
2398
2399       ip4_mtu_check (b[0], ip0_len,
2400                      adj0[0].rewrite_header.max_l3_packet_bytes,
2401                      ip0->flags_and_fragment_offset &
2402                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2403                      next + 0, is_midchain, &error0);
2404
2405       if (is_mcast)
2406         {
2407           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2408                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2409                     IP4_ERROR_SAME_INTERFACE : error0);
2410         }
2411
2412       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2413        * to see the IP header */
2414       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2415         {
2416           u32 next_index = adj0[0].rewrite_header.next_index;
2417           vlib_buffer_advance (b[0], -(word) rw_len0);
2418           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2419           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2420
2421           if (PREDICT_FALSE
2422               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2423             vnet_feature_arc_start (lm->output_feature_arc_index,
2424                                     tx_sw_if_index0, &next_index, b[0]);
2425           next[0] = next_index;
2426
2427           if (is_midchain)
2428             /* this acts on the packet that is about to be encapped */
2429             calc_checksums (vm, b[0]);
2430
2431           /* Guess we are only writing on simple Ethernet header. */
2432           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2433
2434           if (do_counters)
2435             vlib_increment_combined_counter
2436               (&adjacency_counters,
2437                thread_index, adj_index0, 1,
2438                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2439
2440           if (is_midchain && adj0->sub_type.midchain.fixup_func)
2441             adj0->sub_type.midchain.fixup_func
2442               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2443
2444           if (is_mcast)
2445             /* copy bytes from the IP address into the MAC rewrite */
2446             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2447                                         adj0->rewrite_header.dst_mcast_offset,
2448                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2449         }
2450       else
2451         {
2452           b[0]->error = error_node->errors[error0];
2453           /* undo the TTL decrement - we'll be back to do it again */
2454           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2455             ip4_ttl_inc (b[0], ip0);
2456         }
2457
2458       next += 1;
2459       b += 1;
2460       n_left_from -= 1;
2461     }
2462
2463
2464   /* Need to do trace after rewrites to pick up new packet data. */
2465   if (node->flags & VLIB_NODE_FLAG_TRACE)
2466     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2467
2468   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2469   return frame->n_vectors;
2470 }
2471
2472 always_inline uword
2473 ip4_rewrite_inline (vlib_main_t * vm,
2474                     vlib_node_runtime_t * node,
2475                     vlib_frame_t * frame,
2476                     int do_counters, int is_midchain, int is_mcast)
2477 {
2478   return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2479                                       is_midchain, is_mcast);
2480 }
2481
2482
2483 /** @brief IPv4 rewrite node.
2484     @node ip4-rewrite
2485
2486     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2487     header checksum, fetch the ip adjacency, check the outbound mtu,
2488     apply the adjacency rewrite, and send pkts to the adjacency
2489     rewrite header's rewrite_next_index.
2490
2491     @param vm vlib_main_t corresponding to the current thread
2492     @param node vlib_node_runtime_t
2493     @param frame vlib_frame_t whose contents should be dispatched
2494
2495     @par Graph mechanics: buffer metadata, next index usage
2496
2497     @em Uses:
2498     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2499         - the rewrite adjacency index
2500     - <code>adj->lookup_next_index</code>
2501         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2502           the packet will be dropped.
2503     - <code>adj->rewrite_header</code>
2504         - Rewrite string length, rewrite string, next_index
2505
2506     @em Sets:
2507     - <code>b->current_data, b->current_length</code>
2508         - Updated net of applying the rewrite string
2509
2510     <em>Next Indices:</em>
2511     - <code> adj->rewrite_header.next_index </code>
2512       or @c ip4-drop
2513 */
2514
2515 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2516                                  vlib_frame_t * frame)
2517 {
2518   if (adj_are_counters_enabled ())
2519     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2520   else
2521     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2522 }
2523
2524 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2525                                        vlib_node_runtime_t * node,
2526                                        vlib_frame_t * frame)
2527 {
2528   if (adj_are_counters_enabled ())
2529     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2530   else
2531     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2532 }
2533
2534 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2535                                   vlib_node_runtime_t * node,
2536                                   vlib_frame_t * frame)
2537 {
2538   if (adj_are_counters_enabled ())
2539     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2540   else
2541     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2542 }
2543
2544 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2545                                        vlib_node_runtime_t * node,
2546                                        vlib_frame_t * frame)
2547 {
2548   if (adj_are_counters_enabled ())
2549     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2550   else
2551     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2552 }
2553
2554 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2555                                         vlib_node_runtime_t * node,
2556                                         vlib_frame_t * frame)
2557 {
2558   if (adj_are_counters_enabled ())
2559     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2560   else
2561     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2562 }
2563
2564 /* *INDENT-OFF* */
2565 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2566   .name = "ip4-rewrite",
2567   .vector_size = sizeof (u32),
2568
2569   .format_trace = format_ip4_rewrite_trace,
2570
2571   .n_next_nodes = IP4_REWRITE_N_NEXT,
2572   .next_nodes = {
2573     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2574     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2575     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2576   },
2577 };
2578
2579 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2580   .name = "ip4-rewrite-bcast",
2581   .vector_size = sizeof (u32),
2582
2583   .format_trace = format_ip4_rewrite_trace,
2584   .sibling_of = "ip4-rewrite",
2585 };
2586
2587 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2588   .name = "ip4-rewrite-mcast",
2589   .vector_size = sizeof (u32),
2590
2591   .format_trace = format_ip4_rewrite_trace,
2592   .sibling_of = "ip4-rewrite",
2593 };
2594
2595 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2596   .name = "ip4-mcast-midchain",
2597   .vector_size = sizeof (u32),
2598
2599   .format_trace = format_ip4_rewrite_trace,
2600   .sibling_of = "ip4-rewrite",
2601 };
2602
2603 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2604   .name = "ip4-midchain",
2605   .vector_size = sizeof (u32),
2606   .format_trace = format_ip4_rewrite_trace,
2607   .sibling_of = "ip4-rewrite",
2608 };
/* *INDENT-ON* */
2610
2611 static int
2612 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2613 {
2614   ip4_fib_mtrie_t *mtrie0;
2615   ip4_fib_mtrie_leaf_t leaf0;
2616   u32 lbi0;
2617
2618   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2619
2620   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2621   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2622   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2623
2624   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2625
2626   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2627 }
2628
2629 static clib_error_t *
2630 test_lookup_command_fn (vlib_main_t * vm,
2631                         unformat_input_t * input, vlib_cli_command_t * cmd)
2632 {
2633   ip4_fib_t *fib;
2634   u32 table_id = 0;
2635   f64 count = 1;
2636   u32 n;
2637   int i;
2638   ip4_address_t ip4_base_address;
2639   u64 errors = 0;
2640
2641   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2642     {
2643       if (unformat (input, "table %d", &table_id))
2644         {
2645           /* Make sure the entry exists. */
2646           fib = ip4_fib_get (table_id);
2647           if ((fib) && (fib->index != table_id))
2648             return clib_error_return (0, "<fib-index> %d does not exist",
2649                                       table_id);
2650         }
2651       else if (unformat (input, "count %f", &count))
2652         ;
2653
2654       else if (unformat (input, "%U",
2655                          unformat_ip4_address, &ip4_base_address))
2656         ;
2657       else
2658         return clib_error_return (0, "unknown input `%U'",
2659                                   format_unformat_error, input);
2660     }
2661
2662   n = count;
2663
2664   for (i = 0; i < n; i++)
2665     {
2666       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2667         errors++;
2668
2669       ip4_base_address.as_u32 =
2670         clib_host_to_net_u32 (1 +
2671                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2672     }
2673
2674   if (errors)
2675     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2676   else
2677     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2678
2679   return 0;
2680 }
2681
2682 /*?
2683  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2684  * given FIB table to determine if there is a conflict with the
2685  * adjacency table. The fib-id can be determined by using the
2686  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2687  * of 0 is used.
2688  *
2689  * @todo This command uses fib-id, other commands use table-id (not
2690  * just a name, they are different indexes). Would like to change this
2691  * to table-id for consistency.
2692  *
2693  * @cliexpar
2694  * Example of how to run the test lookup command:
2695  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2696  * No errors in 2 lookups
2697  * @cliexend
2698 ?*/
2699 /* *INDENT-OFF* */
2700 VLIB_CLI_COMMAND (lookup_test_command, static) =
2701 {
2702   .path = "test lookup",
2703   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2704   .function = test_lookup_command_fn,
2705 };
2706 /* *INDENT-ON* */
2707
2708 #ifndef CLIB_MARCH_VARIANT
2709 int
2710 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2711 {
2712   u32 fib_index;
2713
2714   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2715
2716   if (~0 == fib_index)
2717     return VNET_API_ERROR_NO_SUCH_FIB;
2718
2719   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2720                                   flow_hash_config);
2721
2722   return 0;
2723 }
2724 #endif
2725
2726 static clib_error_t *
2727 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2728                              unformat_input_t * input,
2729                              vlib_cli_command_t * cmd)
2730 {
2731   int matched = 0;
2732   u32 table_id = 0;
2733   u32 flow_hash_config = 0;
2734   int rv;
2735
2736   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2737     {
2738       if (unformat (input, "table %d", &table_id))
2739         matched = 1;
2740 #define _(a,v) \
2741     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2742       foreach_flow_hash_bit
2743 #undef _
2744         else
2745         break;
2746     }
2747
2748   if (matched == 0)
2749     return clib_error_return (0, "unknown input `%U'",
2750                               format_unformat_error, input);
2751
2752   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2753   switch (rv)
2754     {
2755     case 0:
2756       break;
2757
2758     case VNET_API_ERROR_NO_SUCH_FIB:
2759       return clib_error_return (0, "no such FIB table %d", table_id);
2760
2761     default:
2762       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2763       break;
2764     }
2765
2766   return 0;
2767 }
2768
2769 /*?
2770  * Configure the set of IPv4 fields used by the flow hash.
2771  *
2772  * @cliexpar
2773  * Example of how to set the flow hash on a given table:
2774  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2776  * @cliexstart{show ip fib}
2777  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2778  * 0.0.0.0/0
2779  *   unicast-ip4-chain
2780  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2781  *     [0] [@0]: dpo-drop ip6
2782  * 0.0.0.0/32
2783  *   unicast-ip4-chain
2784  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2785  *     [0] [@0]: dpo-drop ip6
2786  * 224.0.0.0/8
2787  *   unicast-ip4-chain
2788  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2789  *     [0] [@0]: dpo-drop ip6
2790  * 6.0.1.2/32
2791  *   unicast-ip4-chain
2792  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2793  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2794  * 7.0.0.1/32
2795  *   unicast-ip4-chain
2796  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2797  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2798  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2799  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2800  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2801  * 240.0.0.0/8
2802  *   unicast-ip4-chain
2803  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2804  *     [0] [@0]: dpo-drop ip6
2805  * 255.255.255.255/32
2806  *   unicast-ip4-chain
2807  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2808  *     [0] [@0]: dpo-drop ip6
2809  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2810  * 0.0.0.0/0
2811  *   unicast-ip4-chain
2812  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2813  *     [0] [@0]: dpo-drop ip6
2814  * 0.0.0.0/32
2815  *   unicast-ip4-chain
2816  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2817  *     [0] [@0]: dpo-drop ip6
2818  * 172.16.1.0/24
2819  *   unicast-ip4-chain
2820  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2821  *     [0] [@4]: ipv4-glean: af_packet0
2822  * 172.16.1.1/32
2823  *   unicast-ip4-chain
2824  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2825  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2826  * 172.16.1.2/32
2827  *   unicast-ip4-chain
2828  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2829  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2830  * 172.16.2.0/24
2831  *   unicast-ip4-chain
2832  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2833  *     [0] [@4]: ipv4-glean: af_packet1
2834  * 172.16.2.1/32
2835  *   unicast-ip4-chain
2836  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2837  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2838  * 224.0.0.0/8
2839  *   unicast-ip4-chain
2840  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2841  *     [0] [@0]: dpo-drop ip6
2842  * 240.0.0.0/8
2843  *   unicast-ip4-chain
2844  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2845  *     [0] [@0]: dpo-drop ip6
2846  * 255.255.255.255/32
2847  *   unicast-ip4-chain
2848  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2849  *     [0] [@0]: dpo-drop ip6
2850  * @cliexend
2851 ?*/
2852 /* *INDENT-OFF* */
2853 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2854 {
2855   .path = "set ip flow-hash",
2856   .short_help =
2857   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2858   .function = set_ip_flow_hash_command_fn,
2859 };
2860 /* *INDENT-ON* */
2861
2862 #ifndef CLIB_MARCH_VARIANT
2863 int
2864 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2865                              u32 table_index)
2866 {
2867   vnet_main_t *vnm = vnet_get_main ();
2868   vnet_interface_main_t *im = &vnm->interface_main;
2869   ip4_main_t *ipm = &ip4_main;
2870   ip_lookup_main_t *lm = &ipm->lookup_main;
2871   vnet_classify_main_t *cm = &vnet_classify_main;
2872   ip4_address_t *if_addr;
2873
2874   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2875     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2876
2877   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2878     return VNET_API_ERROR_NO_SUCH_ENTRY;
2879
2880   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2881   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2882
2883   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2884
2885   if (NULL != if_addr)
2886     {
2887       fib_prefix_t pfx = {
2888         .fp_len = 32,
2889         .fp_proto = FIB_PROTOCOL_IP4,
2890         .fp_addr.ip4 = *if_addr,
2891       };
2892       u32 fib_index;
2893
2894       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2895                                                        sw_if_index);
2896
2897
2898       if (table_index != (u32) ~ 0)
2899         {
2900           dpo_id_t dpo = DPO_INVALID;
2901
2902           dpo_set (&dpo,
2903                    DPO_CLASSIFY,
2904                    DPO_PROTO_IP4,
2905                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2906
2907           fib_table_entry_special_dpo_add (fib_index,
2908                                            &pfx,
2909                                            FIB_SOURCE_CLASSIFY,
2910                                            FIB_ENTRY_FLAG_NONE, &dpo);
2911           dpo_reset (&dpo);
2912         }
2913       else
2914         {
2915           fib_table_entry_special_remove (fib_index,
2916                                           &pfx, FIB_SOURCE_CLASSIFY);
2917         }
2918     }
2919
2920   return 0;
2921 }
2922 #endif
2923
2924 static clib_error_t *
2925 set_ip_classify_command_fn (vlib_main_t * vm,
2926                             unformat_input_t * input,
2927                             vlib_cli_command_t * cmd)
2928 {
2929   u32 table_index = ~0;
2930   int table_index_set = 0;
2931   u32 sw_if_index = ~0;
2932   int rv;
2933
2934   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2935     {
2936       if (unformat (input, "table-index %d", &table_index))
2937         table_index_set = 1;
2938       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2939                          vnet_get_main (), &sw_if_index))
2940         ;
2941       else
2942         break;
2943     }
2944
2945   if (table_index_set == 0)
2946     return clib_error_return (0, "classify table-index must be specified");
2947
2948   if (sw_if_index == ~0)
2949     return clib_error_return (0, "interface / subif must be specified");
2950
2951   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2952
2953   switch (rv)
2954     {
2955     case 0:
2956       break;
2957
2958     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2959       return clib_error_return (0, "No such interface");
2960
2961     case VNET_API_ERROR_NO_SUCH_ENTRY:
2962       return clib_error_return (0, "No such classifier table");
2963     }
2964   return 0;
2965 }
2966
2967 /*?
2968  * Assign a classification table to an interface. The classification
2969  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
2971  * on an interface.
2972  *
2973  * @cliexpar
2974  * Example of how to assign a classification table to an interface:
2975  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2976 ?*/
2977 /* *INDENT-OFF* */
2978 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2979 {
2980     .path = "set ip classify",
2981     .short_help =
2982     "set ip classify intfc <interface> table-index <classify-idx>",
2983     .function = set_ip_classify_command_fn,
2984 };
2985 /* *INDENT-ON* */
2986
2987 static clib_error_t *
2988 ip4_config (vlib_main_t * vm, unformat_input_t * input)
2989 {
2990   ip4_main_t *im = &ip4_main;
2991   uword heapsize = 0;
2992
2993   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2994     {
2995       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
2996         ;
2997       else
2998         return clib_error_return (0,
2999                                   "invalid heap-size parameter `%U'",
3000                                   format_unformat_error, input);
3001     }
3002
3003   im->mtrie_heap_size = heapsize;
3004
3005   return 0;
3006 }
3007
3008 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3009
3010 /*
3011  * fd.io coding-style-patch-verification: ON
3012  *
3013  * Local Variables:
3014  * eval: (c-set-style "gnu")
3015  * End:
3016  */