ip: fix ip zero checksum verification
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/classify_dpo.h>
56 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
57 #include <vnet/adj/adj_dp.h>
58
59 #include <vnet/ip/ip4_forward.h>
60 #include <vnet/interface_output.h>
61 #include <vnet/classify/vnet_classify.h>
62
63 /** @brief IPv4 lookup node.
64     @node ip4-lookup
65
66     This is the main IPv4 lookup dispatch node.
67
68     @param vm vlib_main_t corresponding to the current thread
69     @param node vlib_node_runtime_t
70     @param frame vlib_frame_t whose contents should be dispatched
71
72     @par Graph mechanics: buffer metadata, next index usage
73
74     @em Uses:
75     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
76         - Indicates the @c sw_if_index value of the interface that the
77           packet was received on.
78     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
79         - When the value is @c ~0 then the node performs a longest prefix
80           match (LPM) for the packet destination address in the FIB attached
81           to the receive interface.
82         - Otherwise perform LPM for the packet destination address in the
83           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
84           value (0, 1, ...) and not a VRF id.
85
86     @em Sets:
87     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
88         - The lookup result adjacency index.
89
90     <em>Next Index:</em>
91     - Dispatches the packet to the node index found in
92       ip_adjacency_t @c adj->lookup_next_index
93       (where @c adj is the lookup result adjacency).
94 */
95 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
96                                 vlib_frame_t * frame)
97 {
98   return ip4_lookup_inline (vm, node, frame);
99 }
100
101 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
102
103 /* *INDENT-OFF* */
104 VLIB_REGISTER_NODE (ip4_lookup_node) =
105 {
106   .name = "ip4-lookup",
107   .vector_size = sizeof (u32),
108   .format_trace = format_ip4_lookup_trace,
109   .n_next_nodes = IP_LOOKUP_N_NEXT,
110   .next_nodes = IP4_LOOKUP_NEXT_NODES,
111 };
112 /* *INDENT-ON* */
113
/**
 * IPv4 load-balance node function.
 *
 * For every buffer in the frame:
 *  - read the load-balance index from vnet_buffer(b)->ip.adj_index[VLIB_TX]
 *    and fetch the load_balance_t with load_balance_get();
 *  - when the load-balance has more than one bucket, pick a bucket from the
 *    flow hash (the stored hash shifted right by one if it is non-zero,
 *    otherwise a hash freshly computed with ip4_compute_flow_hash()); the
 *    hash that was used is written back to vnet_buffer(b)->ip.flow_hash;
 *  - when it has a single bucket, use bucket 0;
 *  - take the next node from the chosen DPO (dpoi_next_node) and store its
 *    dpoi_index in vnet_buffer(b)->ip.adj_index[VLIB_TX];
 *  - bump the per-load-balance combined counter (lbm_via_counters) by one
 *    packet and the buffer chain length in bytes.
 *
 * All buffers are then enqueued with vlib_buffer_enqueue_to_next() and, if
 * the node has VLIB_NODE_FLAG_TRACE set, ip4_forward_next_trace() is called.
 *
 * @param vm    vlib main for the current thread (thread_index is read from it)
 * @param node  runtime data of this node
 * @param frame frame of buffer indices to process
 * @return frame->n_vectors
 */
114 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
115                                       vlib_node_runtime_t * node,
116                                       vlib_frame_t * frame)
117 {
118   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
119   u32 n_left, *from;
120   u32 thread_index = vm->thread_index;
121   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
122   u16 nexts[VLIB_FRAME_SIZE], *next;
123
124   from = vlib_frame_vector_args (frame);
125   n_left = frame->n_vectors;
126   next = nexts;
127
      /* Translate the frame's buffer indices into buffer pointers up front. */
128   vlib_get_buffers (vm, from, bufs, n_left);
129
      /*
       * Dual loop: two buffers are handled per iteration, and the loop only
       * runs while at least four remain, so the b[2]/b[3] prefetches below
       * always refer to buffers that belong to this frame.
       */
130   while (n_left >= 4)
131     {
132       const load_balance_t *lb0, *lb1;
133       const ip4_header_t *ip0, *ip1;
134       u32 lbi0, hc0, lbi1, hc1;
135       const dpo_id_t *dpo0, *dpo1;
136
137       /* Prefetch next iteration. */
138       {
139         vlib_prefetch_buffer_header (b[2], LOAD);
140         vlib_prefetch_buffer_header (b[3], LOAD);
141
142         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
143         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
144       }
145
146       ip0 = vlib_buffer_get_current (b[0]);
147       ip1 = vlib_buffer_get_current (b[1]);
148       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
149       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
150
151       lb0 = load_balance_get (lbi0);
152       lb1 = load_balance_get (lbi1);
153
154       /*
155        * this node is for via FIBs we can re-use the hash value from the
156        * to node if present.
157        * We don't want to use the same hash value at each level in the recursion
158        * graph as that would lead to polarisation
159        */
160       hc0 = hc1 = 0;
161
          /* Buffer 0: hash-based bucket choice only when there is a choice. */
162       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
163         {
164           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
165             {
                  /* A non-zero stored hash is shifted by one bit, written
                   * back and used. */
166               hc0 = vnet_buffer (b[0])->ip.flow_hash =
167                 vnet_buffer (b[0])->ip.flow_hash >> 1;
168             }
169           else
170             {
                  /* No stored hash: compute one using this load-balance's
                   * hash config and remember it in the buffer. */
171               hc0 = vnet_buffer (b[0])->ip.flow_hash =
172                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
173             }
174           dpo0 = load_balance_get_fwd_bucket
175             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
176         }
177       else
178         {
179           dpo0 = load_balance_get_bucket_i (lb0, 0);
180         }
          /* Buffer 1: same selection logic as buffer 0. */
181       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
182         {
183           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
184             {
185               hc1 = vnet_buffer (b[1])->ip.flow_hash =
186                 vnet_buffer (b[1])->ip.flow_hash >> 1;
187             }
188           else
189             {
190               hc1 = vnet_buffer (b[1])->ip.flow_hash =
191                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
192             }
193           dpo1 = load_balance_get_fwd_bucket
194             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
195         }
196       else
197         {
198           dpo1 = load_balance_get_bucket_i (lb1, 0);
199         }
200
          /* The chosen DPO supplies both the next node and the index that
           * replaces the load-balance index in the buffer metadata. */
201       next[0] = dpo0->dpoi_next_node;
202       next[1] = dpo1->dpoi_next_node;
203
204       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
205       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
206
          /* Counters are indexed by the original load-balance index. */
207       vlib_increment_combined_counter
208         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
209       vlib_increment_combined_counter
210         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
211
212       b += 2;
213       next += 2;
214       n_left -= 2;
215     }
216
      /* Single loop: handles whatever the dual loop left over (and frames
       * with fewer than four buffers), one buffer at a time, no prefetch. */
217   while (n_left > 0)
218     {
219       const load_balance_t *lb0;
220       const ip4_header_t *ip0;
221       const dpo_id_t *dpo0;
222       u32 lbi0, hc0;
223
224       ip0 = vlib_buffer_get_current (b[0]);
225       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
226
227       lb0 = load_balance_get (lbi0);
228
229       hc0 = 0;
230       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
231         {
232           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
233             {
234               hc0 = vnet_buffer (b[0])->ip.flow_hash =
235                 vnet_buffer (b[0])->ip.flow_hash >> 1;
236             }
237           else
238             {
239               hc0 = vnet_buffer (b[0])->ip.flow_hash =
240                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
241             }
242           dpo0 = load_balance_get_fwd_bucket
243             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
244         }
245       else
246         {
247           dpo0 = load_balance_get_bucket_i (lb0, 0);
248         }
249
250       next[0] = dpo0->dpoi_next_node;
251       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
252
253       vlib_increment_combined_counter
254         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
255
256       b += 1;
257       next += 1;
258       n_left -= 1;
259     }
260
      /* Hand every buffer to the next node recorded for it in nexts[]. */
261   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
262   if (node->flags & VLIB_NODE_FLAG_TRACE)
263     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
264
265   return frame->n_vectors;
266 }
267
268 /* *INDENT-OFF* */
269 VLIB_REGISTER_NODE (ip4_load_balance_node) =
270 {
271   .name = "ip4-load-balance",
272   .vector_size = sizeof (u32),
273   .sibling_of = "ip4-lookup",
274   .format_trace = format_ip4_lookup_trace,
275 };
276 /* *INDENT-ON* */
277
278 #ifndef CLIB_MARCH_VARIANT
279 /* get first interface address */
280 ip4_address_t *
281 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
282                              ip_interface_address_t ** result_ia)
283 {
284   ip_lookup_main_t *lm = &im->lookup_main;
285   ip_interface_address_t *ia = 0;
286   ip4_address_t *result = 0;
287
288   /* *INDENT-OFF* */
289   foreach_ip_interface_address
290     (lm, ia, sw_if_index,
291      1 /* honor unnumbered */ ,
292      ({
293        ip4_address_t * a =
294          ip_interface_address_get_address (lm, ia);
295        result = a;
296        break;
297      }));
298   /* *INDENT-OFF* */
299   if (result_ia)
300     *result_ia = result ? ia : 0;
301   return result;
302 }
303 #endif
304
305 static void
306 ip4_add_subnet_bcast_route (u32 fib_index,
307                             fib_prefix_t *pfx,
308                             u32 sw_if_index)
309 {
310   vnet_sw_interface_flags_t iflags;
311
312   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
313
314   fib_table_entry_special_remove(fib_index,
315                                  pfx,
316                                  FIB_SOURCE_INTERFACE);
317
318   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
319     {
320       fib_table_entry_update_one_path (fib_index, pfx,
321                                        FIB_SOURCE_INTERFACE,
322                                        FIB_ENTRY_FLAG_NONE,
323                                        DPO_PROTO_IP4,
324                                        /* No next-hop address */
325                                        &ADJ_BCAST_ADDR,
326                                        sw_if_index,
327                                        // invalid FIB index
328                                        ~0,
329                                        1,
330                                        // no out-label stack
331                                        NULL,
332                                        FIB_ROUTE_PATH_FLAG_NONE);
333     }
334   else
335     {
336         fib_table_entry_special_add(fib_index,
337                                     pfx,
338                                     FIB_SOURCE_INTERFACE,
339                                     (FIB_ENTRY_FLAG_DROP |
340                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
341     }
342 }
343
344 static void
345 ip4_add_interface_prefix_routes (ip4_main_t *im,
346                                  u32 sw_if_index,
347                                  u32 fib_index,
348                                  ip_interface_address_t * a)
349 {
350   ip_lookup_main_t *lm = &im->lookup_main;
351   ip_interface_prefix_t *if_prefix;
352   ip4_address_t *address = ip_interface_address_get_address (lm, a);
353
354   ip_interface_prefix_key_t key = {
355     .prefix = {
356       .fp_len = a->address_length,
357       .fp_proto = FIB_PROTOCOL_IP4,
358       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
359     },
360     .sw_if_index = sw_if_index,
361   };
362
363   fib_prefix_t pfx_special = {
364     .fp_proto = FIB_PROTOCOL_IP4,
365   };
366
367   /* If prefix already set on interface, just increment ref count & return */
368   if_prefix = ip_get_interface_prefix (lm, &key);
369   if (if_prefix)
370     {
371       if_prefix->ref_count += 1;
372       return;
373     }
374
375   /* New prefix - allocate a pool entry, initialize it, add to the hash */
376   pool_get (lm->if_prefix_pool, if_prefix);
377   if_prefix->ref_count = 1;
378   if_prefix->src_ia_index = a - lm->if_address_pool;
379   clib_memcpy (&if_prefix->key, &key, sizeof (key));
380   mhash_set (&lm->prefix_to_if_prefix_index, &key,
381              if_prefix - lm->if_prefix_pool, 0 /* old value */);
382
383   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
384   if (a->address_length <= 30)
385     {
386       pfx_special.fp_len = a->address_length;
387       pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
388
389       /* set the glean route for the prefix */
390       fib_table_entry_update_one_path (fib_index, &pfx_special,
391                                        FIB_SOURCE_INTERFACE,
392                                        (FIB_ENTRY_FLAG_CONNECTED |
393                                         FIB_ENTRY_FLAG_ATTACHED),
394                                        DPO_PROTO_IP4,
395                                        /* No next-hop address */
396                                        NULL,
397                                        sw_if_index,
398                                        /* invalid FIB index */
399                                        ~0,
400                                        1,
401                                        /* no out-label stack */
402                                        NULL,
403                                        FIB_ROUTE_PATH_FLAG_NONE);
404
405       /* set a drop route for the base address of the prefix */
406       pfx_special.fp_len = 32;
407       pfx_special.fp_addr.ip4.as_u32 =
408         address->as_u32 & im->fib_masks[a->address_length];
409
410       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
411         fib_table_entry_special_add (fib_index, &pfx_special,
412                                      FIB_SOURCE_INTERFACE,
413                                      (FIB_ENTRY_FLAG_DROP |
414                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
415
416       /* set a route for the broadcast address of the prefix */
417       pfx_special.fp_len = 32;
418       pfx_special.fp_addr.ip4.as_u32 =
419         address->as_u32 | ~im->fib_masks[a->address_length];
420       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
421         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
422
423
424     }
425   /* length == 31 - add an attached route for the other address */
426   else if (a->address_length == 31)
427     {
428       pfx_special.fp_len = 32;
429       pfx_special.fp_addr.ip4.as_u32 =
430         address->as_u32 ^ clib_host_to_net_u32(1);
431
432       fib_table_entry_update_one_path (fib_index, &pfx_special,
433                                        FIB_SOURCE_INTERFACE,
434                                        (FIB_ENTRY_FLAG_ATTACHED),
435                                        DPO_PROTO_IP4,
436                                        &pfx_special.fp_addr,
437                                        sw_if_index,
438                                        /* invalid FIB index */
439                                        ~0,
440                                        1,
441                                        NULL,
442                                        FIB_ROUTE_PATH_FLAG_NONE);
443     }
444 }
445
446 static void
447 ip4_add_interface_routes (u32 sw_if_index,
448                           ip4_main_t * im, u32 fib_index,
449                           ip_interface_address_t * a)
450 {
451   ip_lookup_main_t *lm = &im->lookup_main;
452   ip4_address_t *address = ip_interface_address_get_address (lm, a);
453   fib_prefix_t pfx = {
454     .fp_len = 32,
455     .fp_proto = FIB_PROTOCOL_IP4,
456     .fp_addr.ip4 = *address,
457   };
458
459   /* set special routes for the prefix if needed */
460   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
461
462   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
463     {
464       u32 classify_table_index =
465         lm->classify_table_index_by_sw_if_index[sw_if_index];
466       if (classify_table_index != (u32) ~ 0)
467         {
468           dpo_id_t dpo = DPO_INVALID;
469
470           dpo_set (&dpo,
471                    DPO_CLASSIFY,
472                    DPO_PROTO_IP4,
473                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
474
475           fib_table_entry_special_dpo_add (fib_index,
476                                            &pfx,
477                                            FIB_SOURCE_CLASSIFY,
478                                            FIB_ENTRY_FLAG_NONE, &dpo);
479           dpo_reset (&dpo);
480         }
481     }
482
483   fib_table_entry_update_one_path (fib_index, &pfx,
484                                    FIB_SOURCE_INTERFACE,
485                                    (FIB_ENTRY_FLAG_CONNECTED |
486                                     FIB_ENTRY_FLAG_LOCAL),
487                                    DPO_PROTO_IP4,
488                                    &pfx.fp_addr,
489                                    sw_if_index,
490                                    // invalid FIB index
491                                    ~0,
492                                    1, NULL,
493                                    FIB_ROUTE_PATH_FLAG_NONE);
494 }
495
496 static void
497 ip4_del_interface_prefix_routes (ip4_main_t * im,
498                                  u32 sw_if_index,
499                                  u32 fib_index,
500                                  ip4_address_t * address,
501                                  u32 address_length)
502 {
503   ip_lookup_main_t *lm = &im->lookup_main;
504   ip_interface_prefix_t *if_prefix;
505
506   ip_interface_prefix_key_t key = {
507     .prefix = {
508       .fp_len = address_length,
509       .fp_proto = FIB_PROTOCOL_IP4,
510       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
511     },
512     .sw_if_index = sw_if_index,
513   };
514
515   fib_prefix_t pfx_special = {
516     .fp_len = 32,
517     .fp_proto = FIB_PROTOCOL_IP4,
518   };
519
520   if_prefix = ip_get_interface_prefix (lm, &key);
521   if (!if_prefix)
522     {
523       clib_warning ("Prefix not found while deleting %U",
524                     format_ip4_address_and_length, address, address_length);
525       return;
526     }
527
528   if_prefix->ref_count -= 1;
529
530   /*
531    * Routes need to be adjusted if:
532    * - deleting last intf addr in prefix
533    * - deleting intf addr used as default source address in glean adjacency
534    *
535    * We're done now otherwise
536    */
537   if ((if_prefix->ref_count > 0) &&
538       !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
539     return;
540
541   /* length <= 30, delete glean route, first address, last address */
542   if (address_length <= 30)
543     {
544
545       /* remove glean route for prefix */
546       pfx_special.fp_addr.ip4 = *address;
547       pfx_special.fp_len = address_length;
548       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
549
550       /* if no more intf addresses in prefix, remove other special routes */
551       if (!if_prefix->ref_count)
552         {
553           /* first address in prefix */
554           pfx_special.fp_addr.ip4.as_u32 =
555             address->as_u32 & im->fib_masks[address_length];
556           pfx_special.fp_len = 32;
557
558           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
559           fib_table_entry_special_remove (fib_index,
560                                           &pfx_special,
561                                           FIB_SOURCE_INTERFACE);
562
563           /* prefix broadcast address */
564           pfx_special.fp_addr.ip4.as_u32 =
565             address->as_u32 | ~im->fib_masks[address_length];
566           pfx_special.fp_len = 32;
567
568           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
569           fib_table_entry_special_remove (fib_index,
570                                           &pfx_special,
571                                           FIB_SOURCE_INTERFACE);
572         }
573       else
574         /* default source addr just got deleted, find another */
575         {
576           ip_interface_address_t *new_src_ia = NULL;
577           ip4_address_t *new_src_addr = NULL;
578
579           new_src_addr =
580             ip4_interface_address_matching_destination
581               (im, address, sw_if_index, &new_src_ia);
582
583           if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
584
585           pfx_special.fp_len = address_length;
586           pfx_special.fp_addr.ip4 = *new_src_addr;
587
588           /* set new glean route for the prefix */
589           fib_table_entry_update_one_path (fib_index, &pfx_special,
590                                            FIB_SOURCE_INTERFACE,
591                                            (FIB_ENTRY_FLAG_CONNECTED |
592                                             FIB_ENTRY_FLAG_ATTACHED),
593                                            DPO_PROTO_IP4,
594                                            /* No next-hop address */
595                                            NULL,
596                                            sw_if_index,
597                                            /* invalid FIB index */
598                                            ~0,
599                                            1,
600                                            /* no out-label stack */
601                                            NULL,
602                                            FIB_ROUTE_PATH_FLAG_NONE);
603           return;
604         }
605     }
606   /* length == 31, delete attached route for the other address */
607   else if (address_length == 31)
608     {
609       pfx_special.fp_addr.ip4.as_u32 =
610         address->as_u32 ^ clib_host_to_net_u32(1);
611
612       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
613     }
614
615   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
616   pool_put (lm->if_prefix_pool, if_prefix);
617 }
618
619 static void
620 ip4_del_interface_routes (u32 sw_if_index,
621                           ip4_main_t * im,
622                           u32 fib_index,
623                           ip4_address_t * address, u32 address_length)
624 {
625   fib_prefix_t pfx = {
626     .fp_len = address_length,
627     .fp_proto = FIB_PROTOCOL_IP4,
628     .fp_addr.ip4 = *address,
629   };
630
631   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
632                                    address, address_length);
633
634   pfx.fp_len = 32;
635   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
636 }
637
638 #ifndef CLIB_MARCH_VARIANT
639 void
640 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
641 {
642   ip4_main_t *im = &ip4_main;
643   vnet_main_t *vnm = vnet_get_main ();
644   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
645
646   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
647
648   /*
649    * enable/disable only on the 1<->0 transition
650    */
651   if (is_enable)
652     {
653       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
654         return;
655     }
656   else
657     {
658       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
659       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
660         return;
661     }
662   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
663                                !is_enable, 0, 0);
664
665
666   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
667                                sw_if_index, !is_enable, 0, 0);
668
669   if (is_enable)
670     hi->l3_if_count++;
671   else if (hi->l3_if_count)
672     hi->l3_if_count--;
673
674   {
675     ip4_enable_disable_interface_callback_t *cb;
676     vec_foreach (cb, im->enable_disable_interface_callbacks)
677       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
678   }
679 }
680
681 static clib_error_t *
682 ip4_add_del_interface_address_internal (vlib_main_t * vm,
683                                         u32 sw_if_index,
684                                         ip4_address_t * address,
685                                         u32 address_length, u32 is_del)
686 {
687   vnet_main_t *vnm = vnet_get_main ();
688   ip4_main_t *im = &ip4_main;
689   ip_lookup_main_t *lm = &im->lookup_main;
690   clib_error_t *error = 0;
691   u32 if_address_index;
692   ip4_address_fib_t ip4_af, *addr_fib = 0;
693
694   /* local0 interface doesn't support IP addressing  */
695   if (sw_if_index == 0)
696     {
697       return
698        clib_error_create ("local0 interface doesn't support IP addressing");
699     }
700
701   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
702   ip4_addr_fib_init (&ip4_af, address,
703                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
704   vec_add1 (addr_fib, ip4_af);
705
706   /*
707    * there is no support for adj-fib handling in the presence of overlapping
708    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
709    * most routers do.
710    */
711   /* *INDENT-OFF* */
712   if (!is_del)
713     {
714       /* When adding an address check that it does not conflict
715          with an existing address on any interface in this table. */
716       ip_interface_address_t *ia;
717       vnet_sw_interface_t *sif;
718
719       pool_foreach(sif, vnm->interface_main.sw_interfaces,
720       ({
721           if (im->fib_index_by_sw_if_index[sw_if_index] ==
722               im->fib_index_by_sw_if_index[sif->sw_if_index])
723             {
724               foreach_ip_interface_address
725                 (&im->lookup_main, ia, sif->sw_if_index,
726                  0 /* honor unnumbered */ ,
727                  ({
728                    ip4_address_t * x =
729                      ip_interface_address_get_address
730                      (&im->lookup_main, ia);
731
732                    if (ip4_destination_matches_route
733                        (im, address, x, ia->address_length) ||
734                        ip4_destination_matches_route (im,
735                                                       x,
736                                                       address,
737                                                       address_length))
738                      {
739                        /* an intf may have >1 addr from the same prefix */
740                        if ((sw_if_index == sif->sw_if_index) &&
741                            (ia->address_length == address_length) &&
742                            (x->as_u32 != address->as_u32))
743                          continue;
744
745                        if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
746                          /* if the address we're comparing against is stale
747                           * then the CP has not added this one back yet, maybe
748                           * it never will, so we have to assume it won't and
749                           * ignore it. if it does add it back, then it will fail
750                           * because this one is now present */
751                          continue;
752
753                        /* error if the length or intf was different */
754                        vnm->api_errno = VNET_API_ERROR_ADDRESS_IN_USE;
755
756                        error = clib_error_create
757                          ("failed to add %U on %U which conflicts with %U for interface %U",
758                           format_ip4_address_and_length, address,
759                           address_length,
760                           format_vnet_sw_if_index_name, vnm,
761                           sw_if_index,
762                           format_ip4_address_and_length, x,
763                           ia->address_length,
764                           format_vnet_sw_if_index_name, vnm,
765                           sif->sw_if_index);
766                        goto done;
767                      }
768                  }));
769             }
770       }));
771     }
772   /* *INDENT-ON* */
773
774   if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
775
776   if (is_del)
777     {
778       if (~0 == if_address_index)
779         {
780           vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
781           error = clib_error_create ("%U not found for interface %U",
782                                      lm->format_address_and_length,
783                                      addr_fib, address_length,
784                                      format_vnet_sw_if_index_name, vnm,
785                                      sw_if_index);
786           goto done;
787         }
788
789       error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib,
790                                         address_length, sw_if_index);
791       if (error)
792         goto done;
793     }
794   else
795     {
796       if (~0 != if_address_index)
797         {
798           ip_interface_address_t *ia;
799
800           ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
801
802           if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
803             {
804               if (ia->sw_if_index == sw_if_index)
805                 {
806                   /* re-adding an address during the replace action.
807                    * consdier this the update. clear the flag and
808                    * we're done */
809                   ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
810                   goto done;
811                 }
812               else
813                 {
814                   /* The prefix is moving from one interface to another.
815                    * delete the stale and add the new */
816                   ip4_add_del_interface_address_internal (vm,
817                                                           ia->sw_if_index,
818                                                           address,
819                                                           address_length, 1);
820                   ia = NULL;
821                   error = ip_interface_address_add (lm, sw_if_index,
822                                                     addr_fib, address_length,
823                                                     &if_address_index);
824                 }
825             }
826           else
827             {
828               vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
829               error = clib_error_create
830                 ("Prefix %U already found on interface %U",
831                  lm->format_address_and_length, addr_fib, address_length,
832                  format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
833             }
834         }
835       else
836         error = ip_interface_address_add (lm, sw_if_index,
837                                           addr_fib, address_length,
838                                           &if_address_index);
839     }
840
841   if (error)
842     goto done;
843
844   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
845   ip4_mfib_interface_enable_disable (sw_if_index, !is_del);
846
847   /* intf addr routes are added/deleted on admin up/down */
848   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
849     {
850       if (is_del)
851         ip4_del_interface_routes (sw_if_index,
852                                   im, ip4_af.fib_index, address,
853                                   address_length);
854       else
855         ip4_add_interface_routes (sw_if_index,
856                                   im, ip4_af.fib_index,
857                                   pool_elt_at_index
858                                   (lm->if_address_pool, if_address_index));
859     }
860
861   ip4_add_del_interface_address_callback_t *cb;
862   vec_foreach (cb, im->add_del_interface_address_callbacks)
863     cb->function (im, cb->function_opaque, sw_if_index,
864                   address, address_length, if_address_index, is_del);
865
866 done:
867   vec_free (addr_fib);
868   return error;
869 }
870
871 clib_error_t *
872 ip4_add_del_interface_address (vlib_main_t * vm,
873                                u32 sw_if_index,
874                                ip4_address_t * address,
875                                u32 address_length, u32 is_del)
876 {
877   return ip4_add_del_interface_address_internal
878     (vm, sw_if_index, address, address_length, is_del);
879 }
880
881 void
882 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
883 {
884   ip_interface_address_t *ia;
885   ip4_main_t *im;
886
887   im = &ip4_main;
888
889   /*
890    * when directed broadcast is enabled, the subnet braodcast route will forward
891    * packets using an adjacency with a broadcast MAC. otherwise it drops
892    */
893   /* *INDENT-OFF* */
894   foreach_ip_interface_address(&im->lookup_main, ia,
895                                sw_if_index, 0,
896      ({
897        if (ia->address_length <= 30)
898          {
899            ip4_address_t *ipa;
900
901            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
902
903            fib_prefix_t pfx = {
904              .fp_len = 32,
905              .fp_proto = FIB_PROTOCOL_IP4,
906              .fp_addr = {
907                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
908              },
909            };
910
911            ip4_add_subnet_bcast_route
912              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
913                                                   sw_if_index),
914               &pfx, sw_if_index);
915          }
916      }));
917   /* *INDENT-ON* */
918 }
919 #endif
920
921 static clib_error_t *
922 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
923 {
924   ip4_main_t *im = &ip4_main;
925   ip_interface_address_t *ia;
926   ip4_address_t *a;
927   u32 is_admin_up, fib_index;
928
929   /* Fill in lookup tables with default table (0). */
930   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
931
932   vec_validate_init_empty (im->
933                            lookup_main.if_address_pool_index_by_sw_if_index,
934                            sw_if_index, ~0);
935
936   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
937
938   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
939
940   /* *INDENT-OFF* */
941   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
942                                 0 /* honor unnumbered */,
943   ({
944     a = ip_interface_address_get_address (&im->lookup_main, ia);
945     if (is_admin_up)
946       ip4_add_interface_routes (sw_if_index,
947                                 im, fib_index,
948                                 ia);
949     else
950       ip4_del_interface_routes (sw_if_index,
951                                 im, fib_index,
952                                 a, ia->address_length);
953   }));
954   /* *INDENT-ON* */
955
956   return 0;
957 }
958
959 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
960
961 /* Built-in ip4 unicast rx feature path definition */
962 /* *INDENT-OFF* */
963 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
964 {
965   .arc_name = "ip4-unicast",
966   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
967   .last_in_arc = "ip4-lookup",
968   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
969 };
970
971 VNET_FEATURE_INIT (ip4_flow_classify, static) =
972 {
973   .arc_name = "ip4-unicast",
974   .node_name = "ip4-flow-classify",
975   .runs_before = VNET_FEATURES ("ip4-inacl"),
976 };
977
978 VNET_FEATURE_INIT (ip4_inacl, static) =
979 {
980   .arc_name = "ip4-unicast",
981   .node_name = "ip4-inacl",
982   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
983 };
984
985 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
986 {
987   .arc_name = "ip4-unicast",
988   .node_name = "ip4-source-and-port-range-check-rx",
989   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
990 };
991
992 VNET_FEATURE_INIT (ip4_policer_classify, static) =
993 {
994   .arc_name = "ip4-unicast",
995   .node_name = "ip4-policer-classify",
996   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
997 };
998
999 VNET_FEATURE_INIT (ip4_ipsec, static) =
1000 {
1001   .arc_name = "ip4-unicast",
1002   .node_name = "ipsec4-input-feature",
1003   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
1004 };
1005
1006 VNET_FEATURE_INIT (ip4_vpath, static) =
1007 {
1008   .arc_name = "ip4-unicast",
1009   .node_name = "vpath-input-ip4",
1010   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
1011 };
1012
1013 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
1014 {
1015   .arc_name = "ip4-unicast",
1016   .node_name = "ip4-vxlan-bypass",
1017   .runs_before = VNET_FEATURES ("ip4-lookup"),
1018 };
1019
1020 VNET_FEATURE_INIT (ip4_not_enabled, static) =
1021 {
1022   .arc_name = "ip4-unicast",
1023   .node_name = "ip4-not-enabled",
1024   .runs_before = VNET_FEATURES ("ip4-lookup"),
1025 };
1026
1027 VNET_FEATURE_INIT (ip4_lookup, static) =
1028 {
1029   .arc_name = "ip4-unicast",
1030   .node_name = "ip4-lookup",
1031   .runs_before = 0,     /* not before any other features */
1032 };
1033
1034 /* Built-in ip4 multicast rx feature path definition */
1035 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1036 {
1037   .arc_name = "ip4-multicast",
1038   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1039   .last_in_arc = "ip4-mfib-forward-lookup",
1040   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1041 };
1042
1043 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1044 {
1045   .arc_name = "ip4-multicast",
1046   .node_name = "vpath-input-ip4",
1047   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1048 };
1049
1050 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
1051 {
1052   .arc_name = "ip4-multicast",
1053   .node_name = "ip4-not-enabled",
1054   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1055 };
1056
1057 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1058 {
1059   .arc_name = "ip4-multicast",
1060   .node_name = "ip4-mfib-forward-lookup",
1061   .runs_before = 0,     /* last feature */
1062 };
1063
1064 /* Source and port-range check ip4 tx feature path definition */
1065 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1066 {
1067   .arc_name = "ip4-output",
1068   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1069   .last_in_arc = "interface-output",
1070   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1071 };
1072
1073 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1074 {
1075   .arc_name = "ip4-output",
1076   .node_name = "ip4-source-and-port-range-check-tx",
1077   .runs_before = VNET_FEATURES ("ip4-outacl"),
1078 };
1079
1080 VNET_FEATURE_INIT (ip4_outacl, static) =
1081 {
1082   .arc_name = "ip4-output",
1083   .node_name = "ip4-outacl",
1084   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1085 };
1086
1087 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1088 {
1089   .arc_name = "ip4-output",
1090   .node_name = "ipsec4-output-feature",
1091   .runs_before = VNET_FEATURES ("interface-output"),
1092 };
1093
1094 /* Built-in ip4 tx feature path definition */
1095 VNET_FEATURE_INIT (ip4_interface_output, static) =
1096 {
1097   .arc_name = "ip4-output",
1098   .node_name = "interface-output",
1099   .runs_before = 0,     /* not before any other features */
1100 };
1101 /* *INDENT-ON* */
1102
1103 static clib_error_t *
1104 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1105 {
1106   ip4_main_t *im = &ip4_main;
1107
1108   /* Fill in lookup tables with default table (0). */
1109   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1110   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1111
1112   if (!is_add)
1113     {
1114       ip4_main_t *im4 = &ip4_main;
1115       ip_lookup_main_t *lm4 = &im4->lookup_main;
1116       ip_interface_address_t *ia = 0;
1117       ip4_address_t *address;
1118       vlib_main_t *vm = vlib_get_main ();
1119
1120       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1121       /* *INDENT-OFF* */
1122       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1123       ({
1124         address = ip_interface_address_get_address (lm4, ia);
1125         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1126       }));
1127       /* *INDENT-ON* */
1128       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1129     }
1130
1131   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1132                                is_add, 0, 0);
1133
1134   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1135                                sw_if_index, is_add, 0, 0);
1136
1137   return /* no error */ 0;
1138 }
1139
1140 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1141
1142 /* Global IP4 main. */
1143 #ifndef CLIB_MARCH_VARIANT
1144 ip4_main_t ip4_main;
1145 #endif /* CLIB_MARCH_VARIANT */
1146
1147 static clib_error_t *
1148 ip4_lookup_init (vlib_main_t * vm)
1149 {
1150   ip4_main_t *im = &ip4_main;
1151   clib_error_t *error;
1152   uword i;
1153
1154   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1155     return error;
1156   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1157     return (error);
1158   if ((error = vlib_call_init_function (vm, fib_module_init)))
1159     return error;
1160   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1161     return error;
1162
1163   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1164     {
1165       u32 m;
1166
1167       if (i < 32)
1168         m = pow2_mask (i) << (32 - i);
1169       else
1170         m = ~0;
1171       im->fib_masks[i] = clib_host_to_net_u32 (m);
1172     }
1173
1174   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1175
1176   /* Create FIB with index 0 and table id of 0. */
1177   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1178                                      FIB_SOURCE_DEFAULT_ROUTE);
1179   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1180                                       MFIB_SOURCE_DEFAULT_ROUTE);
1181
1182   {
1183     pg_node_t *pn;
1184     pn = pg_get_node (ip4_lookup_node.index);
1185     pn->unformat_edit = unformat_pg_ip4_header;
1186   }
1187
1188   {
1189     ethernet_arp_header_t h;
1190
1191     clib_memset (&h, 0, sizeof (h));
1192
1193 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1194 #define _8(f,v) h.f = v;
1195     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1196     _16 (l3_type, ETHERNET_TYPE_IP4);
1197     _8 (n_l2_address_bytes, 6);
1198     _8 (n_l3_address_bytes, 4);
1199     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1200 #undef _16
1201 #undef _8
1202
1203     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1204                                /* data */ &h,
1205                                sizeof (h),
1206                                /* alloc chunk size */ 8,
1207                                "ip4 arp");
1208   }
1209
1210   return error;
1211 }
1212
1213 VLIB_INIT_FUNCTION (ip4_lookup_init);
1214
1215 typedef struct
1216 {
1217   /* Adjacency taken. */
1218   u32 dpo_index;
1219   u32 flow_hash;
1220   u32 fib_index;
1221
1222   /* Packet data, possibly *after* rewrite. */
1223   u8 packet_data[64 - 1 * sizeof (u32)];
1224 }
1225 ip4_forward_next_trace_t;
1226
1227 #ifndef CLIB_MARCH_VARIANT
1228 u8 *
1229 format_ip4_forward_next_trace (u8 * s, va_list * args)
1230 {
1231   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1232   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1233   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1234   u32 indent = format_get_indent (s);
1235   s = format (s, "%U%U",
1236               format_white_space, indent,
1237               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1238   return s;
1239 }
1240 #endif
1241
1242 static u8 *
1243 format_ip4_lookup_trace (u8 * s, va_list * args)
1244 {
1245   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1246   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1247   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1248   u32 indent = format_get_indent (s);
1249
1250   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1251               t->fib_index, t->dpo_index, t->flow_hash);
1252   s = format (s, "\n%U%U",
1253               format_white_space, indent,
1254               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1255   return s;
1256 }
1257
1258 static u8 *
1259 format_ip4_rewrite_trace (u8 * s, va_list * args)
1260 {
1261   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1262   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1263   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1264   u32 indent = format_get_indent (s);
1265
1266   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1267               t->fib_index, t->dpo_index, format_ip_adjacency,
1268               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1269   s = format (s, "\n%U%U",
1270               format_white_space, indent,
1271               format_ip_adjacency_packet_data,
1272               t->packet_data, sizeof (t->packet_data));
1273   return s;
1274 }
1275
1276 #ifndef CLIB_MARCH_VARIANT
1277 /* Common trace function for all ip4-forward next nodes. */
1278 void
1279 ip4_forward_next_trace (vlib_main_t * vm,
1280                         vlib_node_runtime_t * node,
1281                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1282 {
1283   u32 *from, n_left;
1284   ip4_main_t *im = &ip4_main;
1285
1286   n_left = frame->n_vectors;
1287   from = vlib_frame_vector_args (frame);
1288
1289   while (n_left >= 4)
1290     {
1291       u32 bi0, bi1;
1292       vlib_buffer_t *b0, *b1;
1293       ip4_forward_next_trace_t *t0, *t1;
1294
1295       /* Prefetch next iteration. */
1296       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1297       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1298
1299       bi0 = from[0];
1300       bi1 = from[1];
1301
1302       b0 = vlib_get_buffer (vm, bi0);
1303       b1 = vlib_get_buffer (vm, bi1);
1304
1305       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1306         {
1307           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1308           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1309           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1310           t0->fib_index =
1311             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1312              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1313             vec_elt (im->fib_index_by_sw_if_index,
1314                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1315
1316           clib_memcpy_fast (t0->packet_data,
1317                             vlib_buffer_get_current (b0),
1318                             sizeof (t0->packet_data));
1319         }
1320       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1321         {
1322           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1323           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1324           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1325           t1->fib_index =
1326             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1327              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1328             vec_elt (im->fib_index_by_sw_if_index,
1329                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1330           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1331                             sizeof (t1->packet_data));
1332         }
1333       from += 2;
1334       n_left -= 2;
1335     }
1336
1337   while (n_left >= 1)
1338     {
1339       u32 bi0;
1340       vlib_buffer_t *b0;
1341       ip4_forward_next_trace_t *t0;
1342
1343       bi0 = from[0];
1344
1345       b0 = vlib_get_buffer (vm, bi0);
1346
1347       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1348         {
1349           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1350           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1351           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1352           t0->fib_index =
1353             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1354              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1355             vec_elt (im->fib_index_by_sw_if_index,
1356                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1357           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1358                             sizeof (t0->packet_data));
1359         }
1360       from += 1;
1361       n_left -= 1;
1362     }
1363 }
1364
1365 /* Compute TCP/UDP/ICMP4 checksum in software. */
1366 u16
1367 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1368                               ip4_header_t * ip0)
1369 {
1370   ip_csum_t sum0;
1371   u32 ip_header_length, payload_length_host_byte_order;
1372
1373   /* Initialize checksum with ip header. */
1374   ip_header_length = ip4_header_bytes (ip0);
1375   payload_length_host_byte_order =
1376     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1377   sum0 =
1378     clib_host_to_net_u32 (payload_length_host_byte_order +
1379                           (ip0->protocol << 16));
1380
1381   if (BITS (uword) == 32)
1382     {
1383       sum0 =
1384         ip_csum_with_carry (sum0,
1385                             clib_mem_unaligned (&ip0->src_address, u32));
1386       sum0 =
1387         ip_csum_with_carry (sum0,
1388                             clib_mem_unaligned (&ip0->dst_address, u32));
1389     }
1390   else
1391     sum0 =
1392       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1393
1394   return ip_calculate_l4_checksum (vm, p0, sum0,
1395                                    payload_length_host_byte_order, (u8 *) ip0,
1396                                    ip_header_length, NULL);
1397 }
1398
1399 u32
1400 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1401 {
1402   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1403   udp_header_t *udp0;
1404   u16 sum16;
1405
1406   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1407           || ip0->protocol == IP_PROTOCOL_UDP);
1408
1409   udp0 = (void *) (ip0 + 1);
1410   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1411     {
1412       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1413                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1414       return p0->flags;
1415     }
1416
1417   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1418
1419   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1420                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1421
1422   return p0->flags;
1423 }
1424 #endif
1425
1426 /* *INDENT-OFF* */
1427 VNET_FEATURE_ARC_INIT (ip4_local) =
1428 {
1429   .arc_name  = "ip4-local",
1430   .start_nodes = VNET_FEATURES ("ip4-local"),
1431   .last_in_arc = "ip4-local-end-of-arc",
1432 };
1433 /* *INDENT-ON* */
1434
1435 static inline void
1436 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1437                             ip4_header_t * ip, u8 is_udp, u8 * error,
1438                             u8 * good_tcp_udp)
1439 {
1440   u32 flags0;
1441   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1442   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1443   if (is_udp)
1444     {
1445       udp_header_t *udp;
1446       u32 ip_len, udp_len;
1447       i32 len_diff;
1448       udp = ip4_next_header (ip);
1449       /* Verify UDP length. */
1450       ip_len = clib_net_to_host_u16 (ip->length);
1451       udp_len = clib_net_to_host_u16 (udp->length);
1452
1453       len_diff = ip_len - udp_len;
1454       *good_tcp_udp &= len_diff >= 0;
1455       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1456     }
1457 }
1458
/*
 * Checksum-state predicates on a buffer pointer _b.
 *
 * Every argument and every expansion is fully parenthesized so the macros
 * can be negated, combined with other operators, or given a non-trivial
 * pointer expression without changing their meaning.
 */

/* True when TCP or UDP checksum offload is flagged on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
     || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/* True when the packet is TCP/UDP and its checksum has neither been
 * computed already nor been offloaded. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)            \
          || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, 0 otherwise. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
      || ip4_local_csum_is_offloaded (_b)) != 0)
1470
1471 static inline void
1472 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1473                          ip4_header_t * ih, u8 * error)
1474 {
1475   u8 is_udp, is_tcp_udp, good_tcp_udp;
1476
1477   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1478   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1479
1480   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1481     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1482   else
1483     good_tcp_udp = ip4_local_csum_is_valid (b);
1484
1485   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1486   *error = (is_tcp_udp && !good_tcp_udp
1487             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1488 }
1489
1490 static inline void
1491 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1492                             ip4_header_t ** ih, u8 * error)
1493 {
1494   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1495
1496   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1497   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1498
1499   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1500   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1501
1502   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1503   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1504
1505   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1506                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1507     {
1508       if (is_tcp_udp[0])
1509         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1510                                     &good_tcp_udp[0]);
1511       if (is_tcp_udp[1])
1512         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1513                                     &good_tcp_udp[1]);
1514     }
1515
1516   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1517               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1518   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1519               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1520 }
1521
1522 static inline void
1523 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1524                               vlib_buffer_t * b, u16 * next, u8 error,
1525                               u8 head_of_feature_arc)
1526 {
1527   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1528   u32 next_index;
1529
1530   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1531   b->error = error ? error_node->errors[error] : 0;
1532   if (head_of_feature_arc)
1533     {
1534       next_index = *next;
1535       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1536         {
1537           vnet_feature_arc_start (arc_index,
1538                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1539                                   &next_index, b);
1540           *next = next_index;
1541         }
1542     }
1543 }
1544
1545 typedef struct
1546 {
1547   ip4_address_t src;
1548   u32 lbi;
1549   u8 error;
1550   u8 first;
1551 } ip4_local_last_check_t;
1552
1553 static inline void
1554 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1555                      ip4_local_last_check_t * last_check, u8 * error0)
1556 {
1557   ip4_fib_mtrie_leaf_t leaf0;
1558   ip4_fib_mtrie_t *mtrie0;
1559   const dpo_id_t *dpo0;
1560   load_balance_t *lb0;
1561   u32 lbi0;
1562
1563   vnet_buffer (b)->ip.fib_index =
1564     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1565     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1566
1567   /*
1568    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1569    *  adjacency for the destination address (the local interface address).
1570    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1571    *  adjacency for the source address (the remote sender's address)
1572    */
1573   if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
1574       last_check->first)
1575     {
1576       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1577       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1578       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1579       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1580       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1581
1582       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1583         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1584       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1585
1586       lb0 = load_balance_get (lbi0);
1587       dpo0 = load_balance_get_bucket_i (lb0, 0);
1588
1589       /*
1590        * Must have a route to source otherwise we drop the packet.
1591        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1592        *
1593        * The checks are:
1594        *  - the source is a recieve => it's from us => bogus, do this
1595        *    first since it sets a different error code.
1596        *  - uRPF check for any route to source - accept if passes.
1597        *  - allow packets destined to the broadcast address from unknown sources
1598        */
1599
1600       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1601                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1602                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1603       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1604                   && !fib_urpf_check_size (lb0->lb_urpf)
1605                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1606                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1607
1608       last_check->src.as_u32 = ip0->src_address.as_u32;
1609       last_check->lbi = lbi0;
1610       last_check->error = *error0;
1611       last_check->first = 0;
1612     }
1613   else
1614     {
1615       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1616         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1617       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1618       *error0 = last_check->error;
1619     }
1620 }
1621
1622 static inline void
1623 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1624                         ip4_local_last_check_t * last_check, u8 * error)
1625 {
1626   ip4_fib_mtrie_leaf_t leaf[2];
1627   ip4_fib_mtrie_t *mtrie[2];
1628   const dpo_id_t *dpo[2];
1629   load_balance_t *lb[2];
1630   u32 not_last_hit;
1631   u32 lbi[2];
1632
1633   not_last_hit = last_check->first;
1634   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1635   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1636
1637   vnet_buffer (b[0])->ip.fib_index =
1638     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1639     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1640     vnet_buffer (b[0])->ip.fib_index;
1641
1642   vnet_buffer (b[1])->ip.fib_index =
1643     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1644     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1645     vnet_buffer (b[1])->ip.fib_index;
1646
1647   /*
1648    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1649    *  adjacency for the destination address (the local interface address).
1650    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1651    *  adjacency for the source address (the remote sender's address)
1652    */
1653   if (PREDICT_TRUE (not_last_hit))
1654     {
1655       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1656       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1657
1658       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1659       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1660
1661       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1662                                            &ip[0]->src_address, 2);
1663       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1664                                            &ip[1]->src_address, 2);
1665
1666       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1667                                            &ip[0]->src_address, 3);
1668       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1669                                            &ip[1]->src_address, 3);
1670
1671       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1672       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1673
1674       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1675         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1676       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1677
1678       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1679         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1680       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1681
1682       lb[0] = load_balance_get (lbi[0]);
1683       lb[1] = load_balance_get (lbi[1]);
1684
1685       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1686       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1687
1688       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1689                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1690                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1691       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1692                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1693                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1694                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1695
1696       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1697                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1698                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1699       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1700                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1701                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1702                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1703
1704       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1705       last_check->lbi = lbi[1];
1706       last_check->error = error[1];
1707       last_check->first = 0;
1708     }
1709   else
1710     {
1711       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1712         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1713       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1714
1715       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1716         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1717       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1718
1719       error[0] = last_check->error;
1720       error[1] = last_check->error;
1721     }
1722 }
1723
/**
 * Classification assigned to a packet by ip4_local_classify().
 *
 * IP_LOCAL_PACKET_TYPE_L4 has to stay zero: ip4_local_inline() tests the
 * classification as a boolean ("is this something other than plain L4?")
 * when deciding whether the checksum and source checks can be skipped.
 */
enum ip_local_packet_type_e
{
  /* neither a fragment nor flagged as NATed */
  IP_LOCAL_PACKET_TYPE_L4 = 0,
  /* buffer carries VNET_BUFFER_F_IS_NATED */
  IP_LOCAL_PACKET_TYPE_NAT = 1,
  /* IPv4 fragment, handed to reassembly */
  IP_LOCAL_PACKET_TYPE_FRAG = 2,
};
1730
1731 /**
1732  * Determine packet type and next node.
1733  *
1734  * The expectation is that all packets that are not L4 will skip
1735  * checksums and source checks.
1736  */
1737 always_inline u8
1738 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1739 {
1740   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1741
1742   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1743     {
1744       *next = IP_LOCAL_NEXT_REASSEMBLY;
1745       return IP_LOCAL_PACKET_TYPE_FRAG;
1746     }
1747   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1748     {
1749       *next = lm->local_next_by_ip_protocol[ip->protocol];
1750       return IP_LOCAL_PACKET_TYPE_NAT;
1751     }
1752
1753   *next = lm->local_next_by_ip_protocol[ip->protocol];
1754   return IP_LOCAL_PACKET_TYPE_L4;
1755 }
1756
1757 static inline uword
1758 ip4_local_inline (vlib_main_t * vm,
1759                   vlib_node_runtime_t * node,
1760                   vlib_frame_t * frame, int head_of_feature_arc)
1761 {
1762   u32 *from, n_left_from;
1763   vlib_node_runtime_t *error_node =
1764     vlib_node_get_runtime (vm, ip4_local_node.index);
1765   u16 nexts[VLIB_FRAME_SIZE], *next;
1766   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1767   ip4_header_t *ip[2];
1768   u8 error[2], pt[2];
1769
1770   ip4_local_last_check_t last_check = {
1771     /*
1772      * 0.0.0.0 can appear as the source address of an IP packet,
1773      * as can any other address, hence the need to use the 'first'
1774      * member to make sure the .lbi is initialised for the first
1775      * packet.
1776      */
1777     .src = {.as_u32 = 0},
1778     .lbi = ~0,
1779     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1780     .first = 1,
1781   };
1782
1783   from = vlib_frame_vector_args (frame);
1784   n_left_from = frame->n_vectors;
1785
1786   if (node->flags & VLIB_NODE_FLAG_TRACE)
1787     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1788
1789   vlib_get_buffers (vm, from, bufs, n_left_from);
1790   b = bufs;
1791   next = nexts;
1792
1793   while (n_left_from >= 6)
1794     {
1795       u8 not_batch = 0;
1796
1797       /* Prefetch next iteration. */
1798       {
1799         vlib_prefetch_buffer_header (b[4], LOAD);
1800         vlib_prefetch_buffer_header (b[5], LOAD);
1801
1802         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1803         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1804       }
1805
1806       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1807
1808       ip[0] = vlib_buffer_get_current (b[0]);
1809       ip[1] = vlib_buffer_get_current (b[1]);
1810
1811       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1812       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1813
1814       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1815       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1816
1817       not_batch = pt[0] ^ pt[1];
1818
1819       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1820         goto skip_checks;
1821
1822       if (PREDICT_TRUE (not_batch == 0))
1823         {
1824           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1825           ip4_local_check_src_x2 (b, ip, &last_check, error);
1826         }
1827       else
1828         {
1829           if (!pt[0])
1830             {
1831               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1832               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1833             }
1834           if (!pt[1])
1835             {
1836               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1837               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1838             }
1839         }
1840
1841     skip_checks:
1842
1843       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1844                                     head_of_feature_arc);
1845       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1846                                     head_of_feature_arc);
1847
1848       b += 2;
1849       next += 2;
1850       n_left_from -= 2;
1851     }
1852
1853   while (n_left_from > 0)
1854     {
1855       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1856
1857       ip[0] = vlib_buffer_get_current (b[0]);
1858       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1859       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1860
1861       if (head_of_feature_arc == 0 || pt[0])
1862         goto skip_check;
1863
1864       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1865       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1866
1867     skip_check:
1868
1869       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1870                                     head_of_feature_arc);
1871
1872       b += 1;
1873       next += 1;
1874       n_left_from -= 1;
1875     }
1876
1877   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1878   return frame->n_vectors;
1879 }
1880
1881 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1882                                vlib_frame_t * frame)
1883 {
1884   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1885 }
1886
/*
 * Graph-node registration for "ip4-local".
 *
 * The fixed next nodes below are indexed by the IP_LOCAL_NEXT_* values;
 * ip4_register_protocol() appends further next nodes to this node at
 * run time via vlib_node_add_next().
 */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_local_node) =
{
  .name = "ip4-local",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_forward_next_trace,
  /* errors are reported using the shared IPv4 error table */
  .n_errors = IP4_N_ERROR,
  .error_strings = ip4_error_strings,
  .n_next_nodes = IP_LOCAL_N_NEXT,
  .next_nodes =
  {
    [IP_LOCAL_NEXT_DROP] = "ip4-drop",
    [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
    [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
    [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
    [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
  },
};
/* *INDENT-ON* */
1906
1907
1908 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1909                                           vlib_node_runtime_t * node,
1910                                           vlib_frame_t * frame)
1911 {
1912   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1913 }
1914
/*
 * Registration of the "ip4-local-end-of-arc" node and of the feature that
 * places it on the "ip4-local" feature arc.
 */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
  .name = "ip4-local-end-of-arc",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,
  /* declared as a sibling of ip4-local; no next nodes are listed here */
  .sibling_of = "ip4-local",
};

VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
  .arc_name = "ip4-local",
  .node_name = "ip4-local-end-of-arc",
  .runs_before = 0, /* not before any other features */
};
/* *INDENT-ON* */
1930
1931 #ifndef CLIB_MARCH_VARIANT
1932 void
1933 ip4_register_protocol (u32 protocol, u32 node_index)
1934 {
1935   vlib_main_t *vm = vlib_get_main ();
1936   ip4_main_t *im = &ip4_main;
1937   ip_lookup_main_t *lm = &im->lookup_main;
1938
1939   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1940   lm->local_next_by_ip_protocol[protocol] =
1941     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1942 }
1943
1944 void
1945 ip4_unregister_protocol (u32 protocol)
1946 {
1947   ip4_main_t *im = &ip4_main;
1948   ip_lookup_main_t *lm = &im->lookup_main;
1949
1950   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1951   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1952 }
1953 #endif
1954
1955 static clib_error_t *
1956 show_ip_local_command_fn (vlib_main_t * vm,
1957                           unformat_input_t * input, vlib_cli_command_t * cmd)
1958 {
1959   ip4_main_t *im = &ip4_main;
1960   ip_lookup_main_t *lm = &im->lookup_main;
1961   int i;
1962
1963   vlib_cli_output (vm, "Protocols handled by ip4_local");
1964   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1965     {
1966       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1967         {
1968           u32 node_index = vlib_get_node (vm,
1969                                           ip4_local_node.index)->
1970             next_nodes[lm->local_next_by_ip_protocol[i]];
1971           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1972                            format_vlib_node_name, vm, node_index);
1973         }
1974     }
1975   return 0;
1976 }
1977
1978
1979
/*?
 * Display the set of protocols handled by the local IPv4 stack.
 *
 * @cliexpar
 * Example of how to display local protocol table:
 * @cliexstart{show ip local}
 * Protocols handled by ip4_local
 * 1
 * 17
 * 47
 * @cliexend
?*/
/*
 * NOTE(review): the sample output above lists bare protocol numbers, but
 * show_ip_local_command_fn prints each entry with the format "%U: %U"
 * (protocol, then node name) - the sample probably needs refreshing.
 */
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_local, static) =
{
  .path = "show ip local",
  .function = show_ip_local_command_fn,
  .short_help = "show ip local",
};
/* *INDENT-ON* */
2000
/**
 * Next-node slots used by the IPv4 rewrite path for packets that cannot
 * simply follow the adjacency's own next index.
 */
typedef enum
{
  /* default disposition */
  IP4_REWRITE_NEXT_DROP = 0,
  /* an ICMP error is to be generated (TTL expired, or MTU exceeded with DF) */
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  /* packet exceeds the MTU and may be fragmented */
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  /* number of slots; keep last */
  IP4_REWRITE_N_NEXT = 3
} ip4_rewrite_next_t;
2008
/**
 * The bits of an IPv4 address to keep (mask in) when constructing a
 * multicast MAC address.
 *
 * The mask is applied to the address as stored in the packet, hence the
 * two byte-order variants: both select the same 23 address bits.
 */
#if CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0x007fffff
#else
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#endif
2018
2019 always_inline void
2020 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2021                u16 adj_packet_bytes, bool df, u16 * next,
2022                u8 is_midchain, u32 * error)
2023 {
2024   if (packet_len > adj_packet_bytes)
2025     {
2026       *error = IP4_ERROR_MTU_EXCEEDED;
2027       if (df)
2028         {
2029           icmp4_error_set_vnet_buffer
2030             (b, ICMP4_destination_unreachable,
2031              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2032              adj_packet_bytes);
2033           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2034         }
2035       else
2036         {
2037           /* IP fragmentation */
2038           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2039                                    (is_midchain ?
2040                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
2041                                     IP_FRAG_NEXT_IP_REWRITE), 0);
2042           *next = IP4_REWRITE_NEXT_FRAGMENT;
2043         }
2044     }
2045 }
2046
2047 /* increment TTL & update checksum.
2048    Works either endian, so no need for byte swap. */
2049 static_always_inline void
2050 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
2051 {
2052   i32 ttl;
2053   u32 checksum;
2054   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2055     return;
2056
2057   ttl = ip->ttl;
2058
2059   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
2060   checksum += checksum >= 0xffff;
2061
2062   ip->checksum = checksum;
2063   ttl += 1;
2064   ip->ttl = ttl;
2065
2066   ASSERT (ip4_header_checksum_is_valid (ip));
2067 }
2068
2069 /* Decrement TTL & update checksum.
2070    Works either endian, so no need for byte swap. */
2071 static_always_inline void
2072 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2073                             u32 * error)
2074 {
2075   i32 ttl;
2076   u32 checksum;
2077   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2078     return;
2079
2080   ttl = ip->ttl;
2081
2082   /* Input node should have reject packets with ttl 0. */
2083   ASSERT (ip->ttl > 0);
2084
2085   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2086   checksum += checksum >= 0xffff;
2087
2088   ip->checksum = checksum;
2089   ttl -= 1;
2090   ip->ttl = ttl;
2091
2092   /*
2093    * If the ttl drops below 1 when forwarding, generate
2094    * an ICMP response.
2095    */
2096   if (PREDICT_FALSE (ttl <= 0))
2097     {
2098       *error = IP4_ERROR_TIME_EXPIRED;
2099       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2100       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2101                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2102                                    0);
2103       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2104     }
2105
2106   /* Verify checksum. */
2107   ASSERT (ip4_header_checksum_is_valid (ip) ||
2108           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2109 }
2110
2111
2112 always_inline uword
2113 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2114                              vlib_node_runtime_t * node,
2115                              vlib_frame_t * frame,
2116                              int do_counters, int is_midchain, int is_mcast)
2117 {
2118   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2119   u32 *from = vlib_frame_vector_args (frame);
2120   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2121   u16 nexts[VLIB_FRAME_SIZE], *next;
2122   u32 n_left_from;
2123   vlib_node_runtime_t *error_node =
2124     vlib_node_get_runtime (vm, ip4_input_node.index);
2125
2126   n_left_from = frame->n_vectors;
2127   u32 thread_index = vm->thread_index;
2128
2129   vlib_get_buffers (vm, from, bufs, n_left_from);
2130   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2131
2132 #if (CLIB_N_PREFETCHES >= 8)
2133   if (n_left_from >= 6)
2134     {
2135       int i;
2136       for (i = 2; i < 6; i++)
2137         vlib_prefetch_buffer_header (bufs[i], LOAD);
2138     }
2139
2140   next = nexts;
2141   b = bufs;
2142   while (n_left_from >= 8)
2143     {
2144       const ip_adjacency_t *adj0, *adj1;
2145       ip4_header_t *ip0, *ip1;
2146       u32 rw_len0, error0, adj_index0;
2147       u32 rw_len1, error1, adj_index1;
2148       u32 tx_sw_if_index0, tx_sw_if_index1;
2149       u8 *p;
2150
2151       vlib_prefetch_buffer_header (b[6], LOAD);
2152       vlib_prefetch_buffer_header (b[7], LOAD);
2153
2154       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2155       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2156
2157       /*
2158        * pre-fetch the per-adjacency counters
2159        */
2160       if (do_counters)
2161         {
2162           vlib_prefetch_combined_counter (&adjacency_counters,
2163                                           thread_index, adj_index0);
2164           vlib_prefetch_combined_counter (&adjacency_counters,
2165                                           thread_index, adj_index1);
2166         }
2167
2168       ip0 = vlib_buffer_get_current (b[0]);
2169       ip1 = vlib_buffer_get_current (b[1]);
2170
2171       error0 = error1 = IP4_ERROR_NONE;
2172
2173       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2174       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2175
2176       /* Rewrite packet header and updates lengths. */
2177       adj0 = adj_get (adj_index0);
2178       adj1 = adj_get (adj_index1);
2179
2180       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2181       rw_len0 = adj0[0].rewrite_header.data_bytes;
2182       rw_len1 = adj1[0].rewrite_header.data_bytes;
2183       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2184       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2185
2186       p = vlib_buffer_get_current (b[2]);
2187       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2188       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2189
2190       p = vlib_buffer_get_current (b[3]);
2191       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2192       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2193
2194       /* Check MTU of outgoing interface. */
2195       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2196       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2197
2198       if (b[0]->flags & VNET_BUFFER_F_GSO)
2199         ip0_len = gso_mtu_sz (b[0]);
2200       if (b[1]->flags & VNET_BUFFER_F_GSO)
2201         ip1_len = gso_mtu_sz (b[1]);
2202
2203       ip4_mtu_check (b[0], ip0_len,
2204                      adj0[0].rewrite_header.max_l3_packet_bytes,
2205                      ip0->flags_and_fragment_offset &
2206                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2207                      next + 0, is_midchain, &error0);
2208       ip4_mtu_check (b[1], ip1_len,
2209                      adj1[0].rewrite_header.max_l3_packet_bytes,
2210                      ip1->flags_and_fragment_offset &
2211                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2212                      next + 1, is_midchain, &error1);
2213
2214       if (is_mcast)
2215         {
2216           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2217                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2218                     IP4_ERROR_SAME_INTERFACE : error0);
2219           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2220                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2221                     IP4_ERROR_SAME_INTERFACE : error1);
2222         }
2223
2224       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2225        * to see the IP header */
2226       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2227         {
2228           u32 next_index = adj0[0].rewrite_header.next_index;
2229           vlib_buffer_advance (b[0], -(word) rw_len0);
2230
2231           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2232           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2233
2234           if (PREDICT_FALSE
2235               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2236             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2237                                                 tx_sw_if_index0,
2238                                                 &next_index, b[0],
2239                                                 adj0->ia_cfg_index);
2240
2241           next[0] = next_index;
2242           if (is_midchain)
2243             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2244                                         0 /* is_ip6 */ );
2245         }
2246       else
2247         {
2248           b[0]->error = error_node->errors[error0];
2249           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2250             ip4_ttl_inc (b[0], ip0);
2251         }
2252       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2253         {
2254           u32 next_index = adj1[0].rewrite_header.next_index;
2255           vlib_buffer_advance (b[1], -(word) rw_len1);
2256
2257           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2258           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2259
2260           if (PREDICT_FALSE
2261               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2262             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2263                                                 tx_sw_if_index1,
2264                                                 &next_index, b[1],
2265                                                 adj1->ia_cfg_index);
2266           next[1] = next_index;
2267           if (is_midchain)
2268             vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ ,
2269                                         0 /* is_ip6 */ );
2270         }
2271       else
2272         {
2273           b[1]->error = error_node->errors[error1];
2274           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2275             ip4_ttl_inc (b[1], ip1);
2276         }
2277
2278       if (is_midchain)
2279         /* Guess we are only writing on ipv4 header. */
2280         vnet_rewrite_two_headers (adj0[0], adj1[0],
2281                                   ip0, ip1, sizeof (ip4_header_t));
2282       else
2283         /* Guess we are only writing on simple Ethernet header. */
2284         vnet_rewrite_two_headers (adj0[0], adj1[0],
2285                                   ip0, ip1, sizeof (ethernet_header_t));
2286
2287       if (do_counters)
2288         {
2289           if (error0 == IP4_ERROR_NONE)
2290             vlib_increment_combined_counter
2291               (&adjacency_counters,
2292                thread_index,
2293                adj_index0, 1,
2294                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2295
2296           if (error1 == IP4_ERROR_NONE)
2297             vlib_increment_combined_counter
2298               (&adjacency_counters,
2299                thread_index,
2300                adj_index1, 1,
2301                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2302         }
2303
2304       if (is_midchain)
2305         {
2306           if (error0 == IP4_ERROR_NONE)
2307             adj_midchain_fixup (vm, adj0, b[0]);
2308           if (error1 == IP4_ERROR_NONE)
2309             adj_midchain_fixup (vm, adj1, b[1]);
2310         }
2311
2312       if (is_mcast)
2313         {
2314           /* copy bytes from the IP address into the MAC rewrite */
2315           if (error0 == IP4_ERROR_NONE)
2316             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2317                                         adj0->rewrite_header.dst_mcast_offset,
2318                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2319           if (error1 == IP4_ERROR_NONE)
2320             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2321                                         adj1->rewrite_header.dst_mcast_offset,
2322                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2323         }
2324
2325       next += 2;
2326       b += 2;
2327       n_left_from -= 2;
2328     }
2329 #elif (CLIB_N_PREFETCHES >= 4)
2330   next = nexts;
2331   b = bufs;
2332   while (n_left_from >= 1)
2333     {
2334       ip_adjacency_t *adj0;
2335       ip4_header_t *ip0;
2336       u32 rw_len0, error0, adj_index0;
2337       u32 tx_sw_if_index0;
2338       u8 *p;
2339
2340       /* Prefetch next iteration */
2341       if (PREDICT_TRUE (n_left_from >= 4))
2342         {
2343           ip_adjacency_t *adj2;
2344           u32 adj_index2;
2345
2346           vlib_prefetch_buffer_header (b[3], LOAD);
2347           vlib_prefetch_buffer_data (b[2], LOAD);
2348
2349           /* Prefetch adj->rewrite_header */
2350           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2351           adj2 = adj_get (adj_index2);
2352           p = (u8 *) adj2;
2353           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2354                          LOAD);
2355         }
2356
2357       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2358
2359       /*
2360        * Prefetch the per-adjacency counters
2361        */
2362       if (do_counters)
2363         {
2364           vlib_prefetch_combined_counter (&adjacency_counters,
2365                                           thread_index, adj_index0);
2366         }
2367
2368       ip0 = vlib_buffer_get_current (b[0]);
2369
2370       error0 = IP4_ERROR_NONE;
2371
2372       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2373
2374       /* Rewrite packet header and updates lengths. */
2375       adj0 = adj_get (adj_index0);
2376
2377       /* Rewrite header was prefetched. */
2378       rw_len0 = adj0[0].rewrite_header.data_bytes;
2379       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2380
2381       /* Check MTU of outgoing interface. */
2382       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2383
2384       if (b[0]->flags & VNET_BUFFER_F_GSO)
2385         ip0_len = gso_mtu_sz (b[0]);
2386
2387       ip4_mtu_check (b[0], ip0_len,
2388                      adj0[0].rewrite_header.max_l3_packet_bytes,
2389                      ip0->flags_and_fragment_offset &
2390                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2391                      next + 0, is_midchain, &error0);
2392
2393       if (is_mcast)
2394         {
2395           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2396                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2397                     IP4_ERROR_SAME_INTERFACE : error0);
2398         }
2399
2400       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2401        * to see the IP header */
2402       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2403         {
2404           u32 next_index = adj0[0].rewrite_header.next_index;
2405           vlib_buffer_advance (b[0], -(word) rw_len0);
2406           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2407           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2408
2409           if (PREDICT_FALSE
2410               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2411             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2412                                                 tx_sw_if_index0,
2413                                                 &next_index, b[0],
2414                                                 adj0->ia_cfg_index);
2415           next[0] = next_index;
2416
2417           if (is_midchain)
2418             {
2419               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2420                                           0 /* is_ip6 */ );
2421
2422               /* Guess we are only writing on ipv4 header. */
2423               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2424             }
2425           else
2426             /* Guess we are only writing on simple Ethernet header. */
2427             vnet_rewrite_one_header (adj0[0], ip0,
2428                                      sizeof (ethernet_header_t));
2429
2430           /*
2431            * Bump the per-adjacency counters
2432            */
2433           if (do_counters)
2434             vlib_increment_combined_counter
2435               (&adjacency_counters,
2436                thread_index,
2437                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2438                                                            b[0]) + rw_len0);
2439
2440           if (is_midchain)
2441             adj_midchain_fixup (vm, adj0, b[0]);
2442
2443           if (is_mcast)
2444             /* copy bytes from the IP address into the MAC rewrite */
2445             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2446                                         adj0->rewrite_header.dst_mcast_offset,
2447                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2448         }
2449       else
2450         {
2451           b[0]->error = error_node->errors[error0];
2452           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2453             ip4_ttl_inc (b[0], ip0);
2454         }
2455
2456       next += 1;
2457       b += 1;
2458       n_left_from -= 1;
2459     }
2460 #endif
2461
2462   while (n_left_from > 0)
2463     {
2464       ip_adjacency_t *adj0;
2465       ip4_header_t *ip0;
2466       u32 rw_len0, adj_index0, error0;
2467       u32 tx_sw_if_index0;
2468
2469       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2470
2471       adj0 = adj_get (adj_index0);
2472
2473       if (do_counters)
2474         vlib_prefetch_combined_counter (&adjacency_counters,
2475                                         thread_index, adj_index0);
2476
2477       ip0 = vlib_buffer_get_current (b[0]);
2478
2479       error0 = IP4_ERROR_NONE;
2480
2481       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2482
2483
2484       /* Update packet buffer attributes/set output interface. */
2485       rw_len0 = adj0[0].rewrite_header.data_bytes;
2486       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2487
2488       /* Check MTU of outgoing interface. */
2489       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2490       if (b[0]->flags & VNET_BUFFER_F_GSO)
2491         ip0_len = gso_mtu_sz (b[0]);
2492
2493       ip4_mtu_check (b[0], ip0_len,
2494                      adj0[0].rewrite_header.max_l3_packet_bytes,
2495                      ip0->flags_and_fragment_offset &
2496                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2497                      next + 0, is_midchain, &error0);
2498
2499       if (is_mcast)
2500         {
2501           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2502                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2503                     IP4_ERROR_SAME_INTERFACE : error0);
2504         }
2505
2506       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2507        * to see the IP header */
2508       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2509         {
2510           u32 next_index = adj0[0].rewrite_header.next_index;
2511           vlib_buffer_advance (b[0], -(word) rw_len0);
2512           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2513           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2514
2515           if (PREDICT_FALSE
2516               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2517             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2518                                                 tx_sw_if_index0,
2519                                                 &next_index, b[0],
2520                                                 adj0->ia_cfg_index);
2521           next[0] = next_index;
2522
2523           if (is_midchain)
2524             {
2525               /* this acts on the packet that is about to be encapped */
2526               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2527                                           0 /* is_ip6 */ );
2528
2529               /* Guess we are only writing on ipv4 header. */
2530               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2531             }
2532           else
2533             /* Guess we are only writing on simple Ethernet header. */
2534             vnet_rewrite_one_header (adj0[0], ip0,
2535                                      sizeof (ethernet_header_t));
2536
2537           if (do_counters)
2538             vlib_increment_combined_counter
2539               (&adjacency_counters,
2540                thread_index, adj_index0, 1,
2541                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2542
2543           if (is_midchain && adj0->sub_type.midchain.fixup_func)
2544             adj0->sub_type.midchain.fixup_func
2545               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2546
2547           if (is_mcast)
2548             /* copy bytes from the IP address into the MAC rewrite */
2549             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2550                                         adj0->rewrite_header.dst_mcast_offset,
2551                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2552         }
2553       else
2554         {
2555           b[0]->error = error_node->errors[error0];
2556           /* undo the TTL decrement - we'll be back to do it again */
2557           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2558             ip4_ttl_inc (b[0], ip0);
2559         }
2560
2561       next += 1;
2562       b += 1;
2563       n_left_from -= 1;
2564     }
2565
2566
2567   /* Need to do trace after rewrites to pick up new packet data. */
2568   if (node->flags & VLIB_NODE_FLAG_TRACE)
2569     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2570
2571   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2572   return frame->n_vectors;
2573 }
2574
2575 always_inline uword
2576 ip4_rewrite_inline (vlib_main_t * vm,
2577                     vlib_node_runtime_t * node,
2578                     vlib_frame_t * frame,
2579                     int do_counters, int is_midchain, int is_mcast)
2580 {
2581   return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2582                                       is_midchain, is_mcast);
2583 }
2584
2585
2586 /** @brief IPv4 rewrite node.
2587     @node ip4-rewrite
2588
2589     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2590     header checksum, fetch the ip adjacency, check the outbound mtu,
2591     apply the adjacency rewrite, and send pkts to the adjacency
2592     rewrite header's rewrite_next_index.
2593
2594     @param vm vlib_main_t corresponding to the current thread
2595     @param node vlib_node_runtime_t
2596     @param frame vlib_frame_t whose contents should be dispatched
2597
2598     @par Graph mechanics: buffer metadata, next index usage
2599
2600     @em Uses:
2601     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2602         - the rewrite adjacency index
2603     - <code>adj->lookup_next_index</code>
2604         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2605           the packet will be dropped.
2606     - <code>adj->rewrite_header</code>
2607         - Rewrite string length, rewrite string, next_index
2608
2609     @em Sets:
2610     - <code>b->current_data, b->current_length</code>
2611         - Updated net of applying the rewrite string
2612
2613     <em>Next Indices:</em>
2614     - <code> adj->rewrite_header.next_index </code>
2615       or @c ip4-drop
2616 */
2617
2618 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2619                                  vlib_frame_t * frame)
2620 {
2621   if (adj_are_counters_enabled ())
2622     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2623   else
2624     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2625 }
2626
2627 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2628                                        vlib_node_runtime_t * node,
2629                                        vlib_frame_t * frame)
2630 {
2631   if (adj_are_counters_enabled ())
2632     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2633   else
2634     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2635 }
2636
2637 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2638                                   vlib_node_runtime_t * node,
2639                                   vlib_frame_t * frame)
2640 {
2641   if (adj_are_counters_enabled ())
2642     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2643   else
2644     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2645 }
2646
2647 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2648                                        vlib_node_runtime_t * node,
2649                                        vlib_frame_t * frame)
2650 {
2651   if (adj_are_counters_enabled ())
2652     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2653   else
2654     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2655 }
2656
2657 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2658                                         vlib_node_runtime_t * node,
2659                                         vlib_frame_t * frame)
2660 {
2661   if (adj_are_counters_enabled ())
2662     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2663   else
2664     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2665 }
2666
2667 /* *INDENT-OFF* */
2668 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2669   .name = "ip4-rewrite",
2670   .vector_size = sizeof (u32),
2671
2672   .format_trace = format_ip4_rewrite_trace,
2673
2674   .n_next_nodes = IP4_REWRITE_N_NEXT,
2675   .next_nodes = {
2676     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2677     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2678     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2679   },
2680 };
2681
2682 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2683   .name = "ip4-rewrite-bcast",
2684   .vector_size = sizeof (u32),
2685
2686   .format_trace = format_ip4_rewrite_trace,
2687   .sibling_of = "ip4-rewrite",
2688 };
2689
2690 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2691   .name = "ip4-rewrite-mcast",
2692   .vector_size = sizeof (u32),
2693
2694   .format_trace = format_ip4_rewrite_trace,
2695   .sibling_of = "ip4-rewrite",
2696 };
2697
2698 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2699   .name = "ip4-mcast-midchain",
2700   .vector_size = sizeof (u32),
2701
2702   .format_trace = format_ip4_rewrite_trace,
2703   .sibling_of = "ip4-rewrite",
2704 };
2705
2706 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2707   .name = "ip4-midchain",
2708   .vector_size = sizeof (u32),
2709   .format_trace = format_ip4_rewrite_trace,
2710   .sibling_of = "ip4-rewrite",
2711 };
/* *INDENT-ON* */
2713
2714 static int
2715 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2716 {
2717   ip4_fib_mtrie_t *mtrie0;
2718   ip4_fib_mtrie_leaf_t leaf0;
2719   u32 lbi0;
2720
2721   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2722
2723   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2724   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2725   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2726
2727   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2728
2729   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2730 }
2731
2732 static clib_error_t *
2733 test_lookup_command_fn (vlib_main_t * vm,
2734                         unformat_input_t * input, vlib_cli_command_t * cmd)
2735 {
2736   ip4_fib_t *fib;
2737   u32 table_id = 0;
2738   f64 count = 1;
2739   u32 n;
2740   int i;
2741   ip4_address_t ip4_base_address;
2742   u64 errors = 0;
2743
2744   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2745     {
2746       if (unformat (input, "table %d", &table_id))
2747         {
2748           /* Make sure the entry exists. */
2749           fib = ip4_fib_get (table_id);
2750           if ((fib) && (fib->index != table_id))
2751             return clib_error_return (0, "<fib-index> %d does not exist",
2752                                       table_id);
2753         }
2754       else if (unformat (input, "count %f", &count))
2755         ;
2756
2757       else if (unformat (input, "%U",
2758                          unformat_ip4_address, &ip4_base_address))
2759         ;
2760       else
2761         return clib_error_return (0, "unknown input `%U'",
2762                                   format_unformat_error, input);
2763     }
2764
2765   n = count;
2766
2767   for (i = 0; i < n; i++)
2768     {
2769       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2770         errors++;
2771
2772       ip4_base_address.as_u32 =
2773         clib_host_to_net_u32 (1 +
2774                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2775     }
2776
2777   if (errors)
2778     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2779   else
2780     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2781
2782   return 0;
2783 }
2784
2785 /*?
2786  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2787  * given FIB table to determine if there is a conflict with the
2788  * adjacency table. The fib-id can be determined by using the
2789  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2790  * of 0 is used.
2791  *
2792  * @todo This command uses fib-id, other commands use table-id (not
2793  * just a name, they are different indexes). Would like to change this
2794  * to table-id for consistency.
2795  *
2796  * @cliexpar
2797  * Example of how to run the test lookup command:
2798  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2799  * No errors in 2 lookups
2800  * @cliexend
2801 ?*/
2802 /* *INDENT-OFF* */
2803 VLIB_CLI_COMMAND (lookup_test_command, static) =
2804 {
2805   .path = "test lookup",
2806   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2807   .function = test_lookup_command_fn,
2808 };
2809 /* *INDENT-ON* */
2810
2811 #ifndef CLIB_MARCH_VARIANT
2812 int
2813 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2814 {
2815   u32 fib_index;
2816
2817   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2818
2819   if (~0 == fib_index)
2820     return VNET_API_ERROR_NO_SUCH_FIB;
2821
2822   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2823                                   flow_hash_config);
2824
2825   return 0;
2826 }
2827 #endif
2828
2829 static clib_error_t *
2830 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2831                              unformat_input_t * input,
2832                              vlib_cli_command_t * cmd)
2833 {
2834   int matched = 0;
2835   u32 table_id = 0;
2836   u32 flow_hash_config = 0;
2837   int rv;
2838
2839   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2840     {
2841       if (unformat (input, "table %d", &table_id))
2842         matched = 1;
2843 #define _(a,v) \
2844     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2845       foreach_flow_hash_bit
2846 #undef _
2847         else
2848         break;
2849     }
2850
2851   if (matched == 0)
2852     return clib_error_return (0, "unknown input `%U'",
2853                               format_unformat_error, input);
2854
2855   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2856   switch (rv)
2857     {
2858     case 0:
2859       break;
2860
2861     case VNET_API_ERROR_NO_SUCH_FIB:
2862       return clib_error_return (0, "no such FIB table %d", table_id);
2863
2864     default:
2865       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2866       break;
2867     }
2868
2869   return 0;
2870 }
2871
2872 /*?
2873  * Configure the set of IPv4 fields used by the flow hash.
2874  *
2875  * @cliexpar
2876  * Example of how to set the flow hash on a given table:
2877  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2879  * @cliexstart{show ip fib}
2880  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2881  * 0.0.0.0/0
2882  *   unicast-ip4-chain
2883  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2884  *     [0] [@0]: dpo-drop ip6
2885  * 0.0.0.0/32
2886  *   unicast-ip4-chain
2887  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2888  *     [0] [@0]: dpo-drop ip6
2889  * 224.0.0.0/8
2890  *   unicast-ip4-chain
2891  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2892  *     [0] [@0]: dpo-drop ip6
2893  * 6.0.1.2/32
2894  *   unicast-ip4-chain
2895  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2896  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2897  * 7.0.0.1/32
2898  *   unicast-ip4-chain
2899  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2900  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2901  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2902  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2903  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2904  * 240.0.0.0/8
2905  *   unicast-ip4-chain
2906  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2907  *     [0] [@0]: dpo-drop ip6
2908  * 255.255.255.255/32
2909  *   unicast-ip4-chain
2910  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2911  *     [0] [@0]: dpo-drop ip6
2912  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2913  * 0.0.0.0/0
2914  *   unicast-ip4-chain
2915  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2916  *     [0] [@0]: dpo-drop ip6
2917  * 0.0.0.0/32
2918  *   unicast-ip4-chain
2919  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2920  *     [0] [@0]: dpo-drop ip6
2921  * 172.16.1.0/24
2922  *   unicast-ip4-chain
2923  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2924  *     [0] [@4]: ipv4-glean: af_packet0
2925  * 172.16.1.1/32
2926  *   unicast-ip4-chain
2927  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2928  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2929  * 172.16.1.2/32
2930  *   unicast-ip4-chain
2931  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2932  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2933  * 172.16.2.0/24
2934  *   unicast-ip4-chain
2935  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2936  *     [0] [@4]: ipv4-glean: af_packet1
2937  * 172.16.2.1/32
2938  *   unicast-ip4-chain
2939  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2940  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2941  * 224.0.0.0/8
2942  *   unicast-ip4-chain
2943  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2944  *     [0] [@0]: dpo-drop ip6
2945  * 240.0.0.0/8
2946  *   unicast-ip4-chain
2947  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2948  *     [0] [@0]: dpo-drop ip6
2949  * 255.255.255.255/32
2950  *   unicast-ip4-chain
2951  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2952  *     [0] [@0]: dpo-drop ip6
2953  * @cliexend
2954 ?*/
2955 /* *INDENT-OFF* */
2956 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2957 {
2958   .path = "set ip flow-hash",
2959   .short_help =
2960   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2961   .function = set_ip_flow_hash_command_fn,
2962 };
2963 /* *INDENT-ON* */
2964
2965 #ifndef CLIB_MARCH_VARIANT
2966 int
2967 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2968                              u32 table_index)
2969 {
2970   vnet_main_t *vnm = vnet_get_main ();
2971   vnet_interface_main_t *im = &vnm->interface_main;
2972   ip4_main_t *ipm = &ip4_main;
2973   ip_lookup_main_t *lm = &ipm->lookup_main;
2974   vnet_classify_main_t *cm = &vnet_classify_main;
2975   ip4_address_t *if_addr;
2976
2977   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2978     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2979
2980   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2981     return VNET_API_ERROR_NO_SUCH_ENTRY;
2982
2983   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2984   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2985
2986   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2987
2988   if (NULL != if_addr)
2989     {
2990       fib_prefix_t pfx = {
2991         .fp_len = 32,
2992         .fp_proto = FIB_PROTOCOL_IP4,
2993         .fp_addr.ip4 = *if_addr,
2994       };
2995       u32 fib_index;
2996
2997       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2998                                                        sw_if_index);
2999
3000
3001       if (table_index != (u32) ~ 0)
3002         {
3003           dpo_id_t dpo = DPO_INVALID;
3004
3005           dpo_set (&dpo,
3006                    DPO_CLASSIFY,
3007                    DPO_PROTO_IP4,
3008                    classify_dpo_create (DPO_PROTO_IP4, table_index));
3009
3010           fib_table_entry_special_dpo_add (fib_index,
3011                                            &pfx,
3012                                            FIB_SOURCE_CLASSIFY,
3013                                            FIB_ENTRY_FLAG_NONE, &dpo);
3014           dpo_reset (&dpo);
3015         }
3016       else
3017         {
3018           fib_table_entry_special_remove (fib_index,
3019                                           &pfx, FIB_SOURCE_CLASSIFY);
3020         }
3021     }
3022
3023   return 0;
3024 }
3025 #endif
3026
3027 static clib_error_t *
3028 set_ip_classify_command_fn (vlib_main_t * vm,
3029                             unformat_input_t * input,
3030                             vlib_cli_command_t * cmd)
3031 {
3032   u32 table_index = ~0;
3033   int table_index_set = 0;
3034   u32 sw_if_index = ~0;
3035   int rv;
3036
3037   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3038     {
3039       if (unformat (input, "table-index %d", &table_index))
3040         table_index_set = 1;
3041       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3042                          vnet_get_main (), &sw_if_index))
3043         ;
3044       else
3045         break;
3046     }
3047
3048   if (table_index_set == 0)
3049     return clib_error_return (0, "classify table-index must be specified");
3050
3051   if (sw_if_index == ~0)
3052     return clib_error_return (0, "interface / subif must be specified");
3053
3054   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3055
3056   switch (rv)
3057     {
3058     case 0:
3059       break;
3060
3061     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3062       return clib_error_return (0, "No such interface");
3063
3064     case VNET_API_ERROR_NO_SUCH_ENTRY:
3065       return clib_error_return (0, "No such classifier table");
3066     }
3067   return 0;
3068 }
3069
3070 /*?
3071  * Assign a classification table to an interface. The classification
3072  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
3074  * on an interface.
3075  *
3076  * @cliexpar
3077  * Example of how to assign a classification table to an interface:
3078  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3079 ?*/
3080 /* *INDENT-OFF* */
3081 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3082 {
3083     .path = "set ip classify",
3084     .short_help =
3085     "set ip classify intfc <interface> table-index <classify-idx>",
3086     .function = set_ip_classify_command_fn,
3087 };
3088 /* *INDENT-ON* */
3089
3090 static clib_error_t *
3091 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3092 {
3093   ip4_main_t *im = &ip4_main;
3094   uword heapsize = 0;
3095
3096   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3097     {
3098       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3099         ;
3100       else if (unformat (input, "mtrie-hugetlb %=", &im->mtrie_hugetlb, 1))
3101         ;
3102       else
3103         return clib_error_return (0,
3104                                   "invalid heap-size parameter `%U'",
3105                                   format_unformat_error, input);
3106
3107     }
3108
3109   im->mtrie_heap_size = heapsize;
3110
3111   return 0;
3112 }
3113
3114 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3115
3116 /*
3117  * fd.io coding-style-patch-verification: ON
3118  *
3119  * Local Variables:
3120  * eval: (c-set-style "gnu")
3121  * End:
3122  */