session: fix transport proto unformat
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
59
60 /** @brief IPv4 lookup node.
61     @node ip4-lookup
62
63     This is the main IPv4 lookup dispatch node.
64
65     @param vm vlib_main_t corresponding to the current thread
66     @param node vlib_node_runtime_t
67     @param frame vlib_frame_t whose contents should be dispatched
68
69     @par Graph mechanics: buffer metadata, next index usage
70
71     @em Uses:
72     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73         - Indicates the @c sw_if_index value of the interface that the
74           packet was received on.
75     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76         - When the value is @c ~0 then the node performs a longest prefix
77           match (LPM) for the packet destination address in the FIB attached
78           to the receive interface.
79         - Otherwise perform LPM for the packet destination address in the
80           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81           value (0, 1, ...) and not a VRF id.
82
83     @em Sets:
84     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85         - The lookup result adjacency index.
86
87     <em>Next Index:</em>
88     - Dispatches the packet to the node index found in
89       ip_adjacency_t @c adj->lookup_next_index
90       (where @c adj is the lookup result adjacency).
91 */
92 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
93                                 vlib_frame_t * frame)
94 {
95   return ip4_lookup_inline (vm, node, frame);
96 }
97
98 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
99
100 /* *INDENT-OFF* */
101 VLIB_REGISTER_NODE (ip4_lookup_node) =
102 {
103   .name = "ip4-lookup",
104   .vector_size = sizeof (u32),
105   .format_trace = format_ip4_lookup_trace,
106   .n_next_nodes = IP_LOOKUP_N_NEXT,
107   .next_nodes = IP4_LOOKUP_NEXT_NODES,
108 };
109 /* *INDENT-ON* */
110
/**
 * @brief IPv4 load-balance node function.
 *
 * For every buffer in the frame:
 *  - reads the load-balance index from
 *    vnet_buffer(b)->ip.adj_index[VLIB_TX];
 *  - picks a bucket of that load-balance object (bucket 0 when there is a
 *    single bucket, otherwise the bucket selected by the flow hash masked
 *    with lb_n_buckets_minus_1);
 *  - stores the bucket's dpoi_index back into ip.adj_index[VLIB_TX] and uses
 *    the bucket's dpoi_next_node as the next node;
 *  - increments the load-balance "via" combined counter by one packet and
 *    the buffer chain length in bytes.
 *
 * @param vm    vlib_main_t of the current thread
 * @param node  vlib_node_runtime_t of this node
 * @param frame frame whose buffers are dispatched
 * @return number of vectors in the frame
 */
111 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
112                                       vlib_node_runtime_t * node,
113                                       vlib_frame_t * frame)
114 {
115   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
116   u32 n_left, *from;
117   u32 thread_index = vm->thread_index;
118   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
119   u16 nexts[VLIB_FRAME_SIZE], *next;
120
121   from = vlib_frame_vector_args (frame);
122   n_left = frame->n_vectors;
123   next = nexts;
124
125   vlib_get_buffers (vm, from, bufs, n_left);
126
      /*
       * Dual loop: two buffers (b[0], b[1]) are processed per iteration while
       * b[2] and b[3] are prefetched, which is why at least four buffers must
       * remain to enter the loop.
       */
127   while (n_left >= 4)
128     {
129       const load_balance_t *lb0, *lb1;
130       const ip4_header_t *ip0, *ip1;
131       u32 lbi0, hc0, lbi1, hc1;
132       const dpo_id_t *dpo0, *dpo1;
133
134       /* Prefetch next iteration. */
135       {
136         vlib_prefetch_buffer_header (b[2], LOAD);
137         vlib_prefetch_buffer_header (b[3], LOAD);
138
139         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
140         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
141       }
142
143       ip0 = vlib_buffer_get_current (b[0]);
144       ip1 = vlib_buffer_get_current (b[1]);
145       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
146       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
147
148       lb0 = load_balance_get (lbi0);
149       lb1 = load_balance_get (lbi1);
150
151       /*
152        * This node is for via FIBs, so we can re-use a flow hash value
153        * already stored in the buffer, if present.
154        * We don't want to use the same hash value at each level in the recursion
155        * graph as that would lead to polarisation, hence the right shift by one.
156        */
157       hc0 = hc1 = 0;
158
159       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
160         {
              /* A zero flow_hash is treated as "not computed yet". */
161           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
162             {
163               hc0 = vnet_buffer (b[0])->ip.flow_hash =
164                 vnet_buffer (b[0])->ip.flow_hash >> 1;
165             }
166           else
167             {
168               hc0 = vnet_buffer (b[0])->ip.flow_hash =
169                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
170             }
171           dpo0 = load_balance_get_fwd_bucket
172             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
173         }
174       else
175         {
              /* Single bucket: no hash is needed. */
176           dpo0 = load_balance_get_bucket_i (lb0, 0);
177         }
178       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
179         {
180           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
181             {
182               hc1 = vnet_buffer (b[1])->ip.flow_hash =
183                 vnet_buffer (b[1])->ip.flow_hash >> 1;
184             }
185           else
186             {
187               hc1 = vnet_buffer (b[1])->ip.flow_hash =
188                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
189             }
190           dpo1 = load_balance_get_fwd_bucket
191             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
192         }
193       else
194         {
195           dpo1 = load_balance_get_bucket_i (lb1, 0);
196         }
197
198       next[0] = dpo0->dpoi_next_node;
199       next[1] = dpo1->dpoi_next_node;
200
          /* Replace the load-balance index with the chosen bucket's index. */
201       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
202       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
203
          /* Counters are indexed by the load-balance index read on entry. */
204       vlib_increment_combined_counter
205         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
206       vlib_increment_combined_counter
207         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
208
209       b += 2;
210       next += 2;
211       n_left -= 2;
212     }
213
      /* Single loop: same processing for the remaining buffers, one at a time. */
214   while (n_left > 0)
215     {
216       const load_balance_t *lb0;
217       const ip4_header_t *ip0;
218       const dpo_id_t *dpo0;
219       u32 lbi0, hc0;
220
221       ip0 = vlib_buffer_get_current (b[0]);
222       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
223
224       lb0 = load_balance_get (lbi0);
225
226       hc0 = 0;
227       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
228         {
229           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
230             {
231               hc0 = vnet_buffer (b[0])->ip.flow_hash =
232                 vnet_buffer (b[0])->ip.flow_hash >> 1;
233             }
234           else
235             {
236               hc0 = vnet_buffer (b[0])->ip.flow_hash =
237                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
238             }
239           dpo0 = load_balance_get_fwd_bucket
240             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
241         }
242       else
243         {
244           dpo0 = load_balance_get_bucket_i (lb0, 0);
245         }
246
247       next[0] = dpo0->dpoi_next_node;
248       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
249
250       vlib_increment_combined_counter
251         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
252
253       b += 1;
254       next += 1;
255       n_left -= 1;
256     }
257
      /* Hand every buffer to the next node recorded for it in nexts[]. */
258   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
259   if (node->flags & VLIB_NODE_FLAG_TRACE)
260     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
261
262   return frame->n_vectors;
263 }
264
265 /* *INDENT-OFF* */
266 VLIB_REGISTER_NODE (ip4_load_balance_node) =
267 {
268   .name = "ip4-load-balance",
269   .vector_size = sizeof (u32),
270   .sibling_of = "ip4-lookup",
271   .format_trace = format_ip4_lookup_trace,
272 };
273 /* *INDENT-ON* */
274
275 #ifndef CLIB_MARCH_VARIANT
276 /* get first interface address */
277 ip4_address_t *
278 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
279                              ip_interface_address_t ** result_ia)
280 {
281   ip_lookup_main_t *lm = &im->lookup_main;
282   ip_interface_address_t *ia = 0;
283   ip4_address_t *result = 0;
284
285   /* *INDENT-OFF* */
286   foreach_ip_interface_address
287     (lm, ia, sw_if_index,
288      1 /* honor unnumbered */ ,
289      ({
290        ip4_address_t * a =
291          ip_interface_address_get_address (lm, ia);
292        result = a;
293        break;
294      }));
295   /* *INDENT-OFF* */
296   if (result_ia)
297     *result_ia = result ? ia : 0;
298   return result;
299 }
300 #endif
301
302 static void
303 ip4_add_subnet_bcast_route (u32 fib_index,
304                             fib_prefix_t *pfx,
305                             u32 sw_if_index)
306 {
307   vnet_sw_interface_flags_t iflags;
308
309   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
310
311   fib_table_entry_special_remove(fib_index,
312                                  pfx,
313                                  FIB_SOURCE_INTERFACE);
314
315   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
316     {
317       fib_table_entry_update_one_path (fib_index, pfx,
318                                        FIB_SOURCE_INTERFACE,
319                                        FIB_ENTRY_FLAG_NONE,
320                                        DPO_PROTO_IP4,
321                                        /* No next-hop address */
322                                        &ADJ_BCAST_ADDR,
323                                        sw_if_index,
324                                        // invalid FIB index
325                                        ~0,
326                                        1,
327                                        // no out-label stack
328                                        NULL,
329                                        FIB_ROUTE_PATH_FLAG_NONE);
330     }
331   else
332     {
333         fib_table_entry_special_add(fib_index,
334                                     pfx,
335                                     FIB_SOURCE_INTERFACE,
336                                     (FIB_ENTRY_FLAG_DROP |
337                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
338     }
339 }
340
341 static void
342 ip4_add_interface_prefix_routes (ip4_main_t *im,
343                                  u32 sw_if_index,
344                                  u32 fib_index,
345                                  ip_interface_address_t * a)
346 {
347   ip_lookup_main_t *lm = &im->lookup_main;
348   ip_interface_prefix_t *if_prefix;
349   ip4_address_t *address = ip_interface_address_get_address (lm, a);
350
351   ip_interface_prefix_key_t key = {
352     .prefix = {
353       .fp_len = a->address_length,
354       .fp_proto = FIB_PROTOCOL_IP4,
355       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
356     },
357     .sw_if_index = sw_if_index,
358   };
359
360   fib_prefix_t pfx_special = {
361     .fp_proto = FIB_PROTOCOL_IP4,
362   };
363
364   /* If prefix already set on interface, just increment ref count & return */
365   if_prefix = ip_get_interface_prefix (lm, &key);
366   if (if_prefix)
367     {
368       if_prefix->ref_count += 1;
369       return;
370     }
371
372   /* New prefix - allocate a pool entry, initialize it, add to the hash */
373   pool_get (lm->if_prefix_pool, if_prefix);
374   if_prefix->ref_count = 1;
375   if_prefix->src_ia_index = a - lm->if_address_pool;
376   clib_memcpy (&if_prefix->key, &key, sizeof (key));
377   mhash_set (&lm->prefix_to_if_prefix_index, &key,
378              if_prefix - lm->if_prefix_pool, 0 /* old value */);
379
380   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
381   if (a->address_length <= 30)
382     {
383       pfx_special.fp_len = a->address_length;
384       pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
385
386       /* set the glean route for the prefix */
387       fib_table_entry_update_one_path (fib_index, &pfx_special,
388                                        FIB_SOURCE_INTERFACE,
389                                        (FIB_ENTRY_FLAG_CONNECTED |
390                                         FIB_ENTRY_FLAG_ATTACHED),
391                                        DPO_PROTO_IP4,
392                                        /* No next-hop address */
393                                        NULL,
394                                        sw_if_index,
395                                        /* invalid FIB index */
396                                        ~0,
397                                        1,
398                                        /* no out-label stack */
399                                        NULL,
400                                        FIB_ROUTE_PATH_FLAG_NONE);
401
402       /* set a drop route for the base address of the prefix */
403       pfx_special.fp_len = 32;
404       pfx_special.fp_addr.ip4.as_u32 =
405         address->as_u32 & im->fib_masks[a->address_length];
406
407       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
408         fib_table_entry_special_add (fib_index, &pfx_special,
409                                      FIB_SOURCE_INTERFACE,
410                                      (FIB_ENTRY_FLAG_DROP |
411                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
412
413       /* set a route for the broadcast address of the prefix */
414       pfx_special.fp_len = 32;
415       pfx_special.fp_addr.ip4.as_u32 =
416         address->as_u32 | ~im->fib_masks[a->address_length];
417       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
418         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
419
420
421     }
422   /* length == 31 - add an attached route for the other address */
423   else if (a->address_length == 31)
424     {
425       pfx_special.fp_len = 32;
426       pfx_special.fp_addr.ip4.as_u32 =
427         address->as_u32 ^ clib_host_to_net_u32(1);
428
429       fib_table_entry_update_one_path (fib_index, &pfx_special,
430                                        FIB_SOURCE_INTERFACE,
431                                        (FIB_ENTRY_FLAG_ATTACHED),
432                                        DPO_PROTO_IP4,
433                                        &pfx_special.fp_addr,
434                                        sw_if_index,
435                                        /* invalid FIB index */
436                                        ~0,
437                                        1,
438                                        NULL,
439                                        FIB_ROUTE_PATH_FLAG_NONE);
440     }
441 }
442
443 static void
444 ip4_add_interface_routes (u32 sw_if_index,
445                           ip4_main_t * im, u32 fib_index,
446                           ip_interface_address_t * a)
447 {
448   ip_lookup_main_t *lm = &im->lookup_main;
449   ip4_address_t *address = ip_interface_address_get_address (lm, a);
450   fib_prefix_t pfx = {
451     .fp_len = 32,
452     .fp_proto = FIB_PROTOCOL_IP4,
453     .fp_addr.ip4 = *address,
454   };
455
456   /* set special routes for the prefix if needed */
457   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
458
459   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
460     {
461       u32 classify_table_index =
462         lm->classify_table_index_by_sw_if_index[sw_if_index];
463       if (classify_table_index != (u32) ~ 0)
464         {
465           dpo_id_t dpo = DPO_INVALID;
466
467           dpo_set (&dpo,
468                    DPO_CLASSIFY,
469                    DPO_PROTO_IP4,
470                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
471
472           fib_table_entry_special_dpo_add (fib_index,
473                                            &pfx,
474                                            FIB_SOURCE_CLASSIFY,
475                                            FIB_ENTRY_FLAG_NONE, &dpo);
476           dpo_reset (&dpo);
477         }
478     }
479
480   fib_table_entry_update_one_path (fib_index, &pfx,
481                                    FIB_SOURCE_INTERFACE,
482                                    (FIB_ENTRY_FLAG_CONNECTED |
483                                     FIB_ENTRY_FLAG_LOCAL),
484                                    DPO_PROTO_IP4,
485                                    &pfx.fp_addr,
486                                    sw_if_index,
487                                    // invalid FIB index
488                                    ~0,
489                                    1, NULL,
490                                    FIB_ROUTE_PATH_FLAG_NONE);
491 }
492
/*
 * Remove (or adjust) the routes that belong to the prefix of an interface
 * address that is going away.
 *
 * The (prefix, sw_if_index) entry is looked up and its ref count decremented.
 * If other addresses still use the prefix and the address recorded as the
 * prefix's source (src_ia_index) is still allocated, nothing else is done.
 * Otherwise:
 *  - length <= 30: the glean route is deleted; when the ref count reached
 *    zero the base-address and broadcast-address routes are removed as well,
 *    else a new source address is chosen, the glean route is re-added with
 *    it, and the function returns keeping the prefix entry;
 *  - length == 31: the attached route for the other address is deleted.
 * Finally the prefix entry is removed from the hash and returned to the pool.
 *
 * A warning is logged and nothing is changed if the prefix is not found.
 */
493 static void
494 ip4_del_interface_prefix_routes (ip4_main_t * im,
495                                  u32 sw_if_index,
496                                  u32 fib_index,
497                                  ip4_address_t * address,
498                                  u32 address_length)
499 {
500   ip_lookup_main_t *lm = &im->lookup_main;
501   ip_interface_prefix_t *if_prefix;
502
      /* Lookup key: the masked network address plus the interface. */
503   ip_interface_prefix_key_t key = {
504     .prefix = {
505       .fp_len = address_length,
506       .fp_proto = FIB_PROTOCOL_IP4,
507       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
508     },
509     .sw_if_index = sw_if_index,
510   };
511
512   fib_prefix_t pfx_special = {
513     .fp_len = 32,
514     .fp_proto = FIB_PROTOCOL_IP4,
515   };
516
517   if_prefix = ip_get_interface_prefix (lm, &key);
518   if (!if_prefix)
519     {
520       clib_warning ("Prefix not found while deleting %U",
521                     format_ip4_address_and_length, address, address_length);
522       return;
523     }
524
525   if_prefix->ref_count -= 1;
526
527   /*
528    * Routes need to be adjusted if:
529    * - deleting last intf addr in prefix
530    * - deleting intf addr used as default source address in glean adjacency
531    *
532    * We're done now otherwise
533    */
534   if ((if_prefix->ref_count > 0) &&
535       !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
536     return;
537
538   /* length <= 30, delete glean route, first address, last address */
539   if (address_length <= 30)
540     {
541
542       /* remove glean route for prefix */
543       pfx_special.fp_addr.ip4 = *address;
544       pfx_special.fp_len = address_length;
545       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
546
547       /* if no more intf addresses in prefix, remove other special routes */
548       if (!if_prefix->ref_count)
549         {
550           /* first address in prefix */
551           pfx_special.fp_addr.ip4.as_u32 =
552             address->as_u32 & im->fib_masks[address_length];
553           pfx_special.fp_len = 32;
554
              /* Skipped when the base address is the interface address itself. */
555           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
556           fib_table_entry_special_remove (fib_index,
557                                           &pfx_special,
558                                           FIB_SOURCE_INTERFACE);
559
560           /* prefix broadcast address */
561           pfx_special.fp_addr.ip4.as_u32 =
562             address->as_u32 | ~im->fib_masks[address_length];
563           pfx_special.fp_len = 32;
564
              /* Skipped when the broadcast address is the interface address. */
565           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
566           fib_table_entry_special_remove (fib_index,
567                                           &pfx_special,
568                                           FIB_SOURCE_INTERFACE);
569         }
570       else
571         /* default source addr just got deleted, find another */
572         {
573           ip_interface_address_t *new_src_ia = NULL;
574           ip4_address_t *new_src_addr = NULL;
575
              /*
               * NOTE(review): new_src_addr and new_src_ia are used below
               * without a NULL check; presumably another address in the same
               * prefix is guaranteed to exist while ref_count > 0 - confirm.
               */
576           new_src_addr =
577             ip4_interface_address_matching_destination
578               (im, address, sw_if_index, &new_src_ia);
579
580           if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
581
582           pfx_special.fp_len = address_length;
583           pfx_special.fp_addr.ip4 = *new_src_addr;
584
585           /* set new glean route for the prefix */
586           fib_table_entry_update_one_path (fib_index, &pfx_special,
587                                            FIB_SOURCE_INTERFACE,
588                                            (FIB_ENTRY_FLAG_CONNECTED |
589                                             FIB_ENTRY_FLAG_ATTACHED),
590                                            DPO_PROTO_IP4,
591                                            /* No next-hop address */
592                                            NULL,
593                                            sw_if_index,
594                                            /* invalid FIB index */
595                                            ~0,
596                                            1,
597                                            /* no out-label stack */
598                                            NULL,
599                                            FIB_ROUTE_PATH_FLAG_NONE);
              /* The prefix is still in use: keep its hash and pool entry. */
600           return;
601         }
602     }
603   /* length == 31, delete attached route for the other address */
604   else if (address_length == 31)
605     {
606       pfx_special.fp_addr.ip4.as_u32 =
607         address->as_u32 ^ clib_host_to_net_u32(1);
608
609       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
610     }
611
      /*
       * NOTE(review): for lengths 31 and 32 this point is also reached when
       * ref_count is still non-zero but the recorded source address has been
       * freed - confirm that releasing the prefix entry is intended then.
       */
612   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
613   pool_put (lm->if_prefix_pool, if_prefix);
614 }
615
616 static void
617 ip4_del_interface_routes (u32 sw_if_index,
618                           ip4_main_t * im,
619                           u32 fib_index,
620                           ip4_address_t * address, u32 address_length)
621 {
622   fib_prefix_t pfx = {
623     .fp_len = address_length,
624     .fp_proto = FIB_PROTOCOL_IP4,
625     .fp_addr.ip4 = *address,
626   };
627
628   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
629                                    address, address_length);
630
631   pfx.fp_len = 32;
632   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
633 }
634
635 #ifndef CLIB_MARCH_VARIANT
636 void
637 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
638 {
639   ip4_main_t *im = &ip4_main;
640
641   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
642
643   /*
644    * enable/disable only on the 1<->0 transition
645    */
646   if (is_enable)
647     {
648       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
649         return;
650     }
651   else
652     {
653       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
654       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
655         return;
656     }
657   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
658                                !is_enable, 0, 0);
659
660
661   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
662                                sw_if_index, !is_enable, 0, 0);
663
664   {
665     ip4_enable_disable_interface_callback_t *cb;
666     vec_foreach (cb, im->enable_disable_interface_callbacks)
667       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
668   }
669 }
670
671 static clib_error_t *
672 ip4_add_del_interface_address_internal (vlib_main_t * vm,
673                                         u32 sw_if_index,
674                                         ip4_address_t * address,
675                                         u32 address_length, u32 is_del)
676 {
677   vnet_main_t *vnm = vnet_get_main ();
678   ip4_main_t *im = &ip4_main;
679   ip_lookup_main_t *lm = &im->lookup_main;
680   clib_error_t *error = 0;
681   u32 if_address_index, elts_before;
682   ip4_address_fib_t ip4_af, *addr_fib = 0;
683
684   /* local0 interface doesn't support IP addressing  */
685   if (sw_if_index == 0)
686     {
687       return
688        clib_error_create ("local0 interface doesn't support IP addressing");
689     }
690
691   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
692   ip4_addr_fib_init (&ip4_af, address,
693                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
694   vec_add1 (addr_fib, ip4_af);
695
696   /*
697    * there is no support for adj-fib handling in the presence of overlapping
698    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
699    * most routers do.
700    */
701   /* *INDENT-OFF* */
702   if (!is_del)
703     {
704       /* When adding an address check that it does not conflict
705          with an existing address on any interface in this table. */
706       ip_interface_address_t *ia;
707       vnet_sw_interface_t *sif;
708
709       pool_foreach(sif, vnm->interface_main.sw_interfaces,
710       ({
711           if (im->fib_index_by_sw_if_index[sw_if_index] ==
712               im->fib_index_by_sw_if_index[sif->sw_if_index])
713             {
714               foreach_ip_interface_address
715                 (&im->lookup_main, ia, sif->sw_if_index,
716                  0 /* honor unnumbered */ ,
717                  ({
718                    ip4_address_t * x =
719                      ip_interface_address_get_address
720                      (&im->lookup_main, ia);
721                    if (ip4_destination_matches_route
722                        (im, address, x, ia->address_length) ||
723                        ip4_destination_matches_route (im,
724                                                       x,
725                                                       address,
726                                                       address_length))
727                      {
728                        /* an intf may have >1 addr from the same prefix */
729                        if ((sw_if_index == sif->sw_if_index) &&
730                            (ia->address_length == address_length) &&
731                            (x->as_u32 != address->as_u32))
732                          continue;
733
734                        /* error if the length or intf was different */
735                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
736
737                        return
738                          clib_error_create
739                          ("failed to add %U which conflicts with %U for interface %U",
740                           format_ip4_address_and_length, address,
741                           address_length,
742                           format_ip4_address_and_length, x,
743                           ia->address_length,
744                           format_vnet_sw_if_index_name, vnm,
745                           sif->sw_if_index);
746                      }
747                  }));
748             }
749       }));
750     }
751   /* *INDENT-ON* */
752
753   elts_before = pool_elts (lm->if_address_pool);
754
755   error = ip_interface_address_add_del
756     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
757   if (error)
758     goto done;
759
760   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
761
762   /* intf addr routes are added/deleted on admin up/down */
763   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
764     {
765       if (is_del)
766         ip4_del_interface_routes (sw_if_index,
767                                   im, ip4_af.fib_index, address,
768                                   address_length);
769       else
770         ip4_add_interface_routes (sw_if_index,
771                                   im, ip4_af.fib_index,
772                                   pool_elt_at_index
773                                   (lm->if_address_pool, if_address_index));
774     }
775
776   /* If pool did not grow/shrink: add duplicate address. */
777   if (elts_before != pool_elts (lm->if_address_pool))
778     {
779       ip4_add_del_interface_address_callback_t *cb;
780       vec_foreach (cb, im->add_del_interface_address_callbacks)
781         cb->function (im, cb->function_opaque, sw_if_index,
782                       address, address_length, if_address_index, is_del);
783     }
784
785 done:
786   vec_free (addr_fib);
787   return error;
788 }
789
790 clib_error_t *
791 ip4_add_del_interface_address (vlib_main_t * vm,
792                                u32 sw_if_index,
793                                ip4_address_t * address,
794                                u32 address_length, u32 is_del)
795 {
796   return ip4_add_del_interface_address_internal
797     (vm, sw_if_index, address, address_length, is_del);
798 }
799
800 void
801 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
802 {
803   ip_interface_address_t *ia;
804   ip4_main_t *im;
805
806   im = &ip4_main;
807
808   /*
809    * when directed broadcast is enabled, the subnet braodcast route will forward
810    * packets using an adjacency with a broadcast MAC. otherwise it drops
811    */
812   /* *INDENT-OFF* */
813   foreach_ip_interface_address(&im->lookup_main, ia,
814                                sw_if_index, 0,
815      ({
816        if (ia->address_length <= 30)
817          {
818            ip4_address_t *ipa;
819
820            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
821
822            fib_prefix_t pfx = {
823              .fp_len = 32,
824              .fp_proto = FIB_PROTOCOL_IP4,
825              .fp_addr = {
826                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
827              },
828            };
829
830            ip4_add_subnet_bcast_route
831              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
832                                                   sw_if_index),
833               &pfx, sw_if_index);
834          }
835      }));
836   /* *INDENT-ON* */
837 }
838 #endif
839
840 static clib_error_t *
841 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
842 {
843   ip4_main_t *im = &ip4_main;
844   ip_interface_address_t *ia;
845   ip4_address_t *a;
846   u32 is_admin_up, fib_index;
847
848   /* Fill in lookup tables with default table (0). */
849   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
850
851   vec_validate_init_empty (im->
852                            lookup_main.if_address_pool_index_by_sw_if_index,
853                            sw_if_index, ~0);
854
855   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
856
857   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
858
859   /* *INDENT-OFF* */
860   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
861                                 0 /* honor unnumbered */,
862   ({
863     a = ip_interface_address_get_address (&im->lookup_main, ia);
864     if (is_admin_up)
865       ip4_add_interface_routes (sw_if_index,
866                                 im, fib_index,
867                                 ia);
868     else
869       ip4_del_interface_routes (sw_if_index,
870                                 im, fib_index,
871                                 a, ia->address_length);
872   }));
873   /* *INDENT-ON* */
874
875   return 0;
876 }
877
878 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
879
880 /* Built-in ip4 unicast rx feature path definition */
881 /* *INDENT-OFF* */
882 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
883 {
884   .arc_name = "ip4-unicast",
885   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
886   .last_in_arc = "ip4-lookup",
887   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
888 };
889
890 VNET_FEATURE_INIT (ip4_flow_classify, static) =
891 {
892   .arc_name = "ip4-unicast",
893   .node_name = "ip4-flow-classify",
894   .runs_before = VNET_FEATURES ("ip4-inacl"),
895 };
896
897 VNET_FEATURE_INIT (ip4_inacl, static) =
898 {
899   .arc_name = "ip4-unicast",
900   .node_name = "ip4-inacl",
901   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
902 };
903
904 VNET_FEATURE_INIT (ip4_source_check_1, static) =
905 {
906   .arc_name = "ip4-unicast",
907   .node_name = "ip4-source-check-via-rx",
908   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
909 };
910
911 VNET_FEATURE_INIT (ip4_source_check_2, static) =
912 {
913   .arc_name = "ip4-unicast",
914   .node_name = "ip4-source-check-via-any",
915   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
916 };
917
918 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
919 {
920   .arc_name = "ip4-unicast",
921   .node_name = "ip4-source-and-port-range-check-rx",
922   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
923 };
924
925 VNET_FEATURE_INIT (ip4_policer_classify, static) =
926 {
927   .arc_name = "ip4-unicast",
928   .node_name = "ip4-policer-classify",
929   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
930 };
931
932 VNET_FEATURE_INIT (ip4_ipsec, static) =
933 {
934   .arc_name = "ip4-unicast",
935   .node_name = "ipsec4-input-feature",
936   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
937 };
938
939 VNET_FEATURE_INIT (ip4_vpath, static) =
940 {
941   .arc_name = "ip4-unicast",
942   .node_name = "vpath-input-ip4",
943   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
944 };
945
946 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
947 {
948   .arc_name = "ip4-unicast",
949   .node_name = "ip4-vxlan-bypass",
950   .runs_before = VNET_FEATURES ("ip4-lookup"),
951 };
952
953 VNET_FEATURE_INIT (ip4_not_enabled, static) =
954 {
955   .arc_name = "ip4-unicast",
956   .node_name = "ip4-not-enabled",
957   .runs_before = VNET_FEATURES ("ip4-lookup"),
958 };
959
960 VNET_FEATURE_INIT (ip4_lookup, static) =
961 {
962   .arc_name = "ip4-unicast",
963   .node_name = "ip4-lookup",
964   .runs_before = 0,     /* not before any other features */
965 };
966
967 /* Built-in ip4 multicast rx feature path definition */
968 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
969 {
970   .arc_name = "ip4-multicast",
971   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
972   .last_in_arc = "ip4-mfib-forward-lookup",
973   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
974 };
975
976 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
977 {
978   .arc_name = "ip4-multicast",
979   .node_name = "vpath-input-ip4",
980   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
981 };
982
983 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
984 {
985   .arc_name = "ip4-multicast",
986   .node_name = "ip4-not-enabled",
987   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
988 };
989
990 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
991 {
992   .arc_name = "ip4-multicast",
993   .node_name = "ip4-mfib-forward-lookup",
994   .runs_before = 0,     /* last feature */
995 };
996
997 /* Source and port-range check ip4 tx feature path definition */
998 VNET_FEATURE_ARC_INIT (ip4_output, static) =
999 {
1000   .arc_name = "ip4-output",
1001   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1002   .last_in_arc = "interface-output",
1003   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1004 };
1005
1006 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1007 {
1008   .arc_name = "ip4-output",
1009   .node_name = "ip4-source-and-port-range-check-tx",
1010   .runs_before = VNET_FEATURES ("ip4-outacl"),
1011 };
1012
1013 VNET_FEATURE_INIT (ip4_outacl, static) =
1014 {
1015   .arc_name = "ip4-output",
1016   .node_name = "ip4-outacl",
1017   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1018 };
1019
1020 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1021 {
1022   .arc_name = "ip4-output",
1023   .node_name = "ipsec4-output-feature",
1024   .runs_before = VNET_FEATURES ("interface-output"),
1025 };
1026
1027 /* Built-in ip4 tx feature path definition */
1028 VNET_FEATURE_INIT (ip4_interface_output, static) =
1029 {
1030   .arc_name = "ip4-output",
1031   .node_name = "interface-output",
1032   .runs_before = 0,     /* not before any other features */
1033 };
1034 /* *INDENT-ON* */
1035
1036 static clib_error_t *
1037 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1038 {
1039   ip4_main_t *im = &ip4_main;
1040
1041   /* Fill in lookup tables with default table (0). */
1042   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1043   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1044
1045   if (!is_add)
1046     {
1047       ip4_main_t *im4 = &ip4_main;
1048       ip_lookup_main_t *lm4 = &im4->lookup_main;
1049       ip_interface_address_t *ia = 0;
1050       ip4_address_t *address;
1051       vlib_main_t *vm = vlib_get_main ();
1052
1053       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1054       /* *INDENT-OFF* */
1055       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1056       ({
1057         address = ip_interface_address_get_address (lm4, ia);
1058         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1059       }));
1060       /* *INDENT-ON* */
1061     }
1062
1063   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1064                                is_add, 0, 0);
1065
1066   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1067                                sw_if_index, is_add, 0, 0);
1068
1069   return /* no error */ 0;
1070 }
1071
1072 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1073
1074 /* Global IP4 main. */
1075 #ifndef CLIB_MARCH_VARIANT
1076 ip4_main_t ip4_main;
1077 #endif /* CLIB_MARCH_VARIANT */
1078
1079 static clib_error_t *
1080 ip4_lookup_init (vlib_main_t * vm)
1081 {
1082   ip4_main_t *im = &ip4_main;
1083   clib_error_t *error;
1084   uword i;
1085
1086   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1087     return error;
1088   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1089     return (error);
1090   if ((error = vlib_call_init_function (vm, fib_module_init)))
1091     return error;
1092   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1093     return error;
1094
1095   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1096     {
1097       u32 m;
1098
1099       if (i < 32)
1100         m = pow2_mask (i) << (32 - i);
1101       else
1102         m = ~0;
1103       im->fib_masks[i] = clib_host_to_net_u32 (m);
1104     }
1105
1106   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1107
1108   /* Create FIB with index 0 and table id of 0. */
1109   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1110                                      FIB_SOURCE_DEFAULT_ROUTE);
1111   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1112                                       MFIB_SOURCE_DEFAULT_ROUTE);
1113
1114   {
1115     pg_node_t *pn;
1116     pn = pg_get_node (ip4_lookup_node.index);
1117     pn->unformat_edit = unformat_pg_ip4_header;
1118   }
1119
1120   {
1121     ethernet_arp_header_t h;
1122
1123     clib_memset (&h, 0, sizeof (h));
1124
1125 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1126 #define _8(f,v) h.f = v;
1127     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1128     _16 (l3_type, ETHERNET_TYPE_IP4);
1129     _8 (n_l2_address_bytes, 6);
1130     _8 (n_l3_address_bytes, 4);
1131     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1132 #undef _16
1133 #undef _8
1134
1135     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1136                                /* data */ &h,
1137                                sizeof (h),
1138                                /* alloc chunk size */ 8,
1139                                "ip4 arp");
1140   }
1141
1142   return error;
1143 }
1144
1145 VLIB_INIT_FUNCTION (ip4_lookup_init);
1146
1147 typedef struct
1148 {
1149   /* Adjacency taken. */
1150   u32 dpo_index;
1151   u32 flow_hash;
1152   u32 fib_index;
1153
1154   /* Packet data, possibly *after* rewrite. */
1155   u8 packet_data[64 - 1 * sizeof (u32)];
1156 }
1157 ip4_forward_next_trace_t;
1158
1159 #ifndef CLIB_MARCH_VARIANT
1160 u8 *
1161 format_ip4_forward_next_trace (u8 * s, va_list * args)
1162 {
1163   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1164   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1165   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1166   u32 indent = format_get_indent (s);
1167   s = format (s, "%U%U",
1168               format_white_space, indent,
1169               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1170   return s;
1171 }
1172 #endif
1173
1174 static u8 *
1175 format_ip4_lookup_trace (u8 * s, va_list * args)
1176 {
1177   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1178   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1179   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1180   u32 indent = format_get_indent (s);
1181
1182   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1183               t->fib_index, t->dpo_index, t->flow_hash);
1184   s = format (s, "\n%U%U",
1185               format_white_space, indent,
1186               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1187   return s;
1188 }
1189
1190 static u8 *
1191 format_ip4_rewrite_trace (u8 * s, va_list * args)
1192 {
1193   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1194   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1195   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1196   u32 indent = format_get_indent (s);
1197
1198   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1199               t->fib_index, t->dpo_index, format_ip_adjacency,
1200               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1201   s = format (s, "\n%U%U",
1202               format_white_space, indent,
1203               format_ip_adjacency_packet_data,
1204               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1205   return s;
1206 }
1207
1208 #ifndef CLIB_MARCH_VARIANT
1209 /* Common trace function for all ip4-forward next nodes. */
1210 void
1211 ip4_forward_next_trace (vlib_main_t * vm,
1212                         vlib_node_runtime_t * node,
1213                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1214 {
1215   u32 *from, n_left;
1216   ip4_main_t *im = &ip4_main;
1217
1218   n_left = frame->n_vectors;
1219   from = vlib_frame_vector_args (frame);
1220
1221   while (n_left >= 4)
1222     {
1223       u32 bi0, bi1;
1224       vlib_buffer_t *b0, *b1;
1225       ip4_forward_next_trace_t *t0, *t1;
1226
1227       /* Prefetch next iteration. */
1228       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1229       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1230
1231       bi0 = from[0];
1232       bi1 = from[1];
1233
1234       b0 = vlib_get_buffer (vm, bi0);
1235       b1 = vlib_get_buffer (vm, bi1);
1236
1237       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1238         {
1239           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1240           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1241           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1242           t0->fib_index =
1243             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1244              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1245             vec_elt (im->fib_index_by_sw_if_index,
1246                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1247
1248           clib_memcpy_fast (t0->packet_data,
1249                             vlib_buffer_get_current (b0),
1250                             sizeof (t0->packet_data));
1251         }
1252       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1253         {
1254           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1255           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1256           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1257           t1->fib_index =
1258             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1259              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1260             vec_elt (im->fib_index_by_sw_if_index,
1261                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1262           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1263                             sizeof (t1->packet_data));
1264         }
1265       from += 2;
1266       n_left -= 2;
1267     }
1268
1269   while (n_left >= 1)
1270     {
1271       u32 bi0;
1272       vlib_buffer_t *b0;
1273       ip4_forward_next_trace_t *t0;
1274
1275       bi0 = from[0];
1276
1277       b0 = vlib_get_buffer (vm, bi0);
1278
1279       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1280         {
1281           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1282           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1283           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1284           t0->fib_index =
1285             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1286              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1287             vec_elt (im->fib_index_by_sw_if_index,
1288                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1289           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1290                             sizeof (t0->packet_data));
1291         }
1292       from += 1;
1293       n_left -= 1;
1294     }
1295 }
1296
1297 /* Compute TCP/UDP/ICMP4 checksum in software. */
1298 u16
1299 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1300                               ip4_header_t * ip0)
1301 {
1302   ip_csum_t sum0;
1303   u32 ip_header_length, payload_length_host_byte_order;
1304
1305   /* Initialize checksum with ip header. */
1306   ip_header_length = ip4_header_bytes (ip0);
1307   payload_length_host_byte_order =
1308     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1309   sum0 =
1310     clib_host_to_net_u32 (payload_length_host_byte_order +
1311                           (ip0->protocol << 16));
1312
1313   if (BITS (uword) == 32)
1314     {
1315       sum0 =
1316         ip_csum_with_carry (sum0,
1317                             clib_mem_unaligned (&ip0->src_address, u32));
1318       sum0 =
1319         ip_csum_with_carry (sum0,
1320                             clib_mem_unaligned (&ip0->dst_address, u32));
1321     }
1322   else
1323     sum0 =
1324       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1325
1326   return ip_calculate_l4_checksum (vm, p0, sum0,
1327                                    payload_length_host_byte_order, (u8 *) ip0,
1328                                    ip_header_length, NULL);
1329 }
1330
1331 u32
1332 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1333 {
1334   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1335   udp_header_t *udp0;
1336   u16 sum16;
1337
1338   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1339           || ip0->protocol == IP_PROTOCOL_UDP);
1340
1341   udp0 = (void *) (ip0 + 1);
1342   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1343     {
1344       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1345                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1346       return p0->flags;
1347     }
1348
1349   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1350
1351   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1352                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1353
1354   return p0->flags;
1355 }
1356 #endif
1357
1358 /* *INDENT-OFF* */
1359 VNET_FEATURE_ARC_INIT (ip4_local) =
1360 {
1361   .arc_name  = "ip4-local",
1362   .start_nodes = VNET_FEATURES ("ip4-local"),
1363   .last_in_arc = "ip4-local-end-of-arc",
1364 };
1365 /* *INDENT-ON* */
1366
1367 static inline void
1368 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1369                             ip4_header_t * ip, u8 is_udp, u8 * error,
1370                             u8 * good_tcp_udp)
1371 {
1372   u32 flags0;
1373   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1374   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1375   if (is_udp)
1376     {
1377       udp_header_t *udp;
1378       u32 ip_len, udp_len;
1379       i32 len_diff;
1380       udp = ip4_next_header (ip);
1381       /* Verify UDP length. */
1382       ip_len = clib_net_to_host_u16 (ip->length);
1383       udp_len = clib_net_to_host_u16 (udp->length);
1384
1385       len_diff = ip_len - udp_len;
1386       *good_tcp_udp &= len_diff >= 0;
1387       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1388     }
1389 }
1390
/*
 * Checksum-state predicates on a buffer pointer.  Arguments and results
 * are fully parenthesized so the macros can be combined with other
 * operators (e.g. negated) and called with arbitrary pointer expressions.
 */

/* Non-zero when either the TCP or the UDP checksum-offload flag is set. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
        || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/*
 * Non-zero when a TCP/UDP packet still needs a software checksum check:
 * the checksum has neither been computed already nor been offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
        && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)         \
            || ip4_local_csum_is_offloaded (_b)))

/* Non-zero when the checksum is flagged correct or is offloaded. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
        || ip4_local_csum_is_offloaded (_b)) != 0)
1402
1403 static inline void
1404 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1405                          ip4_header_t * ih, u8 * error)
1406 {
1407   u8 is_udp, is_tcp_udp, good_tcp_udp;
1408
1409   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1410   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1411
1412   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1413     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1414   else
1415     good_tcp_udp = ip4_local_csum_is_valid (b);
1416
1417   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1418   *error = (is_tcp_udp && !good_tcp_udp
1419             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1420 }
1421
1422 static inline void
1423 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1424                             ip4_header_t ** ih, u8 * error)
1425 {
1426   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1427
1428   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1429   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1430
1431   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1432   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1433
1434   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1435   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1436
1437   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1438                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1439     {
1440       if (is_tcp_udp[0])
1441         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1442                                     &good_tcp_udp[0]);
1443       if (is_tcp_udp[1])
1444         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1445                                     &good_tcp_udp[1]);
1446     }
1447
1448   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1449               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1450   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1451               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1452 }
1453
1454 static inline void
1455 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1456                               vlib_buffer_t * b, u16 * next, u8 error,
1457                               u8 head_of_feature_arc)
1458 {
1459   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1460   u32 next_index;
1461
1462   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1463   b->error = error ? error_node->errors[error] : 0;
1464   if (head_of_feature_arc)
1465     {
1466       next_index = *next;
1467       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1468         {
1469           vnet_feature_arc_start (arc_index,
1470                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1471                                   &next_index, b);
1472           *next = next_index;
1473         }
1474     }
1475 }
1476
1477 typedef struct
1478 {
1479   ip4_address_t src;
1480   u32 lbi;
1481   u8 error;
1482   u8 first;
1483 } ip4_local_last_check_t;
1484
1485 static inline void
1486 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1487                      ip4_local_last_check_t * last_check, u8 * error0)
1488 {
1489   ip4_fib_mtrie_leaf_t leaf0;
1490   ip4_fib_mtrie_t *mtrie0;
1491   const dpo_id_t *dpo0;
1492   load_balance_t *lb0;
1493   u32 lbi0;
1494
1495   vnet_buffer (b)->ip.fib_index =
1496     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1497     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1498
1499   /*
1500    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1501    *  adjacency for the destination address (the local interface address).
1502    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1503    *  adjacency for the source address (the remote sender's address)
1504    */
1505   if (PREDICT_FALSE (last_check->first ||
1506                      (last_check->src.as_u32 != ip0->src_address.as_u32)))
1507     {
1508       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1509       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1510       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1511       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1512       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1513
1514       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1515         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1516       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1517
1518       lb0 = load_balance_get (lbi0);
1519       dpo0 = load_balance_get_bucket_i (lb0, 0);
1520
1521       /*
1522        * Must have a route to source otherwise we drop the packet.
1523        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1524        *
1525        * The checks are:
1526        *  - the source is a recieve => it's from us => bogus, do this
1527        *    first since it sets a different error code.
1528        *  - uRPF check for any route to source - accept if passes.
1529        *  - allow packets destined to the broadcast address from unknown sources
1530        */
1531
1532       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1533                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1534                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1535       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1536                   && !fib_urpf_check_size (lb0->lb_urpf)
1537                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1538                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1539
1540       last_check->src.as_u32 = ip0->src_address.as_u32;
1541       last_check->lbi = lbi0;
1542       last_check->error = *error0;
1543     }
1544   else
1545     {
1546       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1547         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1548       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1549       *error0 = last_check->error;
1550       last_check->first = 0;
1551     }
1552 }
1553
1554 static inline void
1555 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1556                         ip4_local_last_check_t * last_check, u8 * error)
1557 {
1558   ip4_fib_mtrie_leaf_t leaf[2];
1559   ip4_fib_mtrie_t *mtrie[2];
1560   const dpo_id_t *dpo[2];
1561   load_balance_t *lb[2];
1562   u32 not_last_hit;
1563   u32 lbi[2];
1564
1565   not_last_hit = last_check->first;
1566   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1567   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1568
1569   vnet_buffer (b[0])->ip.fib_index =
1570     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1571     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1572     vnet_buffer (b[0])->ip.fib_index;
1573
1574   vnet_buffer (b[1])->ip.fib_index =
1575     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1576     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1577     vnet_buffer (b[1])->ip.fib_index;
1578
1579   /*
1580    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1581    *  adjacency for the destination address (the local interface address).
1582    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1583    *  adjacency for the source address (the remote sender's address)
1584    */
1585   if (PREDICT_FALSE (not_last_hit))
1586     {
1587       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1588       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1589
1590       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1591       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1592
1593       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1594                                            &ip[0]->src_address, 2);
1595       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1596                                            &ip[1]->src_address, 2);
1597
1598       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1599                                            &ip[0]->src_address, 3);
1600       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1601                                            &ip[1]->src_address, 3);
1602
1603       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1604       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1605
1606       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1607         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1608       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1609
1610       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1611         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1612       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1613
1614       lb[0] = load_balance_get (lbi[0]);
1615       lb[1] = load_balance_get (lbi[1]);
1616
1617       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1618       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1619
1620       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1621                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1622                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1623       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1624                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1625                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1626                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1627
1628       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1629                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1630                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1631       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1632                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1633                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1634                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1635
1636       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1637       last_check->lbi = lbi[1];
1638       last_check->error = error[1];
1639     }
1640   else
1641     {
1642       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1643         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1644       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1645
1646       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1647         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1648       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1649
1650       error[0] = last_check->error;
1651       error[1] = last_check->error;
1652       last_check->first = 0;
1653     }
1654 }
1655
/*
 * Packet classes distinguished by ip4-local.  L4 is the only class with
 * value zero.
 */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4 = 0,
  IP_LOCAL_PACKET_TYPE_NAT = 1,
  IP_LOCAL_PACKET_TYPE_FRAG = 2,
};
1662
1663 /**
1664  * Determine packet type and next node.
1665  *
1666  * The expectation is that all packets that are not L4 will skip
1667  * checksums and source checks.
1668  */
1669 always_inline u8
1670 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1671 {
1672   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1673
1674   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1675     {
1676       *next = IP_LOCAL_NEXT_REASSEMBLY;
1677       return IP_LOCAL_PACKET_TYPE_FRAG;
1678     }
1679   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1680     {
1681       *next = lm->local_next_by_ip_protocol[ip->protocol];
1682       return IP_LOCAL_PACKET_TYPE_NAT;
1683     }
1684
1685   *next = lm->local_next_by_ip_protocol[ip->protocol];
1686   return IP_LOCAL_PACKET_TYPE_L4;
1687 }
1688
/**
 * Shared worker for the ip4-local and ip4-local-end-of-arc nodes.
 *
 * Classifies every packet in the frame, optionally verifies its L4
 * checksum and source address, lets ip4_local_set_next_and_error() settle
 * the next node, and finally enqueues the whole frame in one call.
 *
 * @param vm                   vlib main
 * @param node                 runtime of the calling node
 * @param frame                frame of buffer indices to process
 * @param head_of_feature_arc  non-zero when called from ip4-local; when
 *                             zero the checksum and source checks are
 *                             skipped for every packet
 * @return number of packets in the frame (frame->n_vectors)
 */
1689 static inline uword
1690 ip4_local_inline (vlib_main_t * vm,
1691                   vlib_node_runtime_t * node,
1692                   vlib_frame_t * frame, int head_of_feature_arc)
1693 {
1694   u32 *from, n_left_from;
       /* Errors are resolved through the ip4-input node's runtime, not
        * through this node's own runtime. */
1695   vlib_node_runtime_t *error_node =
1696     vlib_node_get_runtime (vm, ip4_input_node.index);
1697   u16 nexts[VLIB_FRAME_SIZE], *next;
1698   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1699   ip4_header_t *ip[2];
1700   u8 error[2], pt[2];
1701
       /* State shared by successive source checks within this frame. */
1702   ip4_local_last_check_t last_check = {
1703     /*
1704      * 0.0.0.0 can appear as the source address of an IP packet,
1705      * as can any other address, hence the need to use the 'first'
1706      * member to make sure the .lbi is initialised for the first
1707      * packet.
1708      */
1709     .src = {.as_u32 = 0},
1710     .lbi = ~0,
1711     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1712     .first = 1,
1713   };
1714
1715   from = vlib_frame_vector_args (frame);
1716   n_left_from = frame->n_vectors;
1717
1718   if (node->flags & VLIB_NODE_FLAG_TRACE)
1719     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1720
       /* Translate all buffer indices to pointers up front; b and next
        * walk bufs[] and nexts[] in lock-step below. */
1721   vlib_get_buffers (vm, from, bufs, n_left_from);
1722   b = bufs;
1723   next = nexts;
1724
       /* Dual loop: two packets per iteration. At least six packets must
        * remain so that the b[4]/b[5] prefetches stay inside the frame. */
1725   while (n_left_from >= 6)
1726     {
1727       u8 not_batch = 0;
1728
1729       /* Prefetch next iteration. */
1730       {
1731         vlib_prefetch_buffer_header (b[4], LOAD);
1732         vlib_prefetch_buffer_header (b[5], LOAD);
1733
1734         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1735         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1736       }
1737
1738       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1739
1740       ip[0] = vlib_buffer_get_current (b[0]);
1741       ip[1] = vlib_buffer_get_current (b[1]);
1742
           /* Record where the IPv4 header starts in each buffer. */
1743       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1744       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1745
1746       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1747       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1748
           /* Non-zero when the two packets were classified differently. */
1749       not_batch = pt[0] ^ pt[1];
1750
           /* No checks when not at the head of the arc, or when both
            * packets share the same non-L4 type. */
1751       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1752         goto skip_checks;
1753
1754       if (PREDICT_TRUE (not_batch == 0))
1755         {
               /* Both packets are L4: use the two-packet helpers. */
1756           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1757           ip4_local_check_src_x2 (b, ip, &last_check, error);
1758         }
1759       else
1760         {
               /* Mixed pair: check only the packet(s) classified as L4. */
1761           if (!pt[0])
1762             {
1763               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1764               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1765             }
1766           if (!pt[1])
1767             {
1768               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1769               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1770             }
1771         }
1772
1773     skip_checks:
1774
1775       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1776                                     head_of_feature_arc);
1777       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1778                                     head_of_feature_arc);
1779
1780       b += 2;
1781       next += 2;
1782       n_left_from -= 2;
1783     }
1784
       /* Single loop: remaining packets, one at a time, same steps. */
1785   while (n_left_from > 0)
1786     {
1787       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1788
1789       ip[0] = vlib_buffer_get_current (b[0]);
1790       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1791       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1792
1793       if (head_of_feature_arc == 0 || pt[0])
1794         goto skip_check;
1795
1796       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1797       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1798
1799     skip_check:
1800
1801       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1802                                     head_of_feature_arc);
1803
1804       b += 1;
1805       next += 1;
1806       n_left_from -= 1;
1807     }
1808
       /* Hand every buffer to the next node recorded for it in nexts[]. */
1809   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1810   return frame->n_vectors;
1811 }
1812
1813 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1814                                vlib_frame_t * frame)
1815 {
1816   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1817 }
1818
1819 /* *INDENT-OFF* */
1820 VLIB_REGISTER_NODE (ip4_local_node) =
1821 {
1822   .name = "ip4-local",
1823   .vector_size = sizeof (u32),
1824   .format_trace = format_ip4_forward_next_trace,
1825   .n_next_nodes = IP_LOCAL_N_NEXT,
1826   .next_nodes =
1827   {
1828     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1829     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1830     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1831     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1832     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1833   },
1834 };
1835 /* *INDENT-ON* */
1836
1837
1838 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1839                                           vlib_node_runtime_t * node,
1840                                           vlib_frame_t * frame)
1841 {
1842   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1843 }
1844
1845 /* *INDENT-OFF* */
1846 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1847   .name = "ip4-local-end-of-arc",
1848   .vector_size = sizeof (u32),
1849
1850   .format_trace = format_ip4_forward_next_trace,
1851   .sibling_of = "ip4-local",
1852 };
1853
1854 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1855   .arc_name = "ip4-local",
1856   .node_name = "ip4-local-end-of-arc",
1857   .runs_before = 0, /* not before any other features */
1858 };
1859 /* *INDENT-ON* */
1860
1861 #ifndef CLIB_MARCH_VARIANT
1862 void
1863 ip4_register_protocol (u32 protocol, u32 node_index)
1864 {
1865   vlib_main_t *vm = vlib_get_main ();
1866   ip4_main_t *im = &ip4_main;
1867   ip_lookup_main_t *lm = &im->lookup_main;
1868
1869   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1870   lm->local_next_by_ip_protocol[protocol] =
1871     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1872 }
1873
1874 void
1875 ip4_unregister_protocol (u32 protocol)
1876 {
1877   ip4_main_t *im = &ip4_main;
1878   ip_lookup_main_t *lm = &im->lookup_main;
1879
1880   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1881   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1882 }
1883 #endif
1884
1885 static clib_error_t *
1886 show_ip_local_command_fn (vlib_main_t * vm,
1887                           unformat_input_t * input, vlib_cli_command_t * cmd)
1888 {
1889   ip4_main_t *im = &ip4_main;
1890   ip_lookup_main_t *lm = &im->lookup_main;
1891   int i;
1892
1893   vlib_cli_output (vm, "Protocols handled by ip4_local");
1894   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1895     {
1896       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1897         {
1898           u32 node_index = vlib_get_node (vm,
1899                                           ip4_local_node.index)->
1900             next_nodes[lm->local_next_by_ip_protocol[i]];
1901           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1902                            format_vlib_node_name, vm, node_index);
1903         }
1904     }
1905   return 0;
1906 }
1907
1908
1909
1910 /*?
1911  * Display the set of protocols handled by the local IPv4 stack.
1912  *
1913  * @cliexpar
1914  * Example of how to display local protocol table:
1915  * @cliexstart{show ip local}
1916  * Protocols handled by ip4_local
1917  * ICMP: ip4-icmp-input
1918  * UDP: ip4-udp-lookup
1919  * GRE: gre4-input
1920  * @cliexend
1921 ?*/
1922 /* *INDENT-OFF* */
1923 VLIB_CLI_COMMAND (show_ip_local, static) =
1924 {
1925   .path = "show ip local",
1926   .function = show_ip_local_command_fn,
1927   .short_help = "show ip local",
1928 };
1929 /* *INDENT-ON* */
1930
/**
 * Shared worker for the ip4-arp and ip4-glean nodes.
 *
 * Every input buffer is placed in the frame for IP4_ARP_NEXT_DROP with
 * p0->error recording the outcome. Unless the packet is throttled or
 * otherwise rejected, a separate ARP request buffer is taken from the ARP
 * request packet template and handed to the adjacency's rewrite
 * next_index.
 *
 * @param vm        vlib main
 * @param node      runtime of the calling node
 * @param frame     frame of buffer indices to process
 * @param is_glean  non-zero (ip4-glean): resolve the packet's destination
 *                  address; zero (ip4-arp): resolve the adjacency's next hop
 * @return number of packets in the frame (frame->n_vectors)
 */
1931 always_inline uword
1932 ip4_arp_inline (vlib_main_t * vm,
1933                 vlib_node_runtime_t * node,
1934                 vlib_frame_t * frame, int is_glean)
1935 {
1936   vnet_main_t *vnm = vnet_get_main ();
1937   ip4_main_t *im = &ip4_main;
1938   ip_lookup_main_t *lm = &im->lookup_main;
1939   u32 *from, *to_next_drop;
1940   uword n_left_from, n_left_to_next_drop, next_index;
1941   u32 thread_index = vm->thread_index;
1942   u64 seed;
1943
1944   if (node->flags & VLIB_NODE_FLAG_TRACE)
1945     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1946
       /* One throttle seed per frame, shared by all throttle_check calls. */
1947   seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1948
1949   from = vlib_frame_vector_args (frame);
1950   n_left_from = frame->n_vectors;
       /* NOTE(review): next_index is computed here but not read again in
        * this function. */
1951   next_index = node->cached_next_index;
1952   if (next_index == IP4_ARP_NEXT_DROP)
1953     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1954
1955   while (n_left_from > 0)
1956     {
1957       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1958                            to_next_drop, n_left_to_next_drop);
1959
1960       while (n_left_from > 0 && n_left_to_next_drop > 0)
1961         {
1962           u32 pi0, bi0, adj_index0, sw_if_index0;
1963           ip_adjacency_t *adj0;
1964           vlib_buffer_t *p0, *b0;
1965           ip4_address_t resolve0;
1966           ethernet_arp_header_t *h0;
1967           vnet_hw_interface_t *hw_if0;
1968           u64 r0;
1969
1970           pi0 = from[0];
1971           p0 = vlib_get_buffer (vm, pi0);
1972
               /* Queue the original packet on the drop next up front, so
                * every 'continue' below leaves it there with p0->error set. */
1973           from += 1;
1974           n_left_from -= 1;
1975           to_next_drop[0] = pi0;
1976           to_next_drop += 1;
1977           n_left_to_next_drop -= 1;
1978
1979           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1980           adj0 = adj_get (adj_index0);
1981
1982           if (is_glean)
1983             {
1984               /* resolve the packet's destination */
1985               ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1986               resolve0 = ip0->dst_address;
1987             }
1988           else
1989             {
1990               /* resolve the incomplete adj */
1991               resolve0 = adj0->sub_type.nbr.next_hop.ip4;
1992             }
1993
1994           /* combine the address and interface for the hash key */
1995           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1996           r0 = (u64) resolve0.data_u32 << 32;
1997           r0 |= sw_if_index0;
1998
               /* Key is (address, interface); a hit means this request is
                * suppressed and only the original packet is dropped. */
1999           if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
2000             {
2001               p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
2002               continue;
2003             }
2004
2005           /*
2006            * the adj has been updated to a rewrite but the node the DPO that got
2007            * us here hasn't - yet. no big deal. we'll drop while we wait.
2008            */
2009           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2010             {
2011               p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
2012               continue;
2013             }
2014
2015           /*
2016            * Can happen if the control-plane is programming tables
2017            * with traffic flowing; at least that's today's lame excuse.
2018            */
2019           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2020               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2021             {
2022               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2023               continue;
2024             }
2025           /* Send ARP request. */
2026           h0 =
2027             vlib_packet_template_get_packet (vm,
2028                                              &im->ip4_arp_request_packet_template,
2029                                              &bi0);
2030           /* Seems we're out of buffers */
2031           if (PREDICT_FALSE (!h0))
2032             {
2033               p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
2034               continue;
2035             }
2036
2037           b0 = vlib_get_buffer (vm, bi0);
2038
2039           /* copy the persistent fields from the original */
2040           clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
2041
2042           /* Add rewrite/encap string for ARP packet. */
2043           vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2044
2045           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2046
2047           /* Src ethernet address in ARP header. */
2048           mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
2049                                   hw_if0->hw_address);
2050           if (is_glean)
2051             {
2052               /* The interface's source address is stashed in the Glean Adj */
2053               h0->ip4_over_ethernet[0].ip4 =
2054                 adj0->sub_type.glean.receive_addr.ip4;
2055             }
2056           else
2057             {
2058               /* Src IP address in ARP header. */
2059               if (ip4_src_address_for_packet (lm, sw_if_index0,
2060                                               &h0->ip4_over_ethernet[0].ip4))
2061                 {
2062                   /* No source address available */
2063                   p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
                       /* The request buffer will not be sent; release it. */
2064                   vlib_buffer_free (vm, &bi0, 1);
2065                   continue;
2066                 }
2067             }
               /* Target IP address in ARP header: the address being resolved. */
2068           h0->ip4_over_ethernet[1].ip4 = resolve0;
2069
2070           p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
2071
2072           vlib_buffer_copy_trace_flag (vm, p0, bi0);
2073           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
2074           vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2075
               /* Move the buffer start back over the rewrite bytes. */
2076           vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2077
               /* The request goes to the adjacency's next node, not to drop. */
2078           vlib_set_next_frame_buffer (vm, node,
2079                                       adj0->rewrite_header.next_index, bi0);
2080         }
2081
2082       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2083     }
2084
2085   return frame->n_vectors;
2086 }
2087
2088 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2089                              vlib_frame_t * frame)
2090 {
2091   return (ip4_arp_inline (vm, node, frame, 0));
2092 }
2093
2094 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2095                                vlib_frame_t * frame)
2096 {
2097   return (ip4_arp_inline (vm, node, frame, 1));
2098 }
2099
2100 static char *ip4_arp_error_strings[] = {
2101   [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
2102   [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
2103   [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
2104   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2105   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2106   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2107 };
2108
2109 /* *INDENT-OFF* */
2110 VLIB_REGISTER_NODE (ip4_arp_node) =
2111 {
2112   .name = "ip4-arp",
2113   .vector_size = sizeof (u32),
2114   .format_trace = format_ip4_forward_next_trace,
2115   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2116   .error_strings = ip4_arp_error_strings,
2117   .n_next_nodes = IP4_ARP_N_NEXT,
2118   .next_nodes =
2119   {
2120     [IP4_ARP_NEXT_DROP] = "error-drop",
2121   },
2122 };
2123
2124 VLIB_REGISTER_NODE (ip4_glean_node) =
2125 {
2126   .name = "ip4-glean",
2127   .vector_size = sizeof (u32),
2128   .format_trace = format_ip4_forward_next_trace,
2129   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2130   .error_strings = ip4_arp_error_strings,
2131   .n_next_nodes = IP4_ARP_N_NEXT,
2132   .next_nodes = {
2133   [IP4_ARP_NEXT_DROP] = "error-drop",
2134   },
2135 };
2136 /* *INDENT-ON* */
2137
2138 #define foreach_notrace_ip4_arp_error           \
2139 _(THROTTLED)                                    \
2140 _(RESOLVED)                                     \
2141 _(NO_BUFFERS)                                   \
2142 _(REQUEST_SENT)                                 \
2143 _(NON_ARP_ADJ)                                  \
2144 _(NO_SOURCE_ADDRESS)
2145
2146 static clib_error_t *
2147 arp_notrace_init (vlib_main_t * vm)
2148 {
2149   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2150
2151   /* don't trace ARP request packets */
2152 #define _(a)                                    \
2153     vnet_pcap_drop_trace_filter_add_del         \
2154         (rt->errors[IP4_ARP_ERROR_##a],         \
2155          1 /* is_add */);
2156   foreach_notrace_ip4_arp_error;
2157 #undef _
2158   return 0;
2159 }
2160
2161 VLIB_INIT_FUNCTION (arp_notrace_init);
2162
2163
2164 #ifndef CLIB_MARCH_VARIANT
2165 /* Send an ARP request to see if given destination is reachable on given interface. */
2166 clib_error_t *
2167 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
2168                     u8 refresh)
2169 {
2170   vnet_main_t *vnm = vnet_get_main ();
2171   ip4_main_t *im = &ip4_main;
2172   ethernet_arp_header_t *h;
2173   ip4_address_t *src;
2174   ip_interface_address_t *ia;
2175   ip_adjacency_t *adj;
2176   vnet_hw_interface_t *hi;
2177   vnet_sw_interface_t *si;
2178   vlib_buffer_t *b;
2179   adj_index_t ai;
2180   u32 bi = 0;
2181   u8 unicast_rewrite = 0;
2182
2183   si = vnet_get_sw_interface (vnm, sw_if_index);
2184
2185   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2186     {
2187       return clib_error_return (0, "%U: interface %U down",
2188                                 format_ip4_address, dst,
2189                                 format_vnet_sw_if_index_name, vnm,
2190                                 sw_if_index);
2191     }
2192
2193   src =
2194     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2195   if (!src)
2196     {
2197       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2198       return clib_error_return
2199         (0,
2200          "no matching interface address for destination %U (interface %U)",
2201          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2202          sw_if_index);
2203     }
2204
2205   h = vlib_packet_template_get_packet (vm,
2206                                        &im->ip4_arp_request_packet_template,
2207                                        &bi);
2208
2209   if (!h)
2210     return clib_error_return (0, "ARP request packet allocation failed");
2211
2212   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2213   if (PREDICT_FALSE (!hi->hw_address))
2214     {
2215       return clib_error_return (0, "%U: interface %U do not support ip probe",
2216                                 format_ip4_address, dst,
2217                                 format_vnet_sw_if_index_name, vnm,
2218                                 sw_if_index);
2219     }
2220
2221   mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2222
2223   h->ip4_over_ethernet[0].ip4 = src[0];
2224   h->ip4_over_ethernet[1].ip4 = dst[0];
2225
2226   b = vlib_get_buffer (vm, bi);
2227   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2228     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2229
2230   ip46_address_t nh = {
2231     .ip4 = *dst,
2232   };
2233
2234   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2235                             VNET_LINK_IP4, &nh, sw_if_index);
2236   adj = adj_get (ai);
2237
2238   /* Peer has been previously resolved, retrieve glean adj instead */
2239   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2240     {
2241       if (refresh)
2242         unicast_rewrite = 1;
2243       else
2244         {
2245           adj_unlock (ai);
2246           ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2247                                       VNET_LINK_IP4, sw_if_index, &nh);
2248           adj = adj_get (ai);
2249         }
2250     }
2251
2252   /* Add encapsulation string for software interface (e.g. ethernet header). */
2253   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2254   if (unicast_rewrite)
2255     {
2256       u16 *etype = vlib_buffer_get_current (b) - 2;
2257       etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2258     }
2259   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2260
2261   {
2262     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2263     u32 *to_next = vlib_frame_vector_args (f);
2264     to_next[0] = bi;
2265     f->n_vectors = 1;
2266     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2267   }
2268
2269   adj_unlock (ai);
2270   return /* no error */ 0;
2271 }
2272 #endif
2273
/* Next-node slots of the ip4 rewrite nodes. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  IP4_REWRITE_N_NEXT = 3,       /* Last: number of slots above */
} ip4_rewrite_next_t;
2281
2282 /**
2283  * The bits of an IPv4 destination address that are kept when
2284  * constructing a multicast MAC address.
      *
      * Both values have 23 bits set; which one applies depends on the host
      * byte order. Presumably the address word is held in network byte
      * order, so both select the low-order 23 bits of the group address -
      * confirm against vnet_ip_mcast_fixup_header().
2285  */
2286 #if CLIB_ARCH_IS_BIG_ENDIAN
2287 #define IP4_MCAST_ADDR_MASK 0x007fffff
2288 #else
2289 #define IP4_MCAST_ADDR_MASK 0xffff7f00
2290 #endif
2291
2292 always_inline void
2293 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2294                u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2295 {
2296   if (packet_len > adj_packet_bytes)
2297     {
2298       *error = IP4_ERROR_MTU_EXCEEDED;
2299       if (df)
2300         {
2301           icmp4_error_set_vnet_buffer
2302             (b, ICMP4_destination_unreachable,
2303              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2304              adj_packet_bytes);
2305           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2306         }
2307       else
2308         {
2309           /* IP fragmentation */
2310           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2311                                    IP4_FRAG_NEXT_IP4_REWRITE, 0);
2312           *next = IP4_REWRITE_NEXT_FRAGMENT;
2313         }
2314     }
2315 }
2316
2317 /* Decrement TTL & update checksum.
2318    Works either endian, so no need for byte swap. */
2319 static_always_inline void
2320 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2321                             u32 * error)
2322 {
2323   i32 ttl;
2324   u32 checksum;
2325   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2326     {
2327       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2328       return;
2329     }
2330
2331   ttl = ip->ttl;
2332
2333   /* Input node should have reject packets with ttl 0. */
2334   ASSERT (ip->ttl > 0);
2335
2336   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2337   checksum += checksum >= 0xffff;
2338
2339   ip->checksum = checksum;
2340   ttl -= 1;
2341   ip->ttl = ttl;
2342
2343   /*
2344    * If the ttl drops below 1 when forwarding, generate
2345    * an ICMP response.
2346    */
2347   if (PREDICT_FALSE (ttl <= 0))
2348     {
2349       *error = IP4_ERROR_TIME_EXPIRED;
2350       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2351       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2352                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2353                                    0);
2354       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2355     }
2356
2357   /* Verify checksum. */
2358   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2359           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2360 }
2361
2362
2363 always_inline uword
2364 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2365                              vlib_node_runtime_t * node,
2366                              vlib_frame_t * frame,
2367                              int do_counters, int is_midchain, int is_mcast,
2368                              int do_gso)
2369 {
2370   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2371   u32 *from = vlib_frame_vector_args (frame);
2372   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2373   u16 nexts[VLIB_FRAME_SIZE], *next;
2374   u32 n_left_from;
2375   vlib_node_runtime_t *error_node =
2376     vlib_node_get_runtime (vm, ip4_input_node.index);
2377
2378   n_left_from = frame->n_vectors;
2379   u32 thread_index = vm->thread_index;
2380
2381   vlib_get_buffers (vm, from, bufs, n_left_from);
2382   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2383
2384   if (n_left_from >= 6)
2385     {
2386       int i;
2387       for (i = 2; i < 6; i++)
2388         vlib_prefetch_buffer_header (bufs[i], LOAD);
2389     }
2390
2391   next = nexts;
2392   b = bufs;
2393   while (n_left_from >= 8)
2394     {
2395       ip_adjacency_t *adj0, *adj1;
2396       ip4_header_t *ip0, *ip1;
2397       u32 rw_len0, error0, adj_index0;
2398       u32 rw_len1, error1, adj_index1;
2399       u32 tx_sw_if_index0, tx_sw_if_index1;
2400       u8 *p;
2401
2402       vlib_prefetch_buffer_header (b[6], LOAD);
2403       vlib_prefetch_buffer_header (b[7], LOAD);
2404
2405       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2406       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2407
2408       /*
2409        * pre-fetch the per-adjacency counters
2410        */
2411       if (do_counters)
2412         {
2413           vlib_prefetch_combined_counter (&adjacency_counters,
2414                                           thread_index, adj_index0);
2415           vlib_prefetch_combined_counter (&adjacency_counters,
2416                                           thread_index, adj_index1);
2417         }
2418
2419       ip0 = vlib_buffer_get_current (b[0]);
2420       ip1 = vlib_buffer_get_current (b[1]);
2421
2422       error0 = error1 = IP4_ERROR_NONE;
2423
2424       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2425       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2426
2427       /* Rewrite packet header and updates lengths. */
2428       adj0 = adj_get (adj_index0);
2429       adj1 = adj_get (adj_index1);
2430
2431       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2432       rw_len0 = adj0[0].rewrite_header.data_bytes;
2433       rw_len1 = adj1[0].rewrite_header.data_bytes;
2434       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2435       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2436
2437       p = vlib_buffer_get_current (b[2]);
2438       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2439       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2440
2441       p = vlib_buffer_get_current (b[3]);
2442       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2443       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2444
2445       /* Check MTU of outgoing interface. */
2446       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2447       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2448
2449       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2450         ip0_len = gso_mtu_sz (b[0]);
2451       if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
2452         ip1_len = gso_mtu_sz (b[1]);
2453
2454       ip4_mtu_check (b[0], ip0_len,
2455                      adj0[0].rewrite_header.max_l3_packet_bytes,
2456                      ip0->flags_and_fragment_offset &
2457                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2458                      next + 0, &error0);
2459       ip4_mtu_check (b[1], ip1_len,
2460                      adj1[0].rewrite_header.max_l3_packet_bytes,
2461                      ip1->flags_and_fragment_offset &
2462                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2463                      next + 1, &error1);
2464
2465       if (is_mcast)
2466         {
2467           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2468                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2469                     IP4_ERROR_SAME_INTERFACE : error0);
2470           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2471                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2472                     IP4_ERROR_SAME_INTERFACE : error1);
2473         }
2474
2475       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2476        * to see the IP header */
2477       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2478         {
2479           u32 next_index = adj0[0].rewrite_header.next_index;
2480           vlib_buffer_advance (b[0], -(word) rw_len0);
2481           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2482           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2483
2484           if (PREDICT_FALSE
2485               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2486             vnet_feature_arc_start (lm->output_feature_arc_index,
2487                                     tx_sw_if_index0, &next_index, b[0]);
2488           next[0] = next_index;
2489         }
2490       else
2491         {
2492           b[0]->error = error_node->errors[error0];
2493         }
2494       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2495         {
2496           u32 next_index = adj1[0].rewrite_header.next_index;
2497           vlib_buffer_advance (b[1], -(word) rw_len1);
2498
2499           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2500           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2501
2502           if (PREDICT_FALSE
2503               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2504             vnet_feature_arc_start (lm->output_feature_arc_index,
2505                                     tx_sw_if_index1, &next_index, b[1]);
2506           next[1] = next_index;
2507         }
2508       else
2509         {
2510           b[1]->error = error_node->errors[error1];
2511         }
2512       if (is_midchain)
2513         {
2514           calc_checksums (vm, b[0]);
2515           calc_checksums (vm, b[1]);
2516         }
2517       /* Guess we are only writing on simple Ethernet header. */
2518       vnet_rewrite_two_headers (adj0[0], adj1[0],
2519                                 ip0, ip1, sizeof (ethernet_header_t));
2520
2521       /*
2522        * Bump the per-adjacency counters
2523        */
2524       if (do_counters)
2525         {
2526           vlib_increment_combined_counter
2527             (&adjacency_counters,
2528              thread_index,
2529              adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2530
2531           vlib_increment_combined_counter
2532             (&adjacency_counters,
2533              thread_index,
2534              adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2535         }
2536
2537       if (is_midchain)
2538         {
2539           if (adj0->sub_type.midchain.fixup_func)
2540             adj0->sub_type.midchain.fixup_func
2541               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2542           if (adj1->sub_type.midchain.fixup_func)
2543             adj1->sub_type.midchain.fixup_func
2544               (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2545         }
2546
2547       if (is_mcast)
2548         {
2549           /*
2550            * copy bytes from the IP address into the MAC rewrite
2551            */
2552           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2553                                       adj0->rewrite_header.dst_mcast_offset,
2554                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2555           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2556                                       adj1->rewrite_header.dst_mcast_offset,
2557                                       &ip1->dst_address.as_u32, (u8 *) ip1);
2558         }
2559
2560       next += 2;
2561       b += 2;
2562       n_left_from -= 2;
2563     }
2564
2565   while (n_left_from > 0)
2566     {
2567       ip_adjacency_t *adj0;
2568       ip4_header_t *ip0;
2569       u32 rw_len0, adj_index0, error0;
2570       u32 tx_sw_if_index0;
2571
2572       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2573
2574       adj0 = adj_get (adj_index0);
2575
2576       if (do_counters)
2577         vlib_prefetch_combined_counter (&adjacency_counters,
2578                                         thread_index, adj_index0);
2579
2580       ip0 = vlib_buffer_get_current (b[0]);
2581
2582       error0 = IP4_ERROR_NONE;
2583
2584       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2585
2586
2587       /* Update packet buffer attributes/set output interface. */
2588       rw_len0 = adj0[0].rewrite_header.data_bytes;
2589       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2590
2591       /* Check MTU of outgoing interface. */
2592       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2593       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2594         ip0_len = gso_mtu_sz (b[0]);
2595
2596       ip4_mtu_check (b[0], ip0_len,
2597                      adj0[0].rewrite_header.max_l3_packet_bytes,
2598                      ip0->flags_and_fragment_offset &
2599                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2600                      next + 0, &error0);
2601
2602       if (is_mcast)
2603         {
2604           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2605                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2606                     IP4_ERROR_SAME_INTERFACE : error0);
2607         }
2608
2609       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2610        * to see the IP header */
2611       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2612         {
2613           u32 next_index = adj0[0].rewrite_header.next_index;
2614           vlib_buffer_advance (b[0], -(word) rw_len0);
2615           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2616           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2617
2618           if (PREDICT_FALSE
2619               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2620             vnet_feature_arc_start (lm->output_feature_arc_index,
2621                                     tx_sw_if_index0, &next_index, b[0]);
2622           next[0] = next_index;
2623         }
2624       else
2625         {
2626           b[0]->error = error_node->errors[error0];
2627         }
2628       if (is_midchain)
2629         {
2630           calc_checksums (vm, b[0]);
2631         }
2632       /* Guess we are only writing on simple Ethernet header. */
2633       vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2634
2635       if (do_counters)
2636         vlib_increment_combined_counter
2637           (&adjacency_counters,
2638            thread_index, adj_index0, 1,
2639            vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2640
2641       if (is_midchain)
2642         {
2643           if (adj0->sub_type.midchain.fixup_func)
2644             adj0->sub_type.midchain.fixup_func
2645               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2646         }
2647
2648       if (is_mcast)
2649         {
2650           /*
2651            * copy bytes from the IP address into the MAC rewrite
2652            */
2653           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2654                                       adj0->rewrite_header.dst_mcast_offset,
2655                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2656         }
2657
2658       next += 1;
2659       b += 1;
2660       n_left_from -= 1;
2661     }
2662
2663
2664   /* Need to do trace after rewrites to pick up new packet data. */
2665   if (node->flags & VLIB_NODE_FLAG_TRACE)
2666     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2667
2668   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2669   return frame->n_vectors;
2670 }
2671
2672 always_inline uword
2673 ip4_rewrite_inline (vlib_main_t * vm,
2674                     vlib_node_runtime_t * node,
2675                     vlib_frame_t * frame,
2676                     int do_counters, int is_midchain, int is_mcast)
2677 {
2678   vnet_main_t *vnm = vnet_get_main ();
2679   if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2680     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2681                                         is_midchain, is_mcast,
2682                                         1 /* do_gso */ );
2683   else
2684     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2685                                         is_midchain, is_mcast,
2686                                         0 /* no do_gso */ );
2687 }
2688
2689
2690 /** @brief IPv4 rewrite node.
2691     @node ip4-rewrite
2692
2693     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2694     header checksum, fetch the ip adjacency, check the outbound mtu,
2695     apply the adjacency rewrite, and send pkts to the adjacency
2696     rewrite header's rewrite_next_index.
2697
2698     @param vm vlib_main_t corresponding to the current thread
2699     @param node vlib_node_runtime_t
2700     @param frame vlib_frame_t whose contents should be dispatched
2701
2702     @par Graph mechanics: buffer metadata, next index usage
2703
2704     @em Uses:
2705     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2706         - the rewrite adjacency index
2707     - <code>adj->lookup_next_index</code>
2708         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2709           the packet will be dropped.
2710     - <code>adj->rewrite_header</code>
2711         - Rewrite string length, rewrite string, next_index
2712
2713     @em Sets:
2714     - <code>b->current_data, b->current_length</code>
2715         - Updated net of applying the rewrite string
2716
2717     <em>Next Indices:</em>
2718     - <code> adj->rewrite_header.next_index </code>
2719       or @c ip4-drop
2720 */
2721
2722 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2723                                  vlib_frame_t * frame)
2724 {
2725   if (adj_are_counters_enabled ())
2726     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2727   else
2728     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2729 }
2730
2731 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2732                                        vlib_node_runtime_t * node,
2733                                        vlib_frame_t * frame)
2734 {
2735   if (adj_are_counters_enabled ())
2736     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2737   else
2738     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2739 }
2740
2741 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2742                                   vlib_node_runtime_t * node,
2743                                   vlib_frame_t * frame)
2744 {
2745   if (adj_are_counters_enabled ())
2746     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2747   else
2748     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2749 }
2750
2751 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2752                                        vlib_node_runtime_t * node,
2753                                        vlib_frame_t * frame)
2754 {
2755   if (adj_are_counters_enabled ())
2756     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2757   else
2758     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2759 }
2760
2761 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2762                                         vlib_node_runtime_t * node,
2763                                         vlib_frame_t * frame)
2764 {
2765   if (adj_are_counters_enabled ())
2766     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2767   else
2768     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2769 }
2770
2771 /* *INDENT-OFF* */
2772 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2773   .name = "ip4-rewrite",
2774   .vector_size = sizeof (u32),
2775
2776   .format_trace = format_ip4_rewrite_trace,
2777
2778   .n_next_nodes = IP4_REWRITE_N_NEXT,
2779   .next_nodes = {
2780     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2781     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2782     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2783   },
2784 };
2785
2786 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2787   .name = "ip4-rewrite-bcast",
2788   .vector_size = sizeof (u32),
2789
2790   .format_trace = format_ip4_rewrite_trace,
2791   .sibling_of = "ip4-rewrite",
2792 };
2793
2794 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2795   .name = "ip4-rewrite-mcast",
2796   .vector_size = sizeof (u32),
2797
2798   .format_trace = format_ip4_rewrite_trace,
2799   .sibling_of = "ip4-rewrite",
2800 };
2801
2802 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2803   .name = "ip4-mcast-midchain",
2804   .vector_size = sizeof (u32),
2805
2806   .format_trace = format_ip4_rewrite_trace,
2807   .sibling_of = "ip4-rewrite",
2808 };
2809
2810 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2811   .name = "ip4-midchain",
2812   .vector_size = sizeof (u32),
2813   .format_trace = format_ip4_forward_next_trace,
2814   .sibling_of =  "ip4-rewrite",
2815 };
/* *INDENT-ON* */
2817
2818 static int
2819 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2820 {
2821   ip4_fib_mtrie_t *mtrie0;
2822   ip4_fib_mtrie_leaf_t leaf0;
2823   u32 lbi0;
2824
2825   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2826
2827   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2828   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2829   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2830
2831   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2832
2833   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2834 }
2835
2836 static clib_error_t *
2837 test_lookup_command_fn (vlib_main_t * vm,
2838                         unformat_input_t * input, vlib_cli_command_t * cmd)
2839 {
2840   ip4_fib_t *fib;
2841   u32 table_id = 0;
2842   f64 count = 1;
2843   u32 n;
2844   int i;
2845   ip4_address_t ip4_base_address;
2846   u64 errors = 0;
2847
2848   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2849     {
2850       if (unformat (input, "table %d", &table_id))
2851         {
2852           /* Make sure the entry exists. */
2853           fib = ip4_fib_get (table_id);
2854           if ((fib) && (fib->index != table_id))
2855             return clib_error_return (0, "<fib-index> %d does not exist",
2856                                       table_id);
2857         }
2858       else if (unformat (input, "count %f", &count))
2859         ;
2860
2861       else if (unformat (input, "%U",
2862                          unformat_ip4_address, &ip4_base_address))
2863         ;
2864       else
2865         return clib_error_return (0, "unknown input `%U'",
2866                                   format_unformat_error, input);
2867     }
2868
2869   n = count;
2870
2871   for (i = 0; i < n; i++)
2872     {
2873       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2874         errors++;
2875
2876       ip4_base_address.as_u32 =
2877         clib_host_to_net_u32 (1 +
2878                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2879     }
2880
2881   if (errors)
2882     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2883   else
2884     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2885
2886   return 0;
2887 }
2888
2889 /*?
2890  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2891  * given FIB table to determine if there is a conflict with the
2892  * adjacency table. The fib-id can be determined by using the
2893  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2894  * of 0 is used.
2895  *
2896  * @todo This command uses fib-id, other commands use table-id (not
2897  * just a name, they are different indexes). Would like to change this
2898  * to table-id for consistency.
2899  *
2900  * @cliexpar
2901  * Example of how to run the test lookup command:
2902  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2903  * No errors in 2 lookups
2904  * @cliexend
2905 ?*/
2906 /* *INDENT-OFF* */
2907 VLIB_CLI_COMMAND (lookup_test_command, static) =
2908 {
2909   .path = "test lookup",
2910   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2911   .function = test_lookup_command_fn,
2912 };
2913 /* *INDENT-ON* */
2914
2915 #ifndef CLIB_MARCH_VARIANT
2916 int
2917 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2918 {
2919   u32 fib_index;
2920
2921   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2922
2923   if (~0 == fib_index)
2924     return VNET_API_ERROR_NO_SUCH_FIB;
2925
2926   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2927                                   flow_hash_config);
2928
2929   return 0;
2930 }
2931 #endif
2932
2933 static clib_error_t *
2934 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2935                              unformat_input_t * input,
2936                              vlib_cli_command_t * cmd)
2937 {
2938   int matched = 0;
2939   u32 table_id = 0;
2940   u32 flow_hash_config = 0;
2941   int rv;
2942
2943   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2944     {
2945       if (unformat (input, "table %d", &table_id))
2946         matched = 1;
2947 #define _(a,v) \
2948     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2949       foreach_flow_hash_bit
2950 #undef _
2951         else
2952         break;
2953     }
2954
2955   if (matched == 0)
2956     return clib_error_return (0, "unknown input `%U'",
2957                               format_unformat_error, input);
2958
2959   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2960   switch (rv)
2961     {
2962     case 0:
2963       break;
2964
2965     case VNET_API_ERROR_NO_SUCH_FIB:
2966       return clib_error_return (0, "no such FIB table %d", table_id);
2967
2968     default:
2969       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2970       break;
2971     }
2972
2973   return 0;
2974 }
2975
2976 /*?
2977  * Configure the set of IPv4 fields used by the flow hash.
2978  *
2979  * @cliexpar
2980  * Example of how to set the flow hash on a given table:
2981  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2983  * @cliexstart{show ip fib}
2984  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2985  * 0.0.0.0/0
2986  *   unicast-ip4-chain
2987  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2988  *     [0] [@0]: dpo-drop ip6
2989  * 0.0.0.0/32
2990  *   unicast-ip4-chain
2991  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2992  *     [0] [@0]: dpo-drop ip6
2993  * 224.0.0.0/8
2994  *   unicast-ip4-chain
2995  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2996  *     [0] [@0]: dpo-drop ip6
2997  * 6.0.1.2/32
2998  *   unicast-ip4-chain
2999  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3000  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3001  * 7.0.0.1/32
3002  *   unicast-ip4-chain
3003  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3004  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3005  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3006  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3007  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3008  * 240.0.0.0/8
3009  *   unicast-ip4-chain
3010  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3011  *     [0] [@0]: dpo-drop ip6
3012  * 255.255.255.255/32
3013  *   unicast-ip4-chain
3014  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3015  *     [0] [@0]: dpo-drop ip6
3016  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3017  * 0.0.0.0/0
3018  *   unicast-ip4-chain
3019  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3020  *     [0] [@0]: dpo-drop ip6
3021  * 0.0.0.0/32
3022  *   unicast-ip4-chain
3023  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3024  *     [0] [@0]: dpo-drop ip6
3025  * 172.16.1.0/24
3026  *   unicast-ip4-chain
3027  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3028  *     [0] [@4]: ipv4-glean: af_packet0
3029  * 172.16.1.1/32
3030  *   unicast-ip4-chain
3031  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3032  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3033  * 172.16.1.2/32
3034  *   unicast-ip4-chain
3035  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3036  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3037  * 172.16.2.0/24
3038  *   unicast-ip4-chain
3039  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3040  *     [0] [@4]: ipv4-glean: af_packet1
3041  * 172.16.2.1/32
3042  *   unicast-ip4-chain
3043  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3044  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3045  * 224.0.0.0/8
3046  *   unicast-ip4-chain
3047  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3048  *     [0] [@0]: dpo-drop ip6
3049  * 240.0.0.0/8
3050  *   unicast-ip4-chain
3051  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3052  *     [0] [@0]: dpo-drop ip6
3053  * 255.255.255.255/32
3054  *   unicast-ip4-chain
3055  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3056  *     [0] [@0]: dpo-drop ip6
3057  * @cliexend
3058 ?*/
3059 /* *INDENT-OFF* */
3060 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3061 {
3062   .path = "set ip flow-hash",
3063   .short_help =
3064   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3065   .function = set_ip_flow_hash_command_fn,
3066 };
3067 /* *INDENT-ON* */
3068
3069 #ifndef CLIB_MARCH_VARIANT
3070 int
3071 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3072                              u32 table_index)
3073 {
3074   vnet_main_t *vnm = vnet_get_main ();
3075   vnet_interface_main_t *im = &vnm->interface_main;
3076   ip4_main_t *ipm = &ip4_main;
3077   ip_lookup_main_t *lm = &ipm->lookup_main;
3078   vnet_classify_main_t *cm = &vnet_classify_main;
3079   ip4_address_t *if_addr;
3080
3081   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3082     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3083
3084   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3085     return VNET_API_ERROR_NO_SUCH_ENTRY;
3086
3087   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3088   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3089
3090   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3091
3092   if (NULL != if_addr)
3093     {
3094       fib_prefix_t pfx = {
3095         .fp_len = 32,
3096         .fp_proto = FIB_PROTOCOL_IP4,
3097         .fp_addr.ip4 = *if_addr,
3098       };
3099       u32 fib_index;
3100
3101       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3102                                                        sw_if_index);
3103
3104
3105       if (table_index != (u32) ~ 0)
3106         {
3107           dpo_id_t dpo = DPO_INVALID;
3108
3109           dpo_set (&dpo,
3110                    DPO_CLASSIFY,
3111                    DPO_PROTO_IP4,
3112                    classify_dpo_create (DPO_PROTO_IP4, table_index));
3113
3114           fib_table_entry_special_dpo_add (fib_index,
3115                                            &pfx,
3116                                            FIB_SOURCE_CLASSIFY,
3117                                            FIB_ENTRY_FLAG_NONE, &dpo);
3118           dpo_reset (&dpo);
3119         }
3120       else
3121         {
3122           fib_table_entry_special_remove (fib_index,
3123                                           &pfx, FIB_SOURCE_CLASSIFY);
3124         }
3125     }
3126
3127   return 0;
3128 }
3129 #endif
3130
3131 static clib_error_t *
3132 set_ip_classify_command_fn (vlib_main_t * vm,
3133                             unformat_input_t * input,
3134                             vlib_cli_command_t * cmd)
3135 {
3136   u32 table_index = ~0;
3137   int table_index_set = 0;
3138   u32 sw_if_index = ~0;
3139   int rv;
3140
3141   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3142     {
3143       if (unformat (input, "table-index %d", &table_index))
3144         table_index_set = 1;
3145       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3146                          vnet_get_main (), &sw_if_index))
3147         ;
3148       else
3149         break;
3150     }
3151
3152   if (table_index_set == 0)
3153     return clib_error_return (0, "classify table-index must be specified");
3154
3155   if (sw_if_index == ~0)
3156     return clib_error_return (0, "interface / subif must be specified");
3157
3158   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3159
3160   switch (rv)
3161     {
3162     case 0:
3163       break;
3164
3165     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3166       return clib_error_return (0, "No such interface");
3167
3168     case VNET_API_ERROR_NO_SUCH_ENTRY:
3169       return clib_error_return (0, "No such classifier table");
3170     }
3171   return 0;
3172 }
3173
3174 /*?
3175  * Assign a classification table to an interface. The classification
3176  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
3178  * on an interface.
3179  *
3180  * @cliexpar
3181  * Example of how to assign a classification table to an interface:
3182  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3183 ?*/
3184 /* *INDENT-OFF* */
3185 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3186 {
3187     .path = "set ip classify",
3188     .short_help =
3189     "set ip classify intfc <interface> table-index <classify-idx>",
3190     .function = set_ip_classify_command_fn,
3191 };
3192 /* *INDENT-ON* */
3193
3194 static clib_error_t *
3195 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3196 {
3197   ip4_main_t *im = &ip4_main;
3198   uword heapsize = 0;
3199
3200   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3201     {
3202       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3203         ;
3204       else
3205         return clib_error_return (0,
3206                                   "invalid heap-size parameter `%U'",
3207                                   format_unformat_error, input);
3208     }
3209
3210   im->mtrie_heap_size = heapsize;
3211
3212   return 0;
3213 }
3214
3215 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3216
3217 /*
3218  * fd.io coding-style-patch-verification: ON
3219  *
3220  * Local Variables:
3221  * eval: (c-set-style "gnu")
3222  * End:
3223  */