40c396c4f3b40444067be822f790d6a0e48c9aad
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
59
60 /** @brief IPv4 lookup node.
61     @node ip4-lookup
62
63     This is the main IPv4 lookup dispatch node.
64
65     @param vm vlib_main_t corresponding to the current thread
66     @param node vlib_node_runtime_t
67     @param frame vlib_frame_t whose contents should be dispatched
68
69     @par Graph mechanics: buffer metadata, next index usage
70
71     @em Uses:
72     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73         - Indicates the @c sw_if_index value of the interface that the
74           packet was received on.
75     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76         - When the value is @c ~0 then the node performs a longest prefix
77           match (LPM) for the packet destination address in the FIB attached
78           to the receive interface.
79         - Otherwise perform LPM for the packet destination address in the
80           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81           value (0, 1, ...) and not a VRF id.
82
83     @em Sets:
84     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85         - The lookup result adjacency index.
86
87     <em>Next Index:</em>
88     - Dispatches the packet to the node index found in
89       ip_adjacency_t @c adj->lookup_next_index
90       (where @c adj is the lookup result adjacency).
91 */
92 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
93                                 vlib_frame_t * frame)
94 {
95   return ip4_lookup_inline (vm, node, frame);
96 }
97
98 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
99
100 /* *INDENT-OFF* */
101 VLIB_REGISTER_NODE (ip4_lookup_node) =
102 {
103   .name = "ip4-lookup",
104   .vector_size = sizeof (u32),
105   .format_trace = format_ip4_lookup_trace,
106   .n_next_nodes = IP_LOOKUP_N_NEXT,
107   .next_nodes = IP4_LOOKUP_NEXT_NODES,
108 };
109 /* *INDENT-ON* */
110
111 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
112                                       vlib_node_runtime_t * node,
113                                       vlib_frame_t * frame)
114 {
115   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
116   u32 n_left, *from;
117   u32 thread_index = vm->thread_index;
118   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
119   u16 nexts[VLIB_FRAME_SIZE], *next;
120
121   from = vlib_frame_vector_args (frame);
122   n_left = frame->n_vectors;
123   next = nexts;
124
125   vlib_get_buffers (vm, from, bufs, n_left);
126
127   while (n_left >= 4)
128     {
129       const load_balance_t *lb0, *lb1;
130       const ip4_header_t *ip0, *ip1;
131       u32 lbi0, hc0, lbi1, hc1;
132       const dpo_id_t *dpo0, *dpo1;
133
134       /* Prefetch next iteration. */
135       {
136         vlib_prefetch_buffer_header (b[2], LOAD);
137         vlib_prefetch_buffer_header (b[3], LOAD);
138
139         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
140         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
141       }
142
143       ip0 = vlib_buffer_get_current (b[0]);
144       ip1 = vlib_buffer_get_current (b[1]);
145       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
146       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
147
148       lb0 = load_balance_get (lbi0);
149       lb1 = load_balance_get (lbi1);
150
151       /*
152        * this node is for via FIBs we can re-use the hash value from the
153        * to node if present.
154        * We don't want to use the same hash value at each level in the recursion
155        * graph as that would lead to polarisation
156        */
157       hc0 = hc1 = 0;
158
159       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
160         {
161           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
162             {
163               hc0 = vnet_buffer (b[0])->ip.flow_hash =
164                 vnet_buffer (b[0])->ip.flow_hash >> 1;
165             }
166           else
167             {
168               hc0 = vnet_buffer (b[0])->ip.flow_hash =
169                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
170             }
171           dpo0 = load_balance_get_fwd_bucket
172             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
173         }
174       else
175         {
176           dpo0 = load_balance_get_bucket_i (lb0, 0);
177         }
178       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
179         {
180           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
181             {
182               hc1 = vnet_buffer (b[1])->ip.flow_hash =
183                 vnet_buffer (b[1])->ip.flow_hash >> 1;
184             }
185           else
186             {
187               hc1 = vnet_buffer (b[1])->ip.flow_hash =
188                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
189             }
190           dpo1 = load_balance_get_fwd_bucket
191             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
192         }
193       else
194         {
195           dpo1 = load_balance_get_bucket_i (lb1, 0);
196         }
197
198       next[0] = dpo0->dpoi_next_node;
199       next[1] = dpo1->dpoi_next_node;
200
201       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
202       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
203
204       vlib_increment_combined_counter
205         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
206       vlib_increment_combined_counter
207         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
208
209       b += 2;
210       next += 2;
211       n_left -= 2;
212     }
213
214   while (n_left > 0)
215     {
216       const load_balance_t *lb0;
217       const ip4_header_t *ip0;
218       const dpo_id_t *dpo0;
219       u32 lbi0, hc0;
220
221       ip0 = vlib_buffer_get_current (b[0]);
222       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
223
224       lb0 = load_balance_get (lbi0);
225
226       hc0 = 0;
227       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
228         {
229           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
230             {
231               hc0 = vnet_buffer (b[0])->ip.flow_hash =
232                 vnet_buffer (b[0])->ip.flow_hash >> 1;
233             }
234           else
235             {
236               hc0 = vnet_buffer (b[0])->ip.flow_hash =
237                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
238             }
239           dpo0 = load_balance_get_fwd_bucket
240             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
241         }
242       else
243         {
244           dpo0 = load_balance_get_bucket_i (lb0, 0);
245         }
246
247       next[0] = dpo0->dpoi_next_node;
248       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
249
250       vlib_increment_combined_counter
251         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
252
253       b += 1;
254       next += 1;
255       n_left -= 1;
256     }
257
258   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
259   if (node->flags & VLIB_NODE_FLAG_TRACE)
260     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
261
262   return frame->n_vectors;
263 }
264
265 /* *INDENT-OFF* */
266 VLIB_REGISTER_NODE (ip4_load_balance_node) =
267 {
268   .name = "ip4-load-balance",
269   .vector_size = sizeof (u32),
270   .sibling_of = "ip4-lookup",
271   .format_trace = format_ip4_lookup_trace,
272 };
273 /* *INDENT-ON* */
274
275 #ifndef CLIB_MARCH_VARIANT
276 /* get first interface address */
277 ip4_address_t *
278 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
279                              ip_interface_address_t ** result_ia)
280 {
281   ip_lookup_main_t *lm = &im->lookup_main;
282   ip_interface_address_t *ia = 0;
283   ip4_address_t *result = 0;
284
285   /* *INDENT-OFF* */
286   foreach_ip_interface_address
287     (lm, ia, sw_if_index,
288      1 /* honor unnumbered */ ,
289      ({
290        ip4_address_t * a =
291          ip_interface_address_get_address (lm, ia);
292        result = a;
293        break;
294      }));
295   /* *INDENT-OFF* */
296   if (result_ia)
297     *result_ia = result ? ia : 0;
298   return result;
299 }
300 #endif
301
302 static void
303 ip4_add_subnet_bcast_route (u32 fib_index,
304                             fib_prefix_t *pfx,
305                             u32 sw_if_index)
306 {
307   vnet_sw_interface_flags_t iflags;
308
309   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
310
311   fib_table_entry_special_remove(fib_index,
312                                  pfx,
313                                  FIB_SOURCE_INTERFACE);
314
315   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
316     {
317       fib_table_entry_update_one_path (fib_index, pfx,
318                                        FIB_SOURCE_INTERFACE,
319                                        FIB_ENTRY_FLAG_NONE,
320                                        DPO_PROTO_IP4,
321                                        /* No next-hop address */
322                                        &ADJ_BCAST_ADDR,
323                                        sw_if_index,
324                                        // invalid FIB index
325                                        ~0,
326                                        1,
327                                        // no out-label stack
328                                        NULL,
329                                        FIB_ROUTE_PATH_FLAG_NONE);
330     }
331   else
332     {
333         fib_table_entry_special_add(fib_index,
334                                     pfx,
335                                     FIB_SOURCE_INTERFACE,
336                                     (FIB_ENTRY_FLAG_DROP |
337                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
338     }
339 }
340
341 static void
342 ip4_add_interface_prefix_routes (ip4_main_t *im,
343                                  u32 sw_if_index,
344                                  u32 fib_index,
345                                  ip_interface_address_t * a)
346 {
347   ip_lookup_main_t *lm = &im->lookup_main;
348   ip_interface_prefix_t *if_prefix;
349   ip4_address_t *address = ip_interface_address_get_address (lm, a);
350
351   ip_interface_prefix_key_t key = {
352     .prefix = {
353       .fp_len = a->address_length,
354       .fp_proto = FIB_PROTOCOL_IP4,
355       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
356     },
357     .sw_if_index = sw_if_index,
358   };
359
360   fib_prefix_t pfx_special = {
361     .fp_proto = FIB_PROTOCOL_IP4,
362   };
363
364   /* If prefix already set on interface, just increment ref count & return */
365   if_prefix = ip_get_interface_prefix (lm, &key);
366   if (if_prefix)
367     {
368       if_prefix->ref_count += 1;
369       return;
370     }
371
372   /* New prefix - allocate a pool entry, initialize it, add to the hash */
373   pool_get (lm->if_prefix_pool, if_prefix);
374   if_prefix->ref_count = 1;
375   if_prefix->src_ia_index = a - lm->if_address_pool;
376   clib_memcpy (&if_prefix->key, &key, sizeof (key));
377   mhash_set (&lm->prefix_to_if_prefix_index, &key,
378              if_prefix - lm->if_prefix_pool, 0 /* old value */);
379
380   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
381   if (a->address_length <= 30)
382     {
383       pfx_special.fp_len = a->address_length;
384       pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
385
386       /* set the glean route for the prefix */
387       fib_table_entry_update_one_path (fib_index, &pfx_special,
388                                        FIB_SOURCE_INTERFACE,
389                                        (FIB_ENTRY_FLAG_CONNECTED |
390                                         FIB_ENTRY_FLAG_ATTACHED),
391                                        DPO_PROTO_IP4,
392                                        /* No next-hop address */
393                                        NULL,
394                                        sw_if_index,
395                                        /* invalid FIB index */
396                                        ~0,
397                                        1,
398                                        /* no out-label stack */
399                                        NULL,
400                                        FIB_ROUTE_PATH_FLAG_NONE);
401
402       /* set a drop route for the base address of the prefix */
403       pfx_special.fp_len = 32;
404       pfx_special.fp_addr.ip4.as_u32 =
405         address->as_u32 & im->fib_masks[a->address_length];
406
407       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
408         fib_table_entry_special_add (fib_index, &pfx_special,
409                                      FIB_SOURCE_INTERFACE,
410                                      (FIB_ENTRY_FLAG_DROP |
411                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
412
413       /* set a route for the broadcast address of the prefix */
414       pfx_special.fp_len = 32;
415       pfx_special.fp_addr.ip4.as_u32 =
416         address->as_u32 | ~im->fib_masks[a->address_length];
417       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
418         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
419
420
421     }
422   /* length == 31 - add an attached route for the other address */
423   else if (a->address_length == 31)
424     {
425       pfx_special.fp_len = 32;
426       pfx_special.fp_addr.ip4.as_u32 =
427         address->as_u32 ^ clib_host_to_net_u32(1);
428
429       fib_table_entry_update_one_path (fib_index, &pfx_special,
430                                        FIB_SOURCE_INTERFACE,
431                                        (FIB_ENTRY_FLAG_ATTACHED),
432                                        DPO_PROTO_IP4,
433                                        &pfx_special.fp_addr,
434                                        sw_if_index,
435                                        /* invalid FIB index */
436                                        ~0,
437                                        1,
438                                        NULL,
439                                        FIB_ROUTE_PATH_FLAG_NONE);
440     }
441 }
442
443 static void
444 ip4_add_interface_routes (u32 sw_if_index,
445                           ip4_main_t * im, u32 fib_index,
446                           ip_interface_address_t * a)
447 {
448   ip_lookup_main_t *lm = &im->lookup_main;
449   ip4_address_t *address = ip_interface_address_get_address (lm, a);
450   fib_prefix_t pfx = {
451     .fp_len = 32,
452     .fp_proto = FIB_PROTOCOL_IP4,
453     .fp_addr.ip4 = *address,
454   };
455
456   /* set special routes for the prefix if needed */
457   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
458
459   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
460     {
461       u32 classify_table_index =
462         lm->classify_table_index_by_sw_if_index[sw_if_index];
463       if (classify_table_index != (u32) ~ 0)
464         {
465           dpo_id_t dpo = DPO_INVALID;
466
467           dpo_set (&dpo,
468                    DPO_CLASSIFY,
469                    DPO_PROTO_IP4,
470                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
471
472           fib_table_entry_special_dpo_add (fib_index,
473                                            &pfx,
474                                            FIB_SOURCE_CLASSIFY,
475                                            FIB_ENTRY_FLAG_NONE, &dpo);
476           dpo_reset (&dpo);
477         }
478     }
479
480   fib_table_entry_update_one_path (fib_index, &pfx,
481                                    FIB_SOURCE_INTERFACE,
482                                    (FIB_ENTRY_FLAG_CONNECTED |
483                                     FIB_ENTRY_FLAG_LOCAL),
484                                    DPO_PROTO_IP4,
485                                    &pfx.fp_addr,
486                                    sw_if_index,
487                                    // invalid FIB index
488                                    ~0,
489                                    1, NULL,
490                                    FIB_ROUTE_PATH_FLAG_NONE);
491 }
492
493 static void
494 ip4_del_interface_prefix_routes (ip4_main_t * im,
495                                  u32 sw_if_index,
496                                  u32 fib_index,
497                                  ip4_address_t * address,
498                                  u32 address_length)
499 {
500   ip_lookup_main_t *lm = &im->lookup_main;
501   ip_interface_prefix_t *if_prefix;
502
503   ip_interface_prefix_key_t key = {
504     .prefix = {
505       .fp_len = address_length,
506       .fp_proto = FIB_PROTOCOL_IP4,
507       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
508     },
509     .sw_if_index = sw_if_index,
510   };
511
512   fib_prefix_t pfx_special = {
513     .fp_len = 32,
514     .fp_proto = FIB_PROTOCOL_IP4,
515   };
516
517   if_prefix = ip_get_interface_prefix (lm, &key);
518   if (!if_prefix)
519     {
520       clib_warning ("Prefix not found while deleting %U",
521                     format_ip4_address_and_length, address, address_length);
522       return;
523     }
524
525   if_prefix->ref_count -= 1;
526
527   /*
528    * Routes need to be adjusted if:
529    * - deleting last intf addr in prefix
530    * - deleting intf addr used as default source address in glean adjacency
531    *
532    * We're done now otherwise
533    */
534   if ((if_prefix->ref_count > 0) &&
535       !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
536     return;
537
538   /* length <= 30, delete glean route, first address, last address */
539   if (address_length <= 30)
540     {
541
542       /* remove glean route for prefix */
543       pfx_special.fp_addr.ip4 = *address;
544       pfx_special.fp_len = address_length;
545       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
546
547       /* if no more intf addresses in prefix, remove other special routes */
548       if (!if_prefix->ref_count)
549         {
550           /* first address in prefix */
551           pfx_special.fp_addr.ip4.as_u32 =
552             address->as_u32 & im->fib_masks[address_length];
553           pfx_special.fp_len = 32;
554
555           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
556           fib_table_entry_special_remove (fib_index,
557                                           &pfx_special,
558                                           FIB_SOURCE_INTERFACE);
559
560           /* prefix broadcast address */
561           pfx_special.fp_addr.ip4.as_u32 =
562             address->as_u32 | ~im->fib_masks[address_length];
563           pfx_special.fp_len = 32;
564
565           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
566           fib_table_entry_special_remove (fib_index,
567                                           &pfx_special,
568                                           FIB_SOURCE_INTERFACE);
569         }
570       else
571         /* default source addr just got deleted, find another */
572         {
573           ip_interface_address_t *new_src_ia = NULL;
574           ip4_address_t *new_src_addr = NULL;
575
576           new_src_addr =
577             ip4_interface_address_matching_destination
578               (im, address, sw_if_index, &new_src_ia);
579
580           if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
581
582           pfx_special.fp_len = address_length;
583           pfx_special.fp_addr.ip4 = *new_src_addr;
584
585           /* set new glean route for the prefix */
586           fib_table_entry_update_one_path (fib_index, &pfx_special,
587                                            FIB_SOURCE_INTERFACE,
588                                            (FIB_ENTRY_FLAG_CONNECTED |
589                                             FIB_ENTRY_FLAG_ATTACHED),
590                                            DPO_PROTO_IP4,
591                                            /* No next-hop address */
592                                            NULL,
593                                            sw_if_index,
594                                            /* invalid FIB index */
595                                            ~0,
596                                            1,
597                                            /* no out-label stack */
598                                            NULL,
599                                            FIB_ROUTE_PATH_FLAG_NONE);
600           return;
601         }
602     }
603   /* length == 31, delete attached route for the other address */
604   else if (address_length == 31)
605     {
606       pfx_special.fp_addr.ip4.as_u32 =
607         address->as_u32 ^ clib_host_to_net_u32(1);
608
609       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
610     }
611
612   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
613   pool_put (lm->if_prefix_pool, if_prefix);
614 }
615
616 static void
617 ip4_del_interface_routes (u32 sw_if_index,
618                           ip4_main_t * im,
619                           u32 fib_index,
620                           ip4_address_t * address, u32 address_length)
621 {
622   fib_prefix_t pfx = {
623     .fp_len = address_length,
624     .fp_proto = FIB_PROTOCOL_IP4,
625     .fp_addr.ip4 = *address,
626   };
627
628   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
629                                    address, address_length);
630
631   pfx.fp_len = 32;
632   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
633 }
634
635 #ifndef CLIB_MARCH_VARIANT
636 void
637 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
638 {
639   ip4_main_t *im = &ip4_main;
640
641   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
642
643   /*
644    * enable/disable only on the 1<->0 transition
645    */
646   if (is_enable)
647     {
648       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
649         return;
650     }
651   else
652     {
653       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
654       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
655         return;
656     }
657   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
658                                !is_enable, 0, 0);
659
660
661   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
662                                sw_if_index, !is_enable, 0, 0);
663
664   {
665     ip4_enable_disable_interface_callback_t *cb;
666     vec_foreach (cb, im->enable_disable_interface_callbacks)
667       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
668   }
669 }
670
671 static clib_error_t *
672 ip4_add_del_interface_address_internal (vlib_main_t * vm,
673                                         u32 sw_if_index,
674                                         ip4_address_t * address,
675                                         u32 address_length, u32 is_del)
676 {
677   vnet_main_t *vnm = vnet_get_main ();
678   ip4_main_t *im = &ip4_main;
679   ip_lookup_main_t *lm = &im->lookup_main;
680   clib_error_t *error = 0;
681   u32 if_address_index, elts_before;
682   ip4_address_fib_t ip4_af, *addr_fib = 0;
683
684   /* local0 interface doesn't support IP addressing  */
685   if (sw_if_index == 0)
686     {
687       return
688        clib_error_create ("local0 interface doesn't support IP addressing");
689     }
690
691   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
692   ip4_addr_fib_init (&ip4_af, address,
693                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
694   vec_add1 (addr_fib, ip4_af);
695
696   /*
697    * there is no support for adj-fib handling in the presence of overlapping
698    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
699    * most routers do.
700    */
701   /* *INDENT-OFF* */
702   if (!is_del)
703     {
704       /* When adding an address check that it does not conflict
705          with an existing address on any interface in this table. */
706       ip_interface_address_t *ia;
707       vnet_sw_interface_t *sif;
708
709       pool_foreach(sif, vnm->interface_main.sw_interfaces,
710       ({
711           if (im->fib_index_by_sw_if_index[sw_if_index] ==
712               im->fib_index_by_sw_if_index[sif->sw_if_index])
713             {
714               foreach_ip_interface_address
715                 (&im->lookup_main, ia, sif->sw_if_index,
716                  0 /* honor unnumbered */ ,
717                  ({
718                    ip4_address_t * x =
719                      ip_interface_address_get_address
720                      (&im->lookup_main, ia);
721                    if (ip4_destination_matches_route
722                        (im, address, x, ia->address_length) ||
723                        ip4_destination_matches_route (im,
724                                                       x,
725                                                       address,
726                                                       address_length))
727                      {
728                        /* an intf may have >1 addr from the same prefix */
729                        if ((sw_if_index == sif->sw_if_index) &&
730                            (ia->address_length == address_length) &&
731                            (x->as_u32 != address->as_u32))
732                          continue;
733
734                        /* error if the length or intf was different */
735                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
736
737                        return
738                          clib_error_create
739                          ("failed to add %U on %U which conflicts with %U for interface %U",
740                           format_ip4_address_and_length, address,
741                           address_length,
742                           format_vnet_sw_if_index_name, vnm,
743                           sw_if_index,
744                           format_ip4_address_and_length, x,
745                           ia->address_length,
746                           format_vnet_sw_if_index_name, vnm,
747                           sif->sw_if_index);
748                      }
749                  }));
750             }
751       }));
752     }
753   /* *INDENT-ON* */
754
755   elts_before = pool_elts (lm->if_address_pool);
756
757   error = ip_interface_address_add_del
758     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
759   if (error)
760     goto done;
761
762   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
763
764   /* intf addr routes are added/deleted on admin up/down */
765   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
766     {
767       if (is_del)
768         ip4_del_interface_routes (sw_if_index,
769                                   im, ip4_af.fib_index, address,
770                                   address_length);
771       else
772         ip4_add_interface_routes (sw_if_index,
773                                   im, ip4_af.fib_index,
774                                   pool_elt_at_index
775                                   (lm->if_address_pool, if_address_index));
776     }
777
778   /* If pool did not grow/shrink: add duplicate address. */
779   if (elts_before != pool_elts (lm->if_address_pool))
780     {
781       ip4_add_del_interface_address_callback_t *cb;
782       vec_foreach (cb, im->add_del_interface_address_callbacks)
783         cb->function (im, cb->function_opaque, sw_if_index,
784                       address, address_length, if_address_index, is_del);
785     }
786
787 done:
788   vec_free (addr_fib);
789   return error;
790 }
791
792 clib_error_t *
793 ip4_add_del_interface_address (vlib_main_t * vm,
794                                u32 sw_if_index,
795                                ip4_address_t * address,
796                                u32 address_length, u32 is_del)
797 {
798   return ip4_add_del_interface_address_internal
799     (vm, sw_if_index, address, address_length, is_del);
800 }
801
802 void
803 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
804 {
805   ip_interface_address_t *ia;
806   ip4_main_t *im;
807
808   im = &ip4_main;
809
810   /*
811    * when directed broadcast is enabled, the subnet braodcast route will forward
812    * packets using an adjacency with a broadcast MAC. otherwise it drops
813    */
814   /* *INDENT-OFF* */
815   foreach_ip_interface_address(&im->lookup_main, ia,
816                                sw_if_index, 0,
817      ({
818        if (ia->address_length <= 30)
819          {
820            ip4_address_t *ipa;
821
822            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
823
824            fib_prefix_t pfx = {
825              .fp_len = 32,
826              .fp_proto = FIB_PROTOCOL_IP4,
827              .fp_addr = {
828                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
829              },
830            };
831
832            ip4_add_subnet_bcast_route
833              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
834                                                   sw_if_index),
835               &pfx, sw_if_index);
836          }
837      }));
838   /* *INDENT-ON* */
839 }
840 #endif
841
842 static clib_error_t *
843 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
844 {
845   ip4_main_t *im = &ip4_main;
846   ip_interface_address_t *ia;
847   ip4_address_t *a;
848   u32 is_admin_up, fib_index;
849
850   /* Fill in lookup tables with default table (0). */
851   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
852
853   vec_validate_init_empty (im->
854                            lookup_main.if_address_pool_index_by_sw_if_index,
855                            sw_if_index, ~0);
856
857   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
858
859   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
860
861   /* *INDENT-OFF* */
862   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
863                                 0 /* honor unnumbered */,
864   ({
865     a = ip_interface_address_get_address (&im->lookup_main, ia);
866     if (is_admin_up)
867       ip4_add_interface_routes (sw_if_index,
868                                 im, fib_index,
869                                 ia);
870     else
871       ip4_del_interface_routes (sw_if_index,
872                                 im, fib_index,
873                                 a, ia->address_length);
874   }));
875   /* *INDENT-ON* */
876
877   return 0;
878 }
879
880 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
881
882 /*
 * Built-in ip4 unicast rx feature path definition.
 *
 * The "ip4-unicast" arc starts at the "ip4-input" and
 * "ip4-input-no-checksum" nodes and names "ip4-lookup" as its last node.
 * The arc index is stored through arc_index_ptr into
 * ip4_main.lookup_main.ucast_feature_arc_index.
 *
 * Each VNET_FEATURE_INIT below registers one node on the arc; its
 * .runs_before list names the feature(s) it is ordered ahead of.
 */
883 /* *INDENT-OFF* */
884 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
885 {
886   .arc_name = "ip4-unicast",
887   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
888   .last_in_arc = "ip4-lookup",
889   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
890 };
891
/* ip4-flow-classify is ordered before ip4-inacl. */
892 VNET_FEATURE_INIT (ip4_flow_classify, static) =
893 {
894   .arc_name = "ip4-unicast",
895   .node_name = "ip4-flow-classify",
896   .runs_before = VNET_FEATURES ("ip4-inacl"),
897 };
898
/* ip4-inacl is ordered before ip4-source-check-via-rx. */
899 VNET_FEATURE_INIT (ip4_inacl, static) =
900 {
901   .arc_name = "ip4-unicast",
902   .node_name = "ip4-inacl",
903   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
904 };
905
/* ip4-source-check-via-rx is ordered before ip4-source-check-via-any. */
906 VNET_FEATURE_INIT (ip4_source_check_1, static) =
907 {
908   .arc_name = "ip4-unicast",
909   .node_name = "ip4-source-check-via-rx",
910   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
911 };
912
/* ip4-source-check-via-any is ordered before ip4-policer-classify. */
913 VNET_FEATURE_INIT (ip4_source_check_2, static) =
914 {
915   .arc_name = "ip4-unicast",
916   .node_name = "ip4-source-check-via-any",
917   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
918 };
919
/* The rx source-and-port-range check is also ordered before
 * ip4-policer-classify. */
920 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
921 {
922   .arc_name = "ip4-unicast",
923   .node_name = "ip4-source-and-port-range-check-rx",
924   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
925 };
926
/* ip4-policer-classify is ordered before ipsec4-input-feature. */
927 VNET_FEATURE_INIT (ip4_policer_classify, static) =
928 {
929   .arc_name = "ip4-unicast",
930   .node_name = "ip4-policer-classify",
931   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
932 };
933
/* ipsec4-input-feature is ordered before vpath-input-ip4. */
934 VNET_FEATURE_INIT (ip4_ipsec, static) =
935 {
936   .arc_name = "ip4-unicast",
937   .node_name = "ipsec4-input-feature",
938   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
939 };
940
/* vpath-input-ip4 is ordered before ip4-vxlan-bypass. */
941 VNET_FEATURE_INIT (ip4_vpath, static) =
942 {
943   .arc_name = "ip4-unicast",
944   .node_name = "vpath-input-ip4",
945   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
946 };
947
/* ip4-vxlan-bypass is ordered before ip4-lookup. */
948 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
949 {
950   .arc_name = "ip4-unicast",
951   .node_name = "ip4-vxlan-bypass",
952   .runs_before = VNET_FEATURES ("ip4-lookup"),
953 };
954
/*
 * ip4-not-enabled is ordered before ip4-lookup.  This is the feature that
 * ip4_sw_interface_add_del() switches on when an interface is added and
 * off when it is deleted.
 */
955 VNET_FEATURE_INIT (ip4_not_enabled, static) =
956 {
957   .arc_name = "ip4-unicast",
958   .node_name = "ip4-not-enabled",
959   .runs_before = VNET_FEATURES ("ip4-lookup"),
960 };
961
/* ip4-lookup terminates the arc (matches .last_in_arc above). */
962 VNET_FEATURE_INIT (ip4_lookup, static) =
963 {
964   .arc_name = "ip4-unicast",
965   .node_name = "ip4-lookup",
966   .runs_before = 0,     /* not before any other features */
967 };
968
969 /*
 * Built-in ip4 multicast rx feature path definition.
 *
 * The "ip4-multicast" arc starts at the same two input nodes as the
 * unicast arc and names "ip4-mfib-forward-lookup" as its last node.  The
 * arc index is stored into ip4_main.lookup_main.mcast_feature_arc_index.
 */
970 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
971 {
972   .arc_name = "ip4-multicast",
973   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
974   .last_in_arc = "ip4-mfib-forward-lookup",
975   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
976 };
977
/* vpath-input-ip4 is ordered before the mfib lookup. */
978 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
979 {
980   .arc_name = "ip4-multicast",
981   .node_name = "vpath-input-ip4",
982   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
983 };
984
/*
 * ip4-not-enabled on the multicast arc; toggled per interface by
 * ip4_sw_interface_add_del().
 */
985 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
986 {
987   .arc_name = "ip4-multicast",
988   .node_name = "ip4-not-enabled",
989   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
990 };
991
/* ip4-mfib-forward-lookup terminates the arc (matches .last_in_arc). */
992 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
993 {
994   .arc_name = "ip4-multicast",
995   .node_name = "ip4-mfib-forward-lookup",
996   .runs_before = 0,     /* last feature */
997 };
998
999 /*
 * ip4 tx (output) feature path definition.
 *
 * The "ip4-output" arc starts at "ip4-rewrite", "ip4-midchain" and
 * "ip4-dvr-dpo" and names "interface-output" as its last node.  The arc
 * index is stored into ip4_main.lookup_main.output_feature_arc_index.
 * The source and port-range tx check is the first feature in the ordering
 * declared below.
 */
1000 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1001 {
1002   .arc_name = "ip4-output",
1003   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1004   .last_in_arc = "interface-output",
1005   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1006 };
1007
/* The tx source-and-port-range check is ordered before ip4-outacl. */
1008 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1009 {
1010   .arc_name = "ip4-output",
1011   .node_name = "ip4-source-and-port-range-check-tx",
1012   .runs_before = VNET_FEATURES ("ip4-outacl"),
1013 };
1014
/* ip4-outacl is ordered before ipsec4-output-feature. */
1015 VNET_FEATURE_INIT (ip4_outacl, static) =
1016 {
1017   .arc_name = "ip4-output",
1018   .node_name = "ip4-outacl",
1019   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1020 };
1021
/* ipsec4-output-feature is ordered before interface-output. */
1022 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1023 {
1024   .arc_name = "ip4-output",
1025   .node_name = "ipsec4-output-feature",
1026   .runs_before = VNET_FEATURES ("interface-output"),
1027 };
1028
1029 /* Built-in ip4 tx feature path definition: interface-output terminates
 * the arc (matches .last_in_arc above). */
1030 VNET_FEATURE_INIT (ip4_interface_output, static) =
1031 {
1032   .arc_name = "ip4-output",
1033   .node_name = "interface-output",
1034   .runs_before = 0,     /* not before any other features */
1035 };
1036 /* *INDENT-ON* */
1037
1038 static clib_error_t *
1039 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1040 {
1041   ip4_main_t *im = &ip4_main;
1042
1043   /* Fill in lookup tables with default table (0). */
1044   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1045   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1046
1047   if (!is_add)
1048     {
1049       ip4_main_t *im4 = &ip4_main;
1050       ip_lookup_main_t *lm4 = &im4->lookup_main;
1051       ip_interface_address_t *ia = 0;
1052       ip4_address_t *address;
1053       vlib_main_t *vm = vlib_get_main ();
1054
1055       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1056       /* *INDENT-OFF* */
1057       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1058       ({
1059         address = ip_interface_address_get_address (lm4, ia);
1060         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1061       }));
1062       /* *INDENT-ON* */
1063     }
1064
1065   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1066                                is_add, 0, 0);
1067
1068   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1069                                sw_if_index, is_add, 0, 0);
1070
1071   return /* no error */ 0;
1072 }
1073
1074 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1075
1076 /*
 * Global IP4 main: the single ip4_main_t instance referenced throughout
 * this file as ip4_main.  The definition is only compiled when
 * CLIB_MARCH_VARIANT is not defined.
 */
1077 #ifndef CLIB_MARCH_VARIANT
1078 ip4_main_t ip4_main;
1079 #endif /* CLIB_MARCH_VARIANT */
1080
1081 static clib_error_t *
1082 ip4_lookup_init (vlib_main_t * vm)
1083 {
1084   ip4_main_t *im = &ip4_main;
1085   clib_error_t *error;
1086   uword i;
1087
1088   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1089     return error;
1090   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1091     return (error);
1092   if ((error = vlib_call_init_function (vm, fib_module_init)))
1093     return error;
1094   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1095     return error;
1096
1097   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1098     {
1099       u32 m;
1100
1101       if (i < 32)
1102         m = pow2_mask (i) << (32 - i);
1103       else
1104         m = ~0;
1105       im->fib_masks[i] = clib_host_to_net_u32 (m);
1106     }
1107
1108   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1109
1110   /* Create FIB with index 0 and table id of 0. */
1111   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1112                                      FIB_SOURCE_DEFAULT_ROUTE);
1113   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1114                                       MFIB_SOURCE_DEFAULT_ROUTE);
1115
1116   {
1117     pg_node_t *pn;
1118     pn = pg_get_node (ip4_lookup_node.index);
1119     pn->unformat_edit = unformat_pg_ip4_header;
1120   }
1121
1122   {
1123     ethernet_arp_header_t h;
1124
1125     clib_memset (&h, 0, sizeof (h));
1126
1127 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1128 #define _8(f,v) h.f = v;
1129     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1130     _16 (l3_type, ETHERNET_TYPE_IP4);
1131     _8 (n_l2_address_bytes, 6);
1132     _8 (n_l3_address_bytes, 4);
1133     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1134 #undef _16
1135 #undef _8
1136
1137     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1138                                /* data */ &h,
1139                                sizeof (h),
1140                                /* alloc chunk size */ 8,
1141                                "ip4 arp");
1142   }
1143
1144   return error;
1145 }
1146
1147 VLIB_INIT_FUNCTION (ip4_lookup_init);
1148
/*
 * Per-packet trace record written by ip4_forward_next_trace() and printed
 * by the format_ip4_*_trace functions in this file.
 */
1149 typedef struct
1150 {
1151   /* Adjacency taken: copied from the buffer's ip.adj_index[] slot
   * selected by the caller of ip4_forward_next_trace(). */
1152   u32 dpo_index;
  /* Copied from the buffer's ip.flow_hash. */
1153   u32 flow_hash;
  /* The buffer's sw_if_index[VLIB_TX] when that is not ~0, otherwise the
   * fib index configured for the RX interface.  Note that
   * format_ip4_rewrite_trace() prints this field as "tx_sw_if_index". */
1154   u32 fib_index;
1155
1156   /* Packet data, possibly *after* rewrite.  Copied from the buffer's
   * current data pointer; the array is 64 bytes minus one u32. */
1157   u8 packet_data[64 - 1 * sizeof (u32)];
1158 }
1159 ip4_forward_next_trace_t;
1160
1161 #ifndef CLIB_MARCH_VARIANT
1162 u8 *
1163 format_ip4_forward_next_trace (u8 * s, va_list * args)
1164 {
1165   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1166   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1167   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1168   u32 indent = format_get_indent (s);
1169   s = format (s, "%U%U",
1170               format_white_space, indent,
1171               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1172   return s;
1173 }
1174 #endif
1175
1176 static u8 *
1177 format_ip4_lookup_trace (u8 * s, va_list * args)
1178 {
1179   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1180   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1181   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1182   u32 indent = format_get_indent (s);
1183
1184   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1185               t->fib_index, t->dpo_index, t->flow_hash);
1186   s = format (s, "\n%U%U",
1187               format_white_space, indent,
1188               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1189   return s;
1190 }
1191
1192 static u8 *
1193 format_ip4_rewrite_trace (u8 * s, va_list * args)
1194 {
1195   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1196   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1197   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1198   u32 indent = format_get_indent (s);
1199
1200   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1201               t->fib_index, t->dpo_index, format_ip_adjacency,
1202               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1203   s = format (s, "\n%U%U",
1204               format_white_space, indent,
1205               format_ip_adjacency_packet_data,
1206               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1207   return s;
1208 }
1209
1210 #ifndef CLIB_MARCH_VARIANT
1211 /* Common trace function for all ip4-forward next nodes. */
1212 void
1213 ip4_forward_next_trace (vlib_main_t * vm,
1214                         vlib_node_runtime_t * node,
1215                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1216 {
1217   u32 *from, n_left;
1218   ip4_main_t *im = &ip4_main;
1219
1220   n_left = frame->n_vectors;
1221   from = vlib_frame_vector_args (frame);
1222
1223   while (n_left >= 4)
1224     {
1225       u32 bi0, bi1;
1226       vlib_buffer_t *b0, *b1;
1227       ip4_forward_next_trace_t *t0, *t1;
1228
1229       /* Prefetch next iteration. */
1230       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1231       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1232
1233       bi0 = from[0];
1234       bi1 = from[1];
1235
1236       b0 = vlib_get_buffer (vm, bi0);
1237       b1 = vlib_get_buffer (vm, bi1);
1238
1239       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1240         {
1241           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1242           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1243           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1244           t0->fib_index =
1245             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1246              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1247             vec_elt (im->fib_index_by_sw_if_index,
1248                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1249
1250           clib_memcpy_fast (t0->packet_data,
1251                             vlib_buffer_get_current (b0),
1252                             sizeof (t0->packet_data));
1253         }
1254       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1255         {
1256           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1257           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1258           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1259           t1->fib_index =
1260             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1261              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1262             vec_elt (im->fib_index_by_sw_if_index,
1263                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1264           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1265                             sizeof (t1->packet_data));
1266         }
1267       from += 2;
1268       n_left -= 2;
1269     }
1270
1271   while (n_left >= 1)
1272     {
1273       u32 bi0;
1274       vlib_buffer_t *b0;
1275       ip4_forward_next_trace_t *t0;
1276
1277       bi0 = from[0];
1278
1279       b0 = vlib_get_buffer (vm, bi0);
1280
1281       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1282         {
1283           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1284           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1285           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1286           t0->fib_index =
1287             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1288              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1289             vec_elt (im->fib_index_by_sw_if_index,
1290                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1291           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1292                             sizeof (t0->packet_data));
1293         }
1294       from += 1;
1295       n_left -= 1;
1296     }
1297 }
1298
1299 /* Compute TCP/UDP/ICMP4 checksum in software. */
1300 u16
1301 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1302                               ip4_header_t * ip0)
1303 {
1304   ip_csum_t sum0;
1305   u32 ip_header_length, payload_length_host_byte_order;
1306
1307   /* Initialize checksum with ip header. */
1308   ip_header_length = ip4_header_bytes (ip0);
1309   payload_length_host_byte_order =
1310     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1311   sum0 =
1312     clib_host_to_net_u32 (payload_length_host_byte_order +
1313                           (ip0->protocol << 16));
1314
1315   if (BITS (uword) == 32)
1316     {
1317       sum0 =
1318         ip_csum_with_carry (sum0,
1319                             clib_mem_unaligned (&ip0->src_address, u32));
1320       sum0 =
1321         ip_csum_with_carry (sum0,
1322                             clib_mem_unaligned (&ip0->dst_address, u32));
1323     }
1324   else
1325     sum0 =
1326       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1327
1328   return ip_calculate_l4_checksum (vm, p0, sum0,
1329                                    payload_length_host_byte_order, (u8 *) ip0,
1330                                    ip_header_length, NULL);
1331 }
1332
1333 u32
1334 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1335 {
1336   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1337   udp_header_t *udp0;
1338   u16 sum16;
1339
1340   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1341           || ip0->protocol == IP_PROTOCOL_UDP);
1342
1343   udp0 = (void *) (ip0 + 1);
1344   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1345     {
1346       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1347                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1348       return p0->flags;
1349     }
1350
1351   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1352
1353   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1354                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1355
1356   return p0->flags;
1357 }
1358 #endif
1359
/*
 * Feature arc for locally-destined IPv4 packets.  The "ip4-local" arc
 * starts at the "ip4-local" node and names "ip4-local-end-of-arc" as its
 * last node.  Unlike the other arcs in this file it is not declared
 * static and sets no arc_index_ptr.
 * NOTE(review): ip4_local_set_next_and_error() reads
 * vnet_feat_arc_ip4_local.feature_arc_index, presumably the object this
 * macro defines - confirm against the macro definition.
 */
1360 /* *INDENT-OFF* */
1361 VNET_FEATURE_ARC_INIT (ip4_local) =
1362 {
1363   .arc_name  = "ip4-local",
1364   .start_nodes = VNET_FEATURES ("ip4-local"),
1365   .last_in_arc = "ip4-local-end-of-arc",
1366 };
1367 /* *INDENT-ON* */
1368
1369 static inline void
1370 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1371                             ip4_header_t * ip, u8 is_udp, u8 * error,
1372                             u8 * good_tcp_udp)
1373 {
1374   u32 flags0;
1375   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1376   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1377   if (is_udp)
1378     {
1379       udp_header_t *udp;
1380       u32 ip_len, udp_len;
1381       i32 len_diff;
1382       udp = ip4_next_header (ip);
1383       /* Verify UDP length. */
1384       ip_len = clib_net_to_host_u16 (ip->length);
1385       udp_len = clib_net_to_host_u16 (udp->length);
1386
1387       len_diff = ip_len - udp_len;
1388       *good_tcp_udp &= len_diff >= 0;
1389       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1390     }
1391 }
1392
/*
 * Checksum-state predicates on a buffer pointer.  Each macro parenthesises
 * its arguments and its whole expansion, so it can be negated, combined
 * with other operators, or given a pointer expression as argument.  Every
 * macro evaluates to 0 or 1.
 */

/* True when a TCP or UDP checksum offload flag is set on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
     || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/* True when the packet is TCP/UDP and its checksum has neither been
 * computed already nor been offloaded. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)            \
          || ip4_local_csum_is_offloaded (_b)))

/* True when the buffer is flagged checksum-correct or is offloaded. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
      || ip4_local_csum_is_offloaded (_b)) != 0)
1404
1405 static inline void
1406 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1407                          ip4_header_t * ih, u8 * error)
1408 {
1409   u8 is_udp, is_tcp_udp, good_tcp_udp;
1410
1411   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1412   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1413
1414   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1415     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1416   else
1417     good_tcp_udp = ip4_local_csum_is_valid (b);
1418
1419   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1420   *error = (is_tcp_udp && !good_tcp_udp
1421             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1422 }
1423
1424 static inline void
1425 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1426                             ip4_header_t ** ih, u8 * error)
1427 {
1428   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1429
1430   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1431   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1432
1433   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1434   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1435
1436   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1437   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1438
1439   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1440                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1441     {
1442       if (is_tcp_udp[0])
1443         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1444                                     &good_tcp_udp[0]);
1445       if (is_tcp_udp[1])
1446         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1447                                     &good_tcp_udp[1]);
1448     }
1449
1450   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1451               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1452   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1453               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1454 }
1455
1456 static inline void
1457 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1458                               vlib_buffer_t * b, u16 * next, u8 error,
1459                               u8 head_of_feature_arc)
1460 {
1461   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1462   u32 next_index;
1463
1464   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1465   b->error = error ? error_node->errors[error] : 0;
1466   if (head_of_feature_arc)
1467     {
1468       next_index = *next;
1469       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1470         {
1471           vnet_feature_arc_start (arc_index,
1472                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1473                                   &next_index, b);
1474           *next = next_index;
1475         }
1476     }
1477 }
1478
/*
 * Result of the most recent source-address check, carried between calls
 * of ip4_local_check_src() / ip4_local_check_src_x2() so that a packet
 * with the same source address can reuse it.
 */
1479 typedef struct
1480 {
  /* Source address of the last packet that went through the lookup. */
1481   ip4_address_t src;
  /* Load-balance index found for that source address. */
1482   u32 lbi;
  /* Error code that packet ended up with after the source check. */
1483   u8 error;
  /* Non-zero until cleared by a check function; while set, the cached
   * values above are not used and a full lookup is forced. */
1484   u8 first;
1485 } ip4_local_last_check_t;
1486
1487 static inline void
1488 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1489                      ip4_local_last_check_t * last_check, u8 * error0)
1490 {
1491   ip4_fib_mtrie_leaf_t leaf0;
1492   ip4_fib_mtrie_t *mtrie0;
1493   const dpo_id_t *dpo0;
1494   load_balance_t *lb0;
1495   u32 lbi0;
1496
1497   vnet_buffer (b)->ip.fib_index =
1498     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1499     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1500
1501   /*
1502    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1503    *  adjacency for the destination address (the local interface address).
1504    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1505    *  adjacency for the source address (the remote sender's address)
1506    */
1507   if (PREDICT_FALSE (last_check->first ||
1508                      (last_check->src.as_u32 != ip0->src_address.as_u32)))
1509     {
1510       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1511       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1512       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1513       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1514       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1515
1516       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1517         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1518       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1519
1520       lb0 = load_balance_get (lbi0);
1521       dpo0 = load_balance_get_bucket_i (lb0, 0);
1522
1523       /*
1524        * Must have a route to source otherwise we drop the packet.
1525        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1526        *
1527        * The checks are:
1528        *  - the source is a recieve => it's from us => bogus, do this
1529        *    first since it sets a different error code.
1530        *  - uRPF check for any route to source - accept if passes.
1531        *  - allow packets destined to the broadcast address from unknown sources
1532        */
1533
1534       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1535                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1536                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1537       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1538                   && !fib_urpf_check_size (lb0->lb_urpf)
1539                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1540                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1541
1542       last_check->src.as_u32 = ip0->src_address.as_u32;
1543       last_check->lbi = lbi0;
1544       last_check->error = *error0;
1545     }
1546   else
1547     {
1548       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1549         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1550       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1551       *error0 = last_check->error;
1552       last_check->first = 0;
1553     }
1554 }
1555
1556 static inline void
1557 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1558                         ip4_local_last_check_t * last_check, u8 * error)
1559 {
1560   ip4_fib_mtrie_leaf_t leaf[2];
1561   ip4_fib_mtrie_t *mtrie[2];
1562   const dpo_id_t *dpo[2];
1563   load_balance_t *lb[2];
1564   u32 not_last_hit;
1565   u32 lbi[2];
1566
1567   not_last_hit = last_check->first;
1568   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1569   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1570
1571   vnet_buffer (b[0])->ip.fib_index =
1572     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1573     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1574     vnet_buffer (b[0])->ip.fib_index;
1575
1576   vnet_buffer (b[1])->ip.fib_index =
1577     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1578     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1579     vnet_buffer (b[1])->ip.fib_index;
1580
1581   /*
1582    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1583    *  adjacency for the destination address (the local interface address).
1584    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1585    *  adjacency for the source address (the remote sender's address)
1586    */
1587   if (PREDICT_FALSE (not_last_hit))
1588     {
1589       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1590       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1591
1592       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1593       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1594
1595       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1596                                            &ip[0]->src_address, 2);
1597       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1598                                            &ip[1]->src_address, 2);
1599
1600       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1601                                            &ip[0]->src_address, 3);
1602       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1603                                            &ip[1]->src_address, 3);
1604
1605       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1606       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1607
1608       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1609         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1610       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1611
1612       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1613         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1614       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1615
1616       lb[0] = load_balance_get (lbi[0]);
1617       lb[1] = load_balance_get (lbi[1]);
1618
1619       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1620       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1621
1622       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1623                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1624                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1625       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1626                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1627                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1628                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1629
1630       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1631                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1632                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1633       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1634                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1635                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1636                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1637
1638       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1639       last_check->lbi = lbi[1];
1640       last_check->error = error[1];
1641     }
1642   else
1643     {
1644       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1645         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1646       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1647
1648       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1649         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1650       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1651
1652       error[0] = last_check->error;
1653       error[1] = last_check->error;
1654       last_check->first = 0;
1655     }
1656 }
1657
/*
 * Packet classes returned by ip4_local_classify().  L4 is the first
 * enumerator and therefore 0; ip4_local_inline() relies on that when it
 * tests a packet type for non-zero to decide whether checks are skipped.
 */
1658 enum ip_local_packet_type_e
1659 {
  /* Neither a fragment nor flagged as NATed. */
1660   IP_LOCAL_PACKET_TYPE_L4,
  /* Buffer carries VNET_BUFFER_F_IS_NATED. */
1661   IP_LOCAL_PACKET_TYPE_NAT,
  /* IPv4 fragment; sent to the reassembly next. */
1662   IP_LOCAL_PACKET_TYPE_FRAG,
1663 };
1664
1665 /**
1666  * Determine packet type and next node.
1667  *
1668  * The expectation is that all packets that are not L4 will skip
1669  * checksums and source checks.
1670  */
1671 always_inline u8
1672 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1673 {
1674   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1675
1676   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1677     {
1678       *next = IP_LOCAL_NEXT_REASSEMBLY;
1679       return IP_LOCAL_PACKET_TYPE_FRAG;
1680     }
1681   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1682     {
1683       *next = lm->local_next_by_ip_protocol[ip->protocol];
1684       return IP_LOCAL_PACKET_TYPE_NAT;
1685     }
1686
1687   *next = lm->local_next_by_ip_protocol[ip->protocol];
1688   return IP_LOCAL_PACKET_TYPE_L4;
1689 }
1690
1691 static inline uword
1692 ip4_local_inline (vlib_main_t * vm,
1693                   vlib_node_runtime_t * node,
1694                   vlib_frame_t * frame, int head_of_feature_arc)
1695 {
1696   u32 *from, n_left_from;
1697   vlib_node_runtime_t *error_node =
1698     vlib_node_get_runtime (vm, ip4_input_node.index);
1699   u16 nexts[VLIB_FRAME_SIZE], *next;
1700   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1701   ip4_header_t *ip[2];
1702   u8 error[2], pt[2];
1703
1704   ip4_local_last_check_t last_check = {
1705     /*
1706      * 0.0.0.0 can appear as the source address of an IP packet,
1707      * as can any other address, hence the need to use the 'first'
1708      * member to make sure the .lbi is initialised for the first
1709      * packet.
1710      */
1711     .src = {.as_u32 = 0},
1712     .lbi = ~0,
1713     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1714     .first = 1,
1715   };
1716
1717   from = vlib_frame_vector_args (frame);
1718   n_left_from = frame->n_vectors;
1719
1720   if (node->flags & VLIB_NODE_FLAG_TRACE)
1721     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1722
1723   vlib_get_buffers (vm, from, bufs, n_left_from);
1724   b = bufs;
1725   next = nexts;
1726
1727   while (n_left_from >= 6)
1728     {
1729       u8 not_batch = 0;
1730
1731       /* Prefetch next iteration. */
1732       {
1733         vlib_prefetch_buffer_header (b[4], LOAD);
1734         vlib_prefetch_buffer_header (b[5], LOAD);
1735
1736         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1737         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1738       }
1739
1740       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1741
1742       ip[0] = vlib_buffer_get_current (b[0]);
1743       ip[1] = vlib_buffer_get_current (b[1]);
1744
1745       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1746       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1747
1748       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1749       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1750
1751       not_batch = pt[0] ^ pt[1];
1752
1753       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1754         goto skip_checks;
1755
1756       if (PREDICT_TRUE (not_batch == 0))
1757         {
1758           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1759           ip4_local_check_src_x2 (b, ip, &last_check, error);
1760         }
1761       else
1762         {
1763           if (!pt[0])
1764             {
1765               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1766               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1767             }
1768           if (!pt[1])
1769             {
1770               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1771               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1772             }
1773         }
1774
1775     skip_checks:
1776
1777       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1778                                     head_of_feature_arc);
1779       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1780                                     head_of_feature_arc);
1781
1782       b += 2;
1783       next += 2;
1784       n_left_from -= 2;
1785     }
1786
1787   while (n_left_from > 0)
1788     {
1789       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1790
1791       ip[0] = vlib_buffer_get_current (b[0]);
1792       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1793       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1794
1795       if (head_of_feature_arc == 0 || pt[0])
1796         goto skip_check;
1797
1798       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1799       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1800
1801     skip_check:
1802
1803       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1804                                     head_of_feature_arc);
1805
1806       b += 1;
1807       next += 1;
1808       n_left_from -= 1;
1809     }
1810
1811   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1812   return frame->n_vectors;
1813 }
1814
1815 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1816                                vlib_frame_t * frame)
1817 {
1818   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1819 }
1820
1821 /* *INDENT-OFF* */
1822 VLIB_REGISTER_NODE (ip4_local_node) =
1823 {
1824   .name = "ip4-local",
1825   .vector_size = sizeof (u32),
1826   .format_trace = format_ip4_forward_next_trace,
1827   .n_next_nodes = IP_LOCAL_N_NEXT,
1828   .next_nodes =
1829   {
1830     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1831     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1832     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1833     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1834     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1835   },
1836 };
1837 /* *INDENT-ON* */
1838
1839
1840 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1841                                           vlib_node_runtime_t * node,
1842                                           vlib_frame_t * frame)
1843 {
1844   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1845 }
1846
1847 /* *INDENT-OFF* */
1848 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1849   .name = "ip4-local-end-of-arc",
1850   .vector_size = sizeof (u32),
1851
1852   .format_trace = format_ip4_forward_next_trace,
1853   .sibling_of = "ip4-local",
1854 };
1855
1856 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1857   .arc_name = "ip4-local",
1858   .node_name = "ip4-local-end-of-arc",
1859   .runs_before = 0, /* not before any other features */
1860 };
1861 /* *INDENT-ON* */
1862
1863 #ifndef CLIB_MARCH_VARIANT
1864 void
1865 ip4_register_protocol (u32 protocol, u32 node_index)
1866 {
1867   vlib_main_t *vm = vlib_get_main ();
1868   ip4_main_t *im = &ip4_main;
1869   ip_lookup_main_t *lm = &im->lookup_main;
1870
1871   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1872   lm->local_next_by_ip_protocol[protocol] =
1873     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1874 }
1875
1876 void
1877 ip4_unregister_protocol (u32 protocol)
1878 {
1879   ip4_main_t *im = &ip4_main;
1880   ip_lookup_main_t *lm = &im->lookup_main;
1881
1882   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1883   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1884 }
1885 #endif
1886
1887 static clib_error_t *
1888 show_ip_local_command_fn (vlib_main_t * vm,
1889                           unformat_input_t * input, vlib_cli_command_t * cmd)
1890 {
1891   ip4_main_t *im = &ip4_main;
1892   ip_lookup_main_t *lm = &im->lookup_main;
1893   int i;
1894
1895   vlib_cli_output (vm, "Protocols handled by ip4_local");
1896   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1897     {
1898       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1899         {
1900           u32 node_index = vlib_get_node (vm,
1901                                           ip4_local_node.index)->
1902             next_nodes[lm->local_next_by_ip_protocol[i]];
1903           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1904                            format_vlib_node_name, vm, node_index);
1905         }
1906     }
1907   return 0;
1908 }
1909
1910
1911
/*?
 * Display the set of protocols handled by the local IPv4 stack.
 * One line is printed for each protocol that is not punted, in the
 * form '<protocol>: <node name>'. (The sample below lists protocol
 * numbers only and may not match current output exactly.)
 *
 * @cliexpar
 * Example of how to display local protocol table:
 * @cliexstart{show ip local}
 * Protocols handled by ip4_local
 * 1
 * 17
 * 47
 * @cliexend
?*/
1924 /* *INDENT-OFF* */
1925 VLIB_CLI_COMMAND (show_ip_local, static) =
1926 {
1927   .path = "show ip local",
1928   .function = show_ip_local_command_fn,
1929   .short_help = "show ip local",
1930 };
1931 /* *INDENT-ON* */
1932
1933 always_inline uword
1934 ip4_arp_inline (vlib_main_t * vm,
1935                 vlib_node_runtime_t * node,
1936                 vlib_frame_t * frame, int is_glean)
1937 {
1938   vnet_main_t *vnm = vnet_get_main ();
1939   ip4_main_t *im = &ip4_main;
1940   ip_lookup_main_t *lm = &im->lookup_main;
1941   u32 *from, *to_next_drop;
1942   uword n_left_from, n_left_to_next_drop, next_index;
1943   u32 thread_index = vm->thread_index;
1944   u64 seed;
1945
1946   if (node->flags & VLIB_NODE_FLAG_TRACE)
1947     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1948
1949   seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1950
1951   from = vlib_frame_vector_args (frame);
1952   n_left_from = frame->n_vectors;
1953   next_index = node->cached_next_index;
1954   if (next_index == IP4_ARP_NEXT_DROP)
1955     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1956
1957   while (n_left_from > 0)
1958     {
1959       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1960                            to_next_drop, n_left_to_next_drop);
1961
1962       while (n_left_from > 0 && n_left_to_next_drop > 0)
1963         {
1964           u32 pi0, bi0, adj_index0, sw_if_index0;
1965           ip_adjacency_t *adj0;
1966           vlib_buffer_t *p0, *b0;
1967           ip4_address_t resolve0;
1968           ethernet_arp_header_t *h0;
1969           vnet_hw_interface_t *hw_if0;
1970           u64 r0;
1971
1972           pi0 = from[0];
1973           p0 = vlib_get_buffer (vm, pi0);
1974
1975           from += 1;
1976           n_left_from -= 1;
1977           to_next_drop[0] = pi0;
1978           to_next_drop += 1;
1979           n_left_to_next_drop -= 1;
1980
1981           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1982           adj0 = adj_get (adj_index0);
1983
1984           if (is_glean)
1985             {
1986               /* resolve the packet's destination */
1987               ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1988               resolve0 = ip0->dst_address;
1989             }
1990           else
1991             {
1992               /* resolve the incomplete adj */
1993               resolve0 = adj0->sub_type.nbr.next_hop.ip4;
1994             }
1995
1996           /* combine the address and interface for the hash key */
1997           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1998           r0 = (u64) resolve0.data_u32 << 32;
1999           r0 |= sw_if_index0;
2000
2001           if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
2002             {
2003               p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
2004               continue;
2005             }
2006
2007           /*
2008            * the adj has been updated to a rewrite but the node the DPO that got
2009            * us here hasn't - yet. no big deal. we'll drop while we wait.
2010            */
2011           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2012             {
2013               p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
2014               continue;
2015             }
2016
2017           /*
2018            * Can happen if the control-plane is programming tables
2019            * with traffic flowing; at least that's today's lame excuse.
2020            */
2021           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2022               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2023             {
2024               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2025               continue;
2026             }
2027           /* Send ARP request. */
2028           h0 =
2029             vlib_packet_template_get_packet (vm,
2030                                              &im->ip4_arp_request_packet_template,
2031                                              &bi0);
2032           /* Seems we're out of buffers */
2033           if (PREDICT_FALSE (!h0))
2034             {
2035               p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
2036               continue;
2037             }
2038
2039           b0 = vlib_get_buffer (vm, bi0);
2040
2041           /* copy the persistent fields from the original */
2042           clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
2043
2044           /* Add rewrite/encap string for ARP packet. */
2045           vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2046
2047           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2048
2049           /* Src ethernet address in ARP header. */
2050           mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
2051                                   hw_if0->hw_address);
2052           if (is_glean)
2053             {
2054               /* The interface's source address is stashed in the Glean Adj */
2055               h0->ip4_over_ethernet[0].ip4 =
2056                 adj0->sub_type.glean.receive_addr.ip4;
2057             }
2058           else
2059             {
2060               /* Src IP address in ARP header. */
2061               if (ip4_src_address_for_packet (lm, sw_if_index0,
2062                                               &h0->ip4_over_ethernet[0].ip4))
2063                 {
2064                   /* No source address available */
2065                   p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2066                   vlib_buffer_free (vm, &bi0, 1);
2067                   continue;
2068                 }
2069             }
2070           h0->ip4_over_ethernet[1].ip4 = resolve0;
2071
2072           p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
2073
2074           vlib_buffer_copy_trace_flag (vm, p0, bi0);
2075           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
2076           vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2077
2078           vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2079
2080           vlib_set_next_frame_buffer (vm, node,
2081                                       adj0->rewrite_header.next_index, bi0);
2082         }
2083
2084       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2085     }
2086
2087   return frame->n_vectors;
2088 }
2089
2090 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2091                              vlib_frame_t * frame)
2092 {
2093   return (ip4_arp_inline (vm, node, frame, 0));
2094 }
2095
2096 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2097                                vlib_frame_t * frame)
2098 {
2099   return (ip4_arp_inline (vm, node, frame, 1));
2100 }
2101
2102 static char *ip4_arp_error_strings[] = {
2103   [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
2104   [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
2105   [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
2106   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2107   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2108   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2109 };
2110
2111 /* *INDENT-OFF* */
2112 VLIB_REGISTER_NODE (ip4_arp_node) =
2113 {
2114   .name = "ip4-arp",
2115   .vector_size = sizeof (u32),
2116   .format_trace = format_ip4_forward_next_trace,
2117   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2118   .error_strings = ip4_arp_error_strings,
2119   .n_next_nodes = IP4_ARP_N_NEXT,
2120   .next_nodes =
2121   {
2122     [IP4_ARP_NEXT_DROP] = "error-drop",
2123   },
2124 };
2125
2126 VLIB_REGISTER_NODE (ip4_glean_node) =
2127 {
2128   .name = "ip4-glean",
2129   .vector_size = sizeof (u32),
2130   .format_trace = format_ip4_forward_next_trace,
2131   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2132   .error_strings = ip4_arp_error_strings,
2133   .n_next_nodes = IP4_ARP_N_NEXT,
2134   .next_nodes = {
2135   [IP4_ARP_NEXT_DROP] = "error-drop",
2136   },
2137 };
2138 /* *INDENT-ON* */
2139
/*
 * X-macro list of IP4_ARP_ERROR_* suffixes.  The user defines _(a)
 * before expanding the list; arp_notrace_init() uses it to register
 * each error with the pcap drop-trace filter.
 */
#define foreach_notrace_ip4_arp_error   \
  _ (THROTTLED)                         \
  _ (RESOLVED)                          \
  _ (NO_BUFFERS)                        \
  _ (REQUEST_SENT)                      \
  _ (NON_ARP_ADJ)                       \
  _ (NO_SOURCE_ADDRESS)
2147
2148 static clib_error_t *
2149 arp_notrace_init (vlib_main_t * vm)
2150 {
2151   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2152
2153   /* don't trace ARP request packets */
2154 #define _(a)                                    \
2155     vnet_pcap_drop_trace_filter_add_del         \
2156         (rt->errors[IP4_ARP_ERROR_##a],         \
2157          1 /* is_add */);
2158   foreach_notrace_ip4_arp_error;
2159 #undef _
2160   return 0;
2161 }
2162
2163 VLIB_INIT_FUNCTION (arp_notrace_init);
2164
2165
2166 #ifndef CLIB_MARCH_VARIANT
2167 /* Send an ARP request to see if given destination is reachable on given interface. */
2168 clib_error_t *
2169 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
2170                     u8 refresh)
2171 {
2172   vnet_main_t *vnm = vnet_get_main ();
2173   ip4_main_t *im = &ip4_main;
2174   ethernet_arp_header_t *h;
2175   ip4_address_t *src;
2176   ip_interface_address_t *ia;
2177   ip_adjacency_t *adj;
2178   vnet_hw_interface_t *hi;
2179   vnet_sw_interface_t *si;
2180   vlib_buffer_t *b;
2181   adj_index_t ai;
2182   u32 bi = 0;
2183   u8 unicast_rewrite = 0;
2184
2185   si = vnet_get_sw_interface (vnm, sw_if_index);
2186
2187   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2188     {
2189       return clib_error_return (0, "%U: interface %U down",
2190                                 format_ip4_address, dst,
2191                                 format_vnet_sw_if_index_name, vnm,
2192                                 sw_if_index);
2193     }
2194
2195   src =
2196     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2197   if (!src)
2198     {
2199       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2200       return clib_error_return
2201         (0,
2202          "no matching interface address for destination %U (interface %U)",
2203          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2204          sw_if_index);
2205     }
2206
2207   h = vlib_packet_template_get_packet (vm,
2208                                        &im->ip4_arp_request_packet_template,
2209                                        &bi);
2210
2211   if (!h)
2212     return clib_error_return (0, "ARP request packet allocation failed");
2213
2214   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2215   if (PREDICT_FALSE (!hi->hw_address))
2216     {
2217       return clib_error_return (0, "%U: interface %U do not support ip probe",
2218                                 format_ip4_address, dst,
2219                                 format_vnet_sw_if_index_name, vnm,
2220                                 sw_if_index);
2221     }
2222
2223   mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2224
2225   h->ip4_over_ethernet[0].ip4 = src[0];
2226   h->ip4_over_ethernet[1].ip4 = dst[0];
2227
2228   b = vlib_get_buffer (vm, bi);
2229   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2230     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2231
2232   ip46_address_t nh = {
2233     .ip4 = *dst,
2234   };
2235
2236   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2237                             VNET_LINK_IP4, &nh, sw_if_index);
2238   adj = adj_get (ai);
2239
2240   /* Peer has been previously resolved, retrieve glean adj instead */
2241   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2242     {
2243       if (refresh)
2244         unicast_rewrite = 1;
2245       else
2246         {
2247           adj_unlock (ai);
2248           ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2249                                       VNET_LINK_IP4, sw_if_index, &nh);
2250           adj = adj_get (ai);
2251         }
2252     }
2253
2254   /* Add encapsulation string for software interface (e.g. ethernet header). */
2255   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2256   if (unicast_rewrite)
2257     {
2258       u16 *etype = vlib_buffer_get_current (b) - 2;
2259       etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2260     }
2261   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2262
2263   {
2264     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2265     u32 *to_next = vlib_frame_vector_args (f);
2266     to_next[0] = bi;
2267     f->n_vectors = 1;
2268     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2269   }
2270
2271   adj_unlock (ai);
2272   return /* no error */ 0;
2273 }
2274 #endif
2275
/* Next-node indices used by the ip4 rewrite path. */
typedef enum
{
  /* Initial next for every buffer until a later step overrides it. */
  IP4_REWRITE_NEXT_DROP = 0,
  /* Chosen when an ICMP error is to be generated (TTL expired, DF and over MTU). */
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  /* Chosen when the packet exceeds the MTU and may be fragmented. */
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  /* Number of next indices; must remain last. */
  IP4_REWRITE_N_NEXT = 3
} ip4_rewrite_next_t;
2283
/**
 * The bits of an IPv4 address that are kept when constructing a
 * multicast MAC address.  The address is masked as loaded from the
 * packet, so the constant depends on the host byte order.
 */
#if CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK (0x007fffff)
#else
#define IP4_MCAST_ADDR_MASK (0xffff7f00)
#endif
2293
2294 always_inline void
2295 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2296                u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2297 {
2298   if (packet_len > adj_packet_bytes)
2299     {
2300       *error = IP4_ERROR_MTU_EXCEEDED;
2301       if (df)
2302         {
2303           icmp4_error_set_vnet_buffer
2304             (b, ICMP4_destination_unreachable,
2305              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2306              adj_packet_bytes);
2307           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2308         }
2309       else
2310         {
2311           /* IP fragmentation */
2312           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2313                                    IP4_FRAG_NEXT_IP4_REWRITE, 0);
2314           *next = IP4_REWRITE_NEXT_FRAGMENT;
2315         }
2316     }
2317 }
2318
2319 /* Decrement TTL & update checksum.
2320    Works either endian, so no need for byte swap. */
2321 static_always_inline void
2322 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2323                             u32 * error)
2324 {
2325   i32 ttl;
2326   u32 checksum;
2327   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2328     {
2329       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2330       return;
2331     }
2332
2333   ttl = ip->ttl;
2334
2335   /* Input node should have reject packets with ttl 0. */
2336   ASSERT (ip->ttl > 0);
2337
2338   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2339   checksum += checksum >= 0xffff;
2340
2341   ip->checksum = checksum;
2342   ttl -= 1;
2343   ip->ttl = ttl;
2344
2345   /*
2346    * If the ttl drops below 1 when forwarding, generate
2347    * an ICMP response.
2348    */
2349   if (PREDICT_FALSE (ttl <= 0))
2350     {
2351       *error = IP4_ERROR_TIME_EXPIRED;
2352       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2353       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2354                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2355                                    0);
2356       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2357     }
2358
2359   /* Verify checksum. */
2360   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2361           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2362 }
2363
2364
2365 always_inline uword
2366 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2367                              vlib_node_runtime_t * node,
2368                              vlib_frame_t * frame,
2369                              int do_counters, int is_midchain, int is_mcast,
2370                              int do_gso)
2371 {
2372   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2373   u32 *from = vlib_frame_vector_args (frame);
2374   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2375   u16 nexts[VLIB_FRAME_SIZE], *next;
2376   u32 n_left_from;
2377   vlib_node_runtime_t *error_node =
2378     vlib_node_get_runtime (vm, ip4_input_node.index);
2379
2380   n_left_from = frame->n_vectors;
2381   u32 thread_index = vm->thread_index;
2382
2383   vlib_get_buffers (vm, from, bufs, n_left_from);
2384   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2385
2386 #if (CLIB_N_PREFETCHES >= 8)
2387   if (n_left_from >= 6)
2388     {
2389       int i;
2390       for (i = 2; i < 6; i++)
2391         vlib_prefetch_buffer_header (bufs[i], LOAD);
2392     }
2393
2394   next = nexts;
2395   b = bufs;
2396   while (n_left_from >= 8)
2397     {
2398       ip_adjacency_t *adj0, *adj1;
2399       ip4_header_t *ip0, *ip1;
2400       u32 rw_len0, error0, adj_index0;
2401       u32 rw_len1, error1, adj_index1;
2402       u32 tx_sw_if_index0, tx_sw_if_index1;
2403       u8 *p;
2404
2405       vlib_prefetch_buffer_header (b[6], LOAD);
2406       vlib_prefetch_buffer_header (b[7], LOAD);
2407
2408       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2409       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2410
2411       /*
2412        * pre-fetch the per-adjacency counters
2413        */
2414       if (do_counters)
2415         {
2416           vlib_prefetch_combined_counter (&adjacency_counters,
2417                                           thread_index, adj_index0);
2418           vlib_prefetch_combined_counter (&adjacency_counters,
2419                                           thread_index, adj_index1);
2420         }
2421
2422       ip0 = vlib_buffer_get_current (b[0]);
2423       ip1 = vlib_buffer_get_current (b[1]);
2424
2425       error0 = error1 = IP4_ERROR_NONE;
2426
2427       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2428       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2429
2430       /* Rewrite packet header and updates lengths. */
2431       adj0 = adj_get (adj_index0);
2432       adj1 = adj_get (adj_index1);
2433
2434       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2435       rw_len0 = adj0[0].rewrite_header.data_bytes;
2436       rw_len1 = adj1[0].rewrite_header.data_bytes;
2437       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2438       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2439
2440       p = vlib_buffer_get_current (b[2]);
2441       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2442       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2443
2444       p = vlib_buffer_get_current (b[3]);
2445       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2446       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2447
2448       /* Check MTU of outgoing interface. */
2449       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2450       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2451
2452       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2453         ip0_len = gso_mtu_sz (b[0]);
2454       if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
2455         ip1_len = gso_mtu_sz (b[1]);
2456
2457       ip4_mtu_check (b[0], ip0_len,
2458                      adj0[0].rewrite_header.max_l3_packet_bytes,
2459                      ip0->flags_and_fragment_offset &
2460                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2461                      next + 0, &error0);
2462       ip4_mtu_check (b[1], ip1_len,
2463                      adj1[0].rewrite_header.max_l3_packet_bytes,
2464                      ip1->flags_and_fragment_offset &
2465                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2466                      next + 1, &error1);
2467
2468       if (is_mcast)
2469         {
2470           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2471                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2472                     IP4_ERROR_SAME_INTERFACE : error0);
2473           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2474                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2475                     IP4_ERROR_SAME_INTERFACE : error1);
2476         }
2477
2478       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2479        * to see the IP header */
2480       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2481         {
2482           u32 next_index = adj0[0].rewrite_header.next_index;
2483           vlib_buffer_advance (b[0], -(word) rw_len0);
2484           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2485           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2486
2487           if (PREDICT_FALSE
2488               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2489             vnet_feature_arc_start (lm->output_feature_arc_index,
2490                                     tx_sw_if_index0, &next_index, b[0]);
2491           next[0] = next_index;
2492         }
2493       else
2494         {
2495           b[0]->error = error_node->errors[error0];
2496         }
2497       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2498         {
2499           u32 next_index = adj1[0].rewrite_header.next_index;
2500           vlib_buffer_advance (b[1], -(word) rw_len1);
2501
2502           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2503           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2504
2505           if (PREDICT_FALSE
2506               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2507             vnet_feature_arc_start (lm->output_feature_arc_index,
2508                                     tx_sw_if_index1, &next_index, b[1]);
2509           next[1] = next_index;
2510         }
2511       else
2512         {
2513           b[1]->error = error_node->errors[error1];
2514         }
2515       if (is_midchain)
2516         {
2517           calc_checksums (vm, b[0]);
2518           calc_checksums (vm, b[1]);
2519         }
2520       /* Guess we are only writing on simple Ethernet header. */
2521       vnet_rewrite_two_headers (adj0[0], adj1[0],
2522                                 ip0, ip1, sizeof (ethernet_header_t));
2523
2524       /*
2525        * Bump the per-adjacency counters
2526        */
2527       if (do_counters)
2528         {
2529           vlib_increment_combined_counter
2530             (&adjacency_counters,
2531              thread_index,
2532              adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2533
2534           vlib_increment_combined_counter
2535             (&adjacency_counters,
2536              thread_index,
2537              adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2538         }
2539
2540       if (is_midchain)
2541         {
2542           if (adj0->sub_type.midchain.fixup_func)
2543             adj0->sub_type.midchain.fixup_func
2544               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2545           if (adj1->sub_type.midchain.fixup_func)
2546             adj1->sub_type.midchain.fixup_func
2547               (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2548         }
2549
2550       if (is_mcast)
2551         {
2552           /*
2553            * copy bytes from the IP address into the MAC rewrite
2554            */
2555           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2556                                       adj0->rewrite_header.dst_mcast_offset,
2557                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2558           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2559                                       adj1->rewrite_header.dst_mcast_offset,
2560                                       &ip1->dst_address.as_u32, (u8 *) ip1);
2561         }
2562
2563       next += 2;
2564       b += 2;
2565       n_left_from -= 2;
2566     }
2567 #elif (CLIB_N_PREFETCHES >= 4)
2568   next = nexts;
2569   b = bufs;
2570   while (n_left_from >= 1)
2571     {
2572       ip_adjacency_t *adj0;
2573       ip4_header_t *ip0;
2574       u32 rw_len0, error0, adj_index0;
2575       u32 tx_sw_if_index0;
2576       u8 *p;
2577
2578       /* Prefetch next iteration */
2579       if (PREDICT_TRUE (n_left_from >= 4))
2580         {
2581           ip_adjacency_t *adj2;
2582           u32 adj_index2;
2583
2584           vlib_prefetch_buffer_header (b[3], LOAD);
2585           vlib_prefetch_buffer_data (b[2], LOAD);
2586
2587           /* Prefetch adj->rewrite_header */
2588           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2589           adj2 = adj_get (adj_index2);
2590           p = (u8 *) adj2;
2591           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2592                          LOAD);
2593         }
2594
2595       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2596
2597       /*
2598        * Prefetch the per-adjacency counters
2599        */
2600       if (do_counters)
2601         {
2602           vlib_prefetch_combined_counter (&adjacency_counters,
2603                                           thread_index, adj_index0);
2604         }
2605
2606       ip0 = vlib_buffer_get_current (b[0]);
2607
2608       error0 = IP4_ERROR_NONE;
2609
2610       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2611
2612       /* Rewrite packet header and updates lengths. */
2613       adj0 = adj_get (adj_index0);
2614
2615       /* Rewrite header was prefetched. */
2616       rw_len0 = adj0[0].rewrite_header.data_bytes;
2617       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2618
2619       /* Check MTU of outgoing interface. */
2620       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2621
2622       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2623         ip0_len = gso_mtu_sz (b[0]);
2624
2625       ip4_mtu_check (b[0], ip0_len,
2626                      adj0[0].rewrite_header.max_l3_packet_bytes,
2627                      ip0->flags_and_fragment_offset &
2628                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2629                      next + 0, &error0);
2630
2631       if (is_mcast)
2632         {
2633           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2634                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2635                     IP4_ERROR_SAME_INTERFACE : error0);
2636         }
2637
2638       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2639        * to see the IP header */
2640       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2641         {
2642           u32 next_index = adj0[0].rewrite_header.next_index;
2643           vlib_buffer_advance (b[0], -(word) rw_len0);
2644           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2645           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2646
2647           if (PREDICT_FALSE
2648               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2649             vnet_feature_arc_start (lm->output_feature_arc_index,
2650                                     tx_sw_if_index0, &next_index, b[0]);
2651           next[0] = next_index;
2652         }
2653       else
2654         {
2655           b[0]->error = error_node->errors[error0];
2656         }
2657       if (is_midchain)
2658         {
2659           calc_checksums (vm, b[0]);
2660         }
2661       /* Guess we are only writing on simple Ethernet header. */
2662       vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2663
2664       /*
2665        * Bump the per-adjacency counters
2666        */
2667       if (do_counters)
2668         {
2669           vlib_increment_combined_counter
2670             (&adjacency_counters,
2671              thread_index,
2672              adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2673         }
2674
2675       if (is_midchain)
2676         {
2677           if (adj0->sub_type.midchain.fixup_func)
2678             adj0->sub_type.midchain.fixup_func
2679               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2680         }
2681
2682       if (is_mcast)
2683         {
2684           /*
2685            * copy bytes from the IP address into the MAC rewrite
2686            */
2687           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2688                                       adj0->rewrite_header.dst_mcast_offset,
2689                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2690         }
2691
2692       next += 1;
2693       b += 1;
2694       n_left_from -= 1;
2695     }
2696 #endif
2697
2698   while (n_left_from > 0)
2699     {
2700       ip_adjacency_t *adj0;
2701       ip4_header_t *ip0;
2702       u32 rw_len0, adj_index0, error0;
2703       u32 tx_sw_if_index0;
2704
2705       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2706
2707       adj0 = adj_get (adj_index0);
2708
2709       if (do_counters)
2710         vlib_prefetch_combined_counter (&adjacency_counters,
2711                                         thread_index, adj_index0);
2712
2713       ip0 = vlib_buffer_get_current (b[0]);
2714
2715       error0 = IP4_ERROR_NONE;
2716
2717       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2718
2719
2720       /* Update packet buffer attributes/set output interface. */
2721       rw_len0 = adj0[0].rewrite_header.data_bytes;
2722       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2723
2724       /* Check MTU of outgoing interface. */
2725       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2726       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2727         ip0_len = gso_mtu_sz (b[0]);
2728
2729       ip4_mtu_check (b[0], ip0_len,
2730                      adj0[0].rewrite_header.max_l3_packet_bytes,
2731                      ip0->flags_and_fragment_offset &
2732                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2733                      next + 0, &error0);
2734
2735       if (is_mcast)
2736         {
2737           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2738                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2739                     IP4_ERROR_SAME_INTERFACE : error0);
2740         }
2741
2742       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2743        * to see the IP header */
2744       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2745         {
2746           u32 next_index = adj0[0].rewrite_header.next_index;
2747           vlib_buffer_advance (b[0], -(word) rw_len0);
2748           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2749           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2750
2751           if (PREDICT_FALSE
2752               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2753             vnet_feature_arc_start (lm->output_feature_arc_index,
2754                                     tx_sw_if_index0, &next_index, b[0]);
2755           next[0] = next_index;
2756         }
2757       else
2758         {
2759           b[0]->error = error_node->errors[error0];
2760         }
2761       if (is_midchain)
2762         {
2763           calc_checksums (vm, b[0]);
2764         }
2765       /* Guess we are only writing on simple Ethernet header. */
2766       vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2767
2768       if (do_counters)
2769         vlib_increment_combined_counter
2770           (&adjacency_counters,
2771            thread_index, adj_index0, 1,
2772            vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2773
2774       if (is_midchain)
2775         {
2776           if (adj0->sub_type.midchain.fixup_func)
2777             adj0->sub_type.midchain.fixup_func
2778               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2779         }
2780
2781       if (is_mcast)
2782         {
2783           /*
2784            * copy bytes from the IP address into the MAC rewrite
2785            */
2786           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2787                                       adj0->rewrite_header.dst_mcast_offset,
2788                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2789         }
2790
2791       next += 1;
2792       b += 1;
2793       n_left_from -= 1;
2794     }
2795
2796
2797   /* Need to do trace after rewrites to pick up new packet data. */
2798   if (node->flags & VLIB_NODE_FLAG_TRACE)
2799     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2800
2801   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2802   return frame->n_vectors;
2803 }
2804
2805 always_inline uword
2806 ip4_rewrite_inline (vlib_main_t * vm,
2807                     vlib_node_runtime_t * node,
2808                     vlib_frame_t * frame,
2809                     int do_counters, int is_midchain, int is_mcast)
2810 {
2811   vnet_main_t *vnm = vnet_get_main ();
2812   if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2813     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2814                                         is_midchain, is_mcast,
2815                                         1 /* do_gso */ );
2816   else
2817     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2818                                         is_midchain, is_mcast,
2819                                         0 /* no do_gso */ );
2820 }
2821
2822
2823 /** @brief IPv4 rewrite node.
2824     @node ip4-rewrite
2825
2826     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2827     header checksum, fetch the ip adjacency, check the outbound mtu,
2828     apply the adjacency rewrite, and send pkts to the adjacency
2829     rewrite header's rewrite_next_index.
2830
2831     @param vm vlib_main_t corresponding to the current thread
2832     @param node vlib_node_runtime_t
2833     @param frame vlib_frame_t whose contents should be dispatched
2834
2835     @par Graph mechanics: buffer metadata, next index usage
2836
2837     @em Uses:
2838     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2839         - the rewrite adjacency index
2840     - <code>adj->lookup_next_index</code>
2841         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2842           the packet will be dropped.
2843     - <code>adj->rewrite_header</code>
2844         - Rewrite string length, rewrite string, next_index
2845
2846     @em Sets:
2847     - <code>b->current_data, b->current_length</code>
2848         - Updated net of applying the rewrite string
2849
2850     <em>Next Indices:</em>
2851     - <code> adj->rewrite_header.next_index </code>
2852       or @c ip4-drop
2853 */
2854
2855 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2856                                  vlib_frame_t * frame)
2857 {
2858   if (adj_are_counters_enabled ())
2859     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2860   else
2861     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2862 }
2863
2864 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2865                                        vlib_node_runtime_t * node,
2866                                        vlib_frame_t * frame)
2867 {
2868   if (adj_are_counters_enabled ())
2869     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2870   else
2871     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2872 }
2873
2874 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2875                                   vlib_node_runtime_t * node,
2876                                   vlib_frame_t * frame)
2877 {
2878   if (adj_are_counters_enabled ())
2879     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2880   else
2881     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2882 }
2883
2884 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2885                                        vlib_node_runtime_t * node,
2886                                        vlib_frame_t * frame)
2887 {
2888   if (adj_are_counters_enabled ())
2889     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2890   else
2891     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2892 }
2893
2894 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2895                                         vlib_node_runtime_t * node,
2896                                         vlib_frame_t * frame)
2897 {
2898   if (adj_are_counters_enabled ())
2899     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2900   else
2901     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2902 }
2903
2904 /* *INDENT-OFF* */
2905 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2906   .name = "ip4-rewrite",
2907   .vector_size = sizeof (u32),
2908
2909   .format_trace = format_ip4_rewrite_trace,
2910
2911   .n_next_nodes = IP4_REWRITE_N_NEXT,
2912   .next_nodes = {
2913     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2914     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2915     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2916   },
2917 };
2918
2919 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2920   .name = "ip4-rewrite-bcast",
2921   .vector_size = sizeof (u32),
2922
2923   .format_trace = format_ip4_rewrite_trace,
2924   .sibling_of = "ip4-rewrite",
2925 };
2926
2927 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2928   .name = "ip4-rewrite-mcast",
2929   .vector_size = sizeof (u32),
2930
2931   .format_trace = format_ip4_rewrite_trace,
2932   .sibling_of = "ip4-rewrite",
2933 };
2934
2935 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2936   .name = "ip4-mcast-midchain",
2937   .vector_size = sizeof (u32),
2938
2939   .format_trace = format_ip4_rewrite_trace,
2940   .sibling_of = "ip4-rewrite",
2941 };
2942
2943 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2944   .name = "ip4-midchain",
2945   .vector_size = sizeof (u32),
2946   .format_trace = format_ip4_forward_next_trace,
2947   .sibling_of =  "ip4-rewrite",
2948 };
/* *INDENT-ON* */
2950
2951 static int
2952 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2953 {
2954   ip4_fib_mtrie_t *mtrie0;
2955   ip4_fib_mtrie_leaf_t leaf0;
2956   u32 lbi0;
2957
2958   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2959
2960   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2961   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2962   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2963
2964   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2965
2966   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2967 }
2968
2969 static clib_error_t *
2970 test_lookup_command_fn (vlib_main_t * vm,
2971                         unformat_input_t * input, vlib_cli_command_t * cmd)
2972 {
2973   ip4_fib_t *fib;
2974   u32 table_id = 0;
2975   f64 count = 1;
2976   u32 n;
2977   int i;
2978   ip4_address_t ip4_base_address;
2979   u64 errors = 0;
2980
2981   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2982     {
2983       if (unformat (input, "table %d", &table_id))
2984         {
2985           /* Make sure the entry exists. */
2986           fib = ip4_fib_get (table_id);
2987           if ((fib) && (fib->index != table_id))
2988             return clib_error_return (0, "<fib-index> %d does not exist",
2989                                       table_id);
2990         }
2991       else if (unformat (input, "count %f", &count))
2992         ;
2993
2994       else if (unformat (input, "%U",
2995                          unformat_ip4_address, &ip4_base_address))
2996         ;
2997       else
2998         return clib_error_return (0, "unknown input `%U'",
2999                                   format_unformat_error, input);
3000     }
3001
3002   n = count;
3003
3004   for (i = 0; i < n; i++)
3005     {
3006       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3007         errors++;
3008
3009       ip4_base_address.as_u32 =
3010         clib_host_to_net_u32 (1 +
3011                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3012     }
3013
3014   if (errors)
3015     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3016   else
3017     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3018
3019   return 0;
3020 }
3021
3022 /*?
3023  * Perform a lookup of an IPv4 Address (or range of addresses) in the
3024  * given FIB table to determine if there is a conflict with the
3025  * adjacency table. The fib-id can be determined by using the
3026  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
3027  * of 0 is used.
3028  *
3029  * @todo This command uses fib-id, other commands use table-id (not
3030  * just a name, they are different indexes). Would like to change this
3031  * to table-id for consistency.
3032  *
3033  * @cliexpar
3034  * Example of how to run the test lookup command:
3035  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
3036  * No errors in 2 lookups
3037  * @cliexend
3038 ?*/
3039 /* *INDENT-OFF* */
3040 VLIB_CLI_COMMAND (lookup_test_command, static) =
3041 {
3042   .path = "test lookup",
3043   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3044   .function = test_lookup_command_fn,
3045 };
3046 /* *INDENT-ON* */
3047
3048 #ifndef CLIB_MARCH_VARIANT
3049 int
3050 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3051 {
3052   u32 fib_index;
3053
3054   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
3055
3056   if (~0 == fib_index)
3057     return VNET_API_ERROR_NO_SUCH_FIB;
3058
3059   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
3060                                   flow_hash_config);
3061
3062   return 0;
3063 }
3064 #endif
3065
3066 static clib_error_t *
3067 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3068                              unformat_input_t * input,
3069                              vlib_cli_command_t * cmd)
3070 {
3071   int matched = 0;
3072   u32 table_id = 0;
3073   u32 flow_hash_config = 0;
3074   int rv;
3075
3076   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3077     {
3078       if (unformat (input, "table %d", &table_id))
3079         matched = 1;
3080 #define _(a,v) \
3081     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3082       foreach_flow_hash_bit
3083 #undef _
3084         else
3085         break;
3086     }
3087
3088   if (matched == 0)
3089     return clib_error_return (0, "unknown input `%U'",
3090                               format_unformat_error, input);
3091
3092   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3093   switch (rv)
3094     {
3095     case 0:
3096       break;
3097
3098     case VNET_API_ERROR_NO_SUCH_FIB:
3099       return clib_error_return (0, "no such FIB table %d", table_id);
3100
3101     default:
3102       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3103       break;
3104     }
3105
3106   return 0;
3107 }
3108
3109 /*?
3110  * Configure the set of IPv4 fields used by the flow hash.
3111  *
3112  * @cliexpar
3113  * Example of how to set the flow hash on a given table:
3114  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
3116  * @cliexstart{show ip fib}
3117  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3118  * 0.0.0.0/0
3119  *   unicast-ip4-chain
3120  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3121  *     [0] [@0]: dpo-drop ip6
3122  * 0.0.0.0/32
3123  *   unicast-ip4-chain
3124  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3125  *     [0] [@0]: dpo-drop ip6
3126  * 224.0.0.0/8
3127  *   unicast-ip4-chain
3128  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3129  *     [0] [@0]: dpo-drop ip6
3130  * 6.0.1.2/32
3131  *   unicast-ip4-chain
3132  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3133  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3134  * 7.0.0.1/32
3135  *   unicast-ip4-chain
3136  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3137  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3138  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3139  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3140  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3141  * 240.0.0.0/8
3142  *   unicast-ip4-chain
3143  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3144  *     [0] [@0]: dpo-drop ip6
3145  * 255.255.255.255/32
3146  *   unicast-ip4-chain
3147  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3148  *     [0] [@0]: dpo-drop ip6
3149  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3150  * 0.0.0.0/0
3151  *   unicast-ip4-chain
3152  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3153  *     [0] [@0]: dpo-drop ip6
3154  * 0.0.0.0/32
3155  *   unicast-ip4-chain
3156  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3157  *     [0] [@0]: dpo-drop ip6
3158  * 172.16.1.0/24
3159  *   unicast-ip4-chain
3160  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3161  *     [0] [@4]: ipv4-glean: af_packet0
3162  * 172.16.1.1/32
3163  *   unicast-ip4-chain
3164  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3165  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3166  * 172.16.1.2/32
3167  *   unicast-ip4-chain
3168  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3169  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3170  * 172.16.2.0/24
3171  *   unicast-ip4-chain
3172  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3173  *     [0] [@4]: ipv4-glean: af_packet1
3174  * 172.16.2.1/32
3175  *   unicast-ip4-chain
3176  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3177  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3178  * 224.0.0.0/8
3179  *   unicast-ip4-chain
3180  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3181  *     [0] [@0]: dpo-drop ip6
3182  * 240.0.0.0/8
3183  *   unicast-ip4-chain
3184  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3185  *     [0] [@0]: dpo-drop ip6
3186  * 255.255.255.255/32
3187  *   unicast-ip4-chain
3188  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3189  *     [0] [@0]: dpo-drop ip6
3190  * @cliexend
3191 ?*/
3192 /* *INDENT-OFF* */
3193 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3194 {
3195   .path = "set ip flow-hash",
3196   .short_help =
3197   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3198   .function = set_ip_flow_hash_command_fn,
3199 };
3200 /* *INDENT-ON* */
3201
3202 #ifndef CLIB_MARCH_VARIANT
3203 int
3204 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3205                              u32 table_index)
3206 {
3207   vnet_main_t *vnm = vnet_get_main ();
3208   vnet_interface_main_t *im = &vnm->interface_main;
3209   ip4_main_t *ipm = &ip4_main;
3210   ip_lookup_main_t *lm = &ipm->lookup_main;
3211   vnet_classify_main_t *cm = &vnet_classify_main;
3212   ip4_address_t *if_addr;
3213
3214   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3215     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3216
3217   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3218     return VNET_API_ERROR_NO_SUCH_ENTRY;
3219
3220   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3221   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3222
3223   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3224
3225   if (NULL != if_addr)
3226     {
3227       fib_prefix_t pfx = {
3228         .fp_len = 32,
3229         .fp_proto = FIB_PROTOCOL_IP4,
3230         .fp_addr.ip4 = *if_addr,
3231       };
3232       u32 fib_index;
3233
3234       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3235                                                        sw_if_index);
3236
3237
3238       if (table_index != (u32) ~ 0)
3239         {
3240           dpo_id_t dpo = DPO_INVALID;
3241
3242           dpo_set (&dpo,
3243                    DPO_CLASSIFY,
3244                    DPO_PROTO_IP4,
3245                    classify_dpo_create (DPO_PROTO_IP4, table_index));
3246
3247           fib_table_entry_special_dpo_add (fib_index,
3248                                            &pfx,
3249                                            FIB_SOURCE_CLASSIFY,
3250                                            FIB_ENTRY_FLAG_NONE, &dpo);
3251           dpo_reset (&dpo);
3252         }
3253       else
3254         {
3255           fib_table_entry_special_remove (fib_index,
3256                                           &pfx, FIB_SOURCE_CLASSIFY);
3257         }
3258     }
3259
3260   return 0;
3261 }
3262 #endif
3263
3264 static clib_error_t *
3265 set_ip_classify_command_fn (vlib_main_t * vm,
3266                             unformat_input_t * input,
3267                             vlib_cli_command_t * cmd)
3268 {
3269   u32 table_index = ~0;
3270   int table_index_set = 0;
3271   u32 sw_if_index = ~0;
3272   int rv;
3273
3274   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3275     {
3276       if (unformat (input, "table-index %d", &table_index))
3277         table_index_set = 1;
3278       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3279                          vnet_get_main (), &sw_if_index))
3280         ;
3281       else
3282         break;
3283     }
3284
3285   if (table_index_set == 0)
3286     return clib_error_return (0, "classify table-index must be specified");
3287
3288   if (sw_if_index == ~0)
3289     return clib_error_return (0, "interface / subif must be specified");
3290
3291   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3292
3293   switch (rv)
3294     {
3295     case 0:
3296       break;
3297
3298     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3299       return clib_error_return (0, "No such interface");
3300
3301     case VNET_API_ERROR_NO_SUCH_ENTRY:
3302       return clib_error_return (0, "No such classifier table");
3303     }
3304   return 0;
3305 }
3306
3307 /*?
3308  * Assign a classification table to an interface. The classification
3309  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
3311  * on an interface.
3312  *
3313  * @cliexpar
3314  * Example of how to assign a classification table to an interface:
3315  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3316 ?*/
3317 /* *INDENT-OFF* */
3318 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3319 {
3320     .path = "set ip classify",
3321     .short_help =
3322     "set ip classify intfc <interface> table-index <classify-idx>",
3323     .function = set_ip_classify_command_fn,
3324 };
3325 /* *INDENT-ON* */
3326
3327 static clib_error_t *
3328 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3329 {
3330   ip4_main_t *im = &ip4_main;
3331   uword heapsize = 0;
3332
3333   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3334     {
3335       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3336         ;
3337       else
3338         return clib_error_return (0,
3339                                   "invalid heap-size parameter `%U'",
3340                                   format_unformat_error, input);
3341     }
3342
3343   im->mtrie_heap_size = heapsize;
3344
3345   return 0;
3346 }
3347
3348 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3349
3350 /*
3351  * fd.io coding-style-patch-verification: ON
3352  *
3353  * Local Variables:
3354  * eval: (c-set-style "gnu")
3355  * End:
3356  */