ip: allow addrs from the same prefix on intf
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
59
60 /** @brief IPv4 lookup node.
61     @node ip4-lookup
62
63     This is the main IPv4 lookup dispatch node.
64
65     @param vm vlib_main_t corresponding to the current thread
66     @param node vlib_node_runtime_t
67     @param frame vlib_frame_t whose contents should be dispatched
68
69     @par Graph mechanics: buffer metadata, next index usage
70
71     @em Uses:
72     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73         - Indicates the @c sw_if_index value of the interface that the
74           packet was received on.
75     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76         - When the value is @c ~0 then the node performs a longest prefix
77           match (LPM) for the packet destination address in the FIB attached
78           to the receive interface.
79         - Otherwise perform LPM for the packet destination address in the
80           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81           value (0, 1, ...) and not a VRF id.
82
83     @em Sets:
84     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85         - The lookup result adjacency index.
86
87     <em>Next Index:</em>
88     - Dispatches the packet to the node index found in
89       ip_adjacency_t @c adj->lookup_next_index
90       (where @c adj is the lookup result adjacency).
91 */
92 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
93                                 vlib_frame_t * frame)
94 {
95   return ip4_lookup_inline (vm, node, frame);
96 }
97
98 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
99
100 /* *INDENT-OFF* */
101 VLIB_REGISTER_NODE (ip4_lookup_node) =
102 {
103   .name = "ip4-lookup",
104   .vector_size = sizeof (u32),
105   .format_trace = format_ip4_lookup_trace,
106   .n_next_nodes = IP_LOOKUP_N_NEXT,
107   .next_nodes = IP4_LOOKUP_NEXT_NODES,
108 };
109 /* *INDENT-ON* */
110
111 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
112                                       vlib_node_runtime_t * node,
113                                       vlib_frame_t * frame)
114 {
115   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
116   u32 n_left, *from;
117   u32 thread_index = vm->thread_index;
118   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
119   u16 nexts[VLIB_FRAME_SIZE], *next;
120
121   from = vlib_frame_vector_args (frame);
122   n_left = frame->n_vectors;
123   next = nexts;
124
125   vlib_get_buffers (vm, from, bufs, n_left);
126
127   while (n_left >= 4)
128     {
129       const load_balance_t *lb0, *lb1;
130       const ip4_header_t *ip0, *ip1;
131       u32 lbi0, hc0, lbi1, hc1;
132       const dpo_id_t *dpo0, *dpo1;
133
134       /* Prefetch next iteration. */
135       {
136         vlib_prefetch_buffer_header (b[2], LOAD);
137         vlib_prefetch_buffer_header (b[3], LOAD);
138
139         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
140         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
141       }
142
143       ip0 = vlib_buffer_get_current (b[0]);
144       ip1 = vlib_buffer_get_current (b[1]);
145       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
146       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
147
148       lb0 = load_balance_get (lbi0);
149       lb1 = load_balance_get (lbi1);
150
151       /*
152        * this node is for via FIBs we can re-use the hash value from the
153        * to node if present.
154        * We don't want to use the same hash value at each level in the recursion
155        * graph as that would lead to polarisation
156        */
157       hc0 = hc1 = 0;
158
159       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
160         {
161           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
162             {
163               hc0 = vnet_buffer (b[0])->ip.flow_hash =
164                 vnet_buffer (b[0])->ip.flow_hash >> 1;
165             }
166           else
167             {
168               hc0 = vnet_buffer (b[0])->ip.flow_hash =
169                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
170             }
171           dpo0 = load_balance_get_fwd_bucket
172             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
173         }
174       else
175         {
176           dpo0 = load_balance_get_bucket_i (lb0, 0);
177         }
178       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
179         {
180           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
181             {
182               hc1 = vnet_buffer (b[1])->ip.flow_hash =
183                 vnet_buffer (b[1])->ip.flow_hash >> 1;
184             }
185           else
186             {
187               hc1 = vnet_buffer (b[1])->ip.flow_hash =
188                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
189             }
190           dpo1 = load_balance_get_fwd_bucket
191             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
192         }
193       else
194         {
195           dpo1 = load_balance_get_bucket_i (lb1, 0);
196         }
197
198       next[0] = dpo0->dpoi_next_node;
199       next[1] = dpo1->dpoi_next_node;
200
201       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
202       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
203
204       vlib_increment_combined_counter
205         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
206       vlib_increment_combined_counter
207         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
208
209       b += 2;
210       next += 2;
211       n_left -= 2;
212     }
213
214   while (n_left > 0)
215     {
216       const load_balance_t *lb0;
217       const ip4_header_t *ip0;
218       const dpo_id_t *dpo0;
219       u32 lbi0, hc0;
220
221       ip0 = vlib_buffer_get_current (b[0]);
222       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
223
224       lb0 = load_balance_get (lbi0);
225
226       hc0 = 0;
227       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
228         {
229           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
230             {
231               hc0 = vnet_buffer (b[0])->ip.flow_hash =
232                 vnet_buffer (b[0])->ip.flow_hash >> 1;
233             }
234           else
235             {
236               hc0 = vnet_buffer (b[0])->ip.flow_hash =
237                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
238             }
239           dpo0 = load_balance_get_fwd_bucket
240             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
241         }
242       else
243         {
244           dpo0 = load_balance_get_bucket_i (lb0, 0);
245         }
246
247       next[0] = dpo0->dpoi_next_node;
248       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
249
250       vlib_increment_combined_counter
251         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
252
253       b += 1;
254       next += 1;
255       n_left -= 1;
256     }
257
258   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
259   if (node->flags & VLIB_NODE_FLAG_TRACE)
260     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
261
262   return frame->n_vectors;
263 }
264
265 /* *INDENT-OFF* */
266 VLIB_REGISTER_NODE (ip4_load_balance_node) =
267 {
268   .name = "ip4-load-balance",
269   .vector_size = sizeof (u32),
270   .sibling_of = "ip4-lookup",
271   .format_trace = format_ip4_lookup_trace,
272 };
273 /* *INDENT-ON* */
274
275 #ifndef CLIB_MARCH_VARIANT
276 /* get first interface address */
277 ip4_address_t *
278 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
279                              ip_interface_address_t ** result_ia)
280 {
281   ip_lookup_main_t *lm = &im->lookup_main;
282   ip_interface_address_t *ia = 0;
283   ip4_address_t *result = 0;
284
285   /* *INDENT-OFF* */
286   foreach_ip_interface_address
287     (lm, ia, sw_if_index,
288      1 /* honor unnumbered */ ,
289      ({
290        ip4_address_t * a =
291          ip_interface_address_get_address (lm, ia);
292        result = a;
293        break;
294      }));
295   /* *INDENT-OFF* */
296   if (result_ia)
297     *result_ia = result ? ia : 0;
298   return result;
299 }
300 #endif
301
302 static void
303 ip4_add_subnet_bcast_route (u32 fib_index,
304                             fib_prefix_t *pfx,
305                             u32 sw_if_index)
306 {
307   vnet_sw_interface_flags_t iflags;
308
309   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
310
311   fib_table_entry_special_remove(fib_index,
312                                  pfx,
313                                  FIB_SOURCE_INTERFACE);
314
315   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
316     {
317       fib_table_entry_update_one_path (fib_index, pfx,
318                                        FIB_SOURCE_INTERFACE,
319                                        FIB_ENTRY_FLAG_NONE,
320                                        DPO_PROTO_IP4,
321                                        /* No next-hop address */
322                                        &ADJ_BCAST_ADDR,
323                                        sw_if_index,
324                                        // invalid FIB index
325                                        ~0,
326                                        1,
327                                        // no out-label stack
328                                        NULL,
329                                        FIB_ROUTE_PATH_FLAG_NONE);
330     }
331   else
332     {
333         fib_table_entry_special_add(fib_index,
334                                     pfx,
335                                     FIB_SOURCE_INTERFACE,
336                                     (FIB_ENTRY_FLAG_DROP |
337                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
338     }
339 }
340
341 static void
342 ip4_add_interface_prefix_routes (ip4_main_t *im,
343                                  u32 sw_if_index,
344                                  u32 fib_index,
345                                  ip_interface_address_t * a)
346 {
347   ip_lookup_main_t *lm = &im->lookup_main;
348   ip_interface_prefix_t *if_prefix;
349   ip4_address_t *address = ip_interface_address_get_address (lm, a);
350
351   ip_interface_prefix_key_t key = {
352     .prefix = {
353       .fp_len = a->address_length,
354       .fp_proto = FIB_PROTOCOL_IP4,
355       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
356     },
357     .sw_if_index = sw_if_index,
358   };
359
360   fib_prefix_t pfx_special = {
361     .fp_proto = FIB_PROTOCOL_IP4,
362   };
363
364   /* If prefix already set on interface, just increment ref count & return */
365   if_prefix = ip_get_interface_prefix (lm, &key);
366   if (if_prefix)
367     {
368       if_prefix->ref_count += 1;
369       return;
370     }
371
372   /* New prefix - allocate a pool entry, initialize it, add to the hash */
373   pool_get (lm->if_prefix_pool, if_prefix);
374   if_prefix->ref_count = 1;
375   if_prefix->src_ia_index = a - lm->if_address_pool;
376   clib_memcpy (&if_prefix->key, &key, sizeof (key));
377   mhash_set (&lm->prefix_to_if_prefix_index, &key,
378              if_prefix - lm->if_prefix_pool, 0 /* old value */);
379
380   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
381   if (a->address_length <= 30)
382     {
383       pfx_special.fp_len = a->address_length;
384       pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
385
386       /* set the glean route for the prefix */
387       fib_table_entry_update_one_path (fib_index, &pfx_special,
388                                        FIB_SOURCE_INTERFACE,
389                                        (FIB_ENTRY_FLAG_CONNECTED |
390                                         FIB_ENTRY_FLAG_ATTACHED),
391                                        DPO_PROTO_IP4,
392                                        /* No next-hop address */
393                                        NULL,
394                                        sw_if_index,
395                                        /* invalid FIB index */
396                                        ~0,
397                                        1,
398                                        /* no out-label stack */
399                                        NULL,
400                                        FIB_ROUTE_PATH_FLAG_NONE);
401
402       /* set a drop route for the base address of the prefix */
403       pfx_special.fp_len = 32;
404       pfx_special.fp_addr.ip4.as_u32 =
405         address->as_u32 & im->fib_masks[a->address_length];
406
407       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
408         fib_table_entry_special_add (fib_index, &pfx_special,
409                                      FIB_SOURCE_INTERFACE,
410                                      (FIB_ENTRY_FLAG_DROP |
411                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
412
413       /* set a route for the broadcast address of the prefix */
414       pfx_special.fp_len = 32;
415       pfx_special.fp_addr.ip4.as_u32 =
416         address->as_u32 | ~im->fib_masks[a->address_length];
417       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
418         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
419
420
421     }
422   /* length == 31 - add an attached route for the other address */
423   else if (a->address_length == 31)
424     {
425       pfx_special.fp_len = 32;
426       pfx_special.fp_addr.ip4.as_u32 =
427         address->as_u32 ^ clib_host_to_net_u32(1);
428
429       fib_table_entry_update_one_path (fib_index, &pfx_special,
430                                        FIB_SOURCE_INTERFACE,
431                                        (FIB_ENTRY_FLAG_ATTACHED),
432                                        DPO_PROTO_IP4,
433                                        &pfx_special.fp_addr,
434                                        sw_if_index,
435                                        /* invalid FIB index */
436                                        ~0,
437                                        1,
438                                        NULL,
439                                        FIB_ROUTE_PATH_FLAG_NONE);
440     }
441 }
442
443 static void
444 ip4_add_interface_routes (u32 sw_if_index,
445                           ip4_main_t * im, u32 fib_index,
446                           ip_interface_address_t * a)
447 {
448   ip_lookup_main_t *lm = &im->lookup_main;
449   ip4_address_t *address = ip_interface_address_get_address (lm, a);
450   fib_prefix_t pfx = {
451     .fp_len = 32,
452     .fp_proto = FIB_PROTOCOL_IP4,
453     .fp_addr.ip4 = *address,
454   };
455
456   /* set special routes for the prefix if needed */
457   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
458
459   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
460     {
461       u32 classify_table_index =
462         lm->classify_table_index_by_sw_if_index[sw_if_index];
463       if (classify_table_index != (u32) ~ 0)
464         {
465           dpo_id_t dpo = DPO_INVALID;
466
467           dpo_set (&dpo,
468                    DPO_CLASSIFY,
469                    DPO_PROTO_IP4,
470                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
471
472           fib_table_entry_special_dpo_add (fib_index,
473                                            &pfx,
474                                            FIB_SOURCE_CLASSIFY,
475                                            FIB_ENTRY_FLAG_NONE, &dpo);
476           dpo_reset (&dpo);
477         }
478     }
479
480   fib_table_entry_update_one_path (fib_index, &pfx,
481                                    FIB_SOURCE_INTERFACE,
482                                    (FIB_ENTRY_FLAG_CONNECTED |
483                                     FIB_ENTRY_FLAG_LOCAL),
484                                    DPO_PROTO_IP4,
485                                    &pfx.fp_addr,
486                                    sw_if_index,
487                                    // invalid FIB index
488                                    ~0,
489                                    1, NULL,
490                                    FIB_ROUTE_PATH_FLAG_NONE);
491 }
492
493 static void
494 ip4_del_interface_prefix_routes (ip4_main_t * im,
495                                  u32 sw_if_index,
496                                  u32 fib_index,
497                                  ip4_address_t * address,
498                                  u32 address_length)
499 {
500   ip_lookup_main_t *lm = &im->lookup_main;
501   ip_interface_prefix_t *if_prefix;
502
503   ip_interface_prefix_key_t key = {
504     .prefix = {
505       .fp_len = address_length,
506       .fp_proto = FIB_PROTOCOL_IP4,
507       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
508     },
509     .sw_if_index = sw_if_index,
510   };
511
512   fib_prefix_t pfx_special = {
513     .fp_len = 32,
514     .fp_proto = FIB_PROTOCOL_IP4,
515   };
516
517   if_prefix = ip_get_interface_prefix (lm, &key);
518   if (!if_prefix)
519     {
520       clib_warning ("Prefix not found while deleting %U",
521                     format_ip4_address_and_length, address, address_length);
522       return;
523     }
524
525   if_prefix->ref_count -= 1;
526
527   /*
528    * Routes need to be adjusted if:
529    * - deleting last intf addr in prefix
530    * - deleting intf addr used as default source address in glean adjacency
531    *
532    * We're done now otherwise
533    */
534   if ((if_prefix->ref_count > 0) &&
535       !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
536     return;
537
538   /* length <= 30, delete glean route, first address, last address */
539   if (address_length <= 30)
540     {
541
542       /* remove glean route for prefix */
543       pfx_special.fp_addr.ip4 = *address;
544       pfx_special.fp_len = address_length;
545       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
546
547       /* if no more intf addresses in prefix, remove other special routes */
548       if (!if_prefix->ref_count)
549         {
550           /* first address in prefix */
551           pfx_special.fp_addr.ip4.as_u32 =
552             address->as_u32 & im->fib_masks[address_length];
553           pfx_special.fp_len = 32;
554
555           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
556           fib_table_entry_special_remove (fib_index,
557                                           &pfx_special,
558                                           FIB_SOURCE_INTERFACE);
559
560           /* prefix broadcast address */
561           pfx_special.fp_addr.ip4.as_u32 =
562             address->as_u32 | ~im->fib_masks[address_length];
563           pfx_special.fp_len = 32;
564
565           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
566           fib_table_entry_special_remove (fib_index,
567                                           &pfx_special,
568                                           FIB_SOURCE_INTERFACE);
569         }
570       else
571         /* default source addr just got deleted, find another */
572         {
573           ip_interface_address_t *new_src_ia = NULL;
574           ip4_address_t *new_src_addr = NULL;
575
576           new_src_addr =
577             ip4_interface_address_matching_destination
578               (im, address, sw_if_index, &new_src_ia);
579
580           if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
581
582           pfx_special.fp_len = address_length;
583           pfx_special.fp_addr.ip4 = *new_src_addr;
584
585           /* set new glean route for the prefix */
586           fib_table_entry_update_one_path (fib_index, &pfx_special,
587                                            FIB_SOURCE_INTERFACE,
588                                            (FIB_ENTRY_FLAG_CONNECTED |
589                                             FIB_ENTRY_FLAG_ATTACHED),
590                                            DPO_PROTO_IP4,
591                                            /* No next-hop address */
592                                            NULL,
593                                            sw_if_index,
594                                            /* invalid FIB index */
595                                            ~0,
596                                            1,
597                                            /* no out-label stack */
598                                            NULL,
599                                            FIB_ROUTE_PATH_FLAG_NONE);
600           return;
601         }
602     }
603   /* length == 31, delete attached route for the other address */
604   else if (address_length == 31)
605     {
606       pfx_special.fp_addr.ip4.as_u32 =
607         address->as_u32 ^ clib_host_to_net_u32(1);
608
609       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
610     }
611
612   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
613   pool_put (lm->if_prefix_pool, if_prefix);
614 }
615
616 static void
617 ip4_del_interface_routes (u32 sw_if_index,
618                           ip4_main_t * im,
619                           u32 fib_index,
620                           ip4_address_t * address, u32 address_length)
621 {
622   fib_prefix_t pfx = {
623     .fp_len = address_length,
624     .fp_proto = FIB_PROTOCOL_IP4,
625     .fp_addr.ip4 = *address,
626   };
627
628   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
629                                    address, address_length);
630
631   pfx.fp_len = 32;
632   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
633 }
634
635 #ifndef CLIB_MARCH_VARIANT
636 void
637 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
638 {
639   ip4_main_t *im = &ip4_main;
640
641   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
642
643   /*
644    * enable/disable only on the 1<->0 transition
645    */
646   if (is_enable)
647     {
648       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
649         return;
650     }
651   else
652     {
653       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
654       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
655         return;
656     }
657   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
658                                !is_enable, 0, 0);
659
660
661   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
662                                sw_if_index, !is_enable, 0, 0);
663
664   {
665     ip4_enable_disable_interface_callback_t *cb;
666     vec_foreach (cb, im->enable_disable_interface_callbacks)
667       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
668   }
669 }
670
671 static clib_error_t *
672 ip4_add_del_interface_address_internal (vlib_main_t * vm,
673                                         u32 sw_if_index,
674                                         ip4_address_t * address,
675                                         u32 address_length, u32 is_del)
676 {
677   vnet_main_t *vnm = vnet_get_main ();
678   ip4_main_t *im = &ip4_main;
679   ip_lookup_main_t *lm = &im->lookup_main;
680   clib_error_t *error = 0;
681   u32 if_address_index, elts_before;
682   ip4_address_fib_t ip4_af, *addr_fib = 0;
683
684   /* local0 interface doesn't support IP addressing  */
685   if (sw_if_index == 0)
686     {
687       return
688        clib_error_create ("local0 interface doesn't support IP addressing");
689     }
690
691   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
692   ip4_addr_fib_init (&ip4_af, address,
693                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
694   vec_add1 (addr_fib, ip4_af);
695
696   /*
697    * there is no support for adj-fib handling in the presence of overlapping
698    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
699    * most routers do.
700    */
701   /* *INDENT-OFF* */
702   if (!is_del)
703     {
704       /* When adding an address check that it does not conflict
705          with an existing address on any interface in this table. */
706       ip_interface_address_t *ia;
707       vnet_sw_interface_t *sif;
708
709       pool_foreach(sif, vnm->interface_main.sw_interfaces,
710       ({
711           if (im->fib_index_by_sw_if_index[sw_if_index] ==
712               im->fib_index_by_sw_if_index[sif->sw_if_index])
713             {
714               foreach_ip_interface_address
715                 (&im->lookup_main, ia, sif->sw_if_index,
716                  0 /* honor unnumbered */ ,
717                  ({
718                    ip4_address_t * x =
719                      ip_interface_address_get_address
720                      (&im->lookup_main, ia);
721                    if (ip4_destination_matches_route
722                        (im, address, x, ia->address_length) ||
723                        ip4_destination_matches_route (im,
724                                                       x,
725                                                       address,
726                                                       address_length))
727                      {
728                        /* an intf may have >1 addr from the same prefix */
729                        if ((sw_if_index == sif->sw_if_index) &&
730                            (ia->address_length == address_length) &&
731                            (x->as_u32 != address->as_u32))
732                          continue;
733
734                        /* error if the length or intf was different */
735                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
736
737                        return
738                          clib_error_create
739                          ("failed to add %U which conflicts with %U for interface %U",
740                           format_ip4_address_and_length, address,
741                           address_length,
742                           format_ip4_address_and_length, x,
743                           ia->address_length,
744                           format_vnet_sw_if_index_name, vnm,
745                           sif->sw_if_index);
746                      }
747                  }));
748             }
749       }));
750     }
751   /* *INDENT-ON* */
752
753   elts_before = pool_elts (lm->if_address_pool);
754
755   error = ip_interface_address_add_del
756     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
757   if (error)
758     goto done;
759
760   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
761
762   /* intf addr routes are added/deleted on admin up/down */
763   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
764     {
765       if (is_del)
766         ip4_del_interface_routes (sw_if_index,
767                                   im, ip4_af.fib_index, address,
768                                   address_length);
769       else
770         ip4_add_interface_routes (sw_if_index,
771                                   im, ip4_af.fib_index,
772                                   pool_elt_at_index
773                                   (lm->if_address_pool, if_address_index));
774     }
775
776   /* If pool did not grow/shrink: add duplicate address. */
777   if (elts_before != pool_elts (lm->if_address_pool))
778     {
779       ip4_add_del_interface_address_callback_t *cb;
780       vec_foreach (cb, im->add_del_interface_address_callbacks)
781         cb->function (im, cb->function_opaque, sw_if_index,
782                       address, address_length, if_address_index, is_del);
783     }
784
785 done:
786   vec_free (addr_fib);
787   return error;
788 }
789
790 clib_error_t *
791 ip4_add_del_interface_address (vlib_main_t * vm,
792                                u32 sw_if_index,
793                                ip4_address_t * address,
794                                u32 address_length, u32 is_del)
795 {
796   return ip4_add_del_interface_address_internal
797     (vm, sw_if_index, address, address_length, is_del);
798 }
799
800 void
801 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
802 {
803   ip_interface_address_t *ia;
804   ip4_main_t *im;
805
806   im = &ip4_main;
807
808   /*
809    * when directed broadcast is enabled, the subnet braodcast route will forward
810    * packets using an adjacency with a broadcast MAC. otherwise it drops
811    */
812   /* *INDENT-OFF* */
813   foreach_ip_interface_address(&im->lookup_main, ia,
814                                sw_if_index, 0,
815      ({
816        if (ia->address_length <= 30)
817          {
818            ip4_address_t *ipa;
819
820            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
821
822            fib_prefix_t pfx = {
823              .fp_len = 32,
824              .fp_proto = FIB_PROTOCOL_IP4,
825              .fp_addr = {
826                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
827              },
828            };
829
830            ip4_add_subnet_bcast_route
831              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
832                                                   sw_if_index),
833               &pfx, sw_if_index);
834          }
835      }));
836   /* *INDENT-ON* */
837 }
838 #endif
839
840 static clib_error_t *
841 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
842 {
843   ip4_main_t *im = &ip4_main;
844   ip_interface_address_t *ia;
845   ip4_address_t *a;
846   u32 is_admin_up, fib_index;
847
848   /* Fill in lookup tables with default table (0). */
849   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
850
851   vec_validate_init_empty (im->
852                            lookup_main.if_address_pool_index_by_sw_if_index,
853                            sw_if_index, ~0);
854
855   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
856
857   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
858
859   /* *INDENT-OFF* */
860   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
861                                 0 /* honor unnumbered */,
862   ({
863     a = ip_interface_address_get_address (&im->lookup_main, ia);
864     if (is_admin_up)
865       ip4_add_interface_routes (sw_if_index,
866                                 im, fib_index,
867                                 ia);
868     else
869       ip4_del_interface_routes (sw_if_index,
870                                 im, fib_index,
871                                 a, ia->address_length);
872   }));
873   /* *INDENT-ON* */
874
875   return 0;
876 }
877
878 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
879
880 /* Built-in ip4 unicast rx feature path definition */
881 /* *INDENT-OFF* */
882 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
883 {
884   .arc_name = "ip4-unicast",
885   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
886   .last_in_arc = "ip4-lookup",
887   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
888 };
889
890 VNET_FEATURE_INIT (ip4_flow_classify, static) =
891 {
892   .arc_name = "ip4-unicast",
893   .node_name = "ip4-flow-classify",
894   .runs_before = VNET_FEATURES ("ip4-inacl"),
895 };
896
897 VNET_FEATURE_INIT (ip4_inacl, static) =
898 {
899   .arc_name = "ip4-unicast",
900   .node_name = "ip4-inacl",
901   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
902 };
903
904 VNET_FEATURE_INIT (ip4_source_check_1, static) =
905 {
906   .arc_name = "ip4-unicast",
907   .node_name = "ip4-source-check-via-rx",
908   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
909 };
910
911 VNET_FEATURE_INIT (ip4_source_check_2, static) =
912 {
913   .arc_name = "ip4-unicast",
914   .node_name = "ip4-source-check-via-any",
915   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
916 };
917
918 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
919 {
920   .arc_name = "ip4-unicast",
921   .node_name = "ip4-source-and-port-range-check-rx",
922   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
923 };
924
925 VNET_FEATURE_INIT (ip4_policer_classify, static) =
926 {
927   .arc_name = "ip4-unicast",
928   .node_name = "ip4-policer-classify",
929   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
930 };
931
932 VNET_FEATURE_INIT (ip4_ipsec, static) =
933 {
934   .arc_name = "ip4-unicast",
935   .node_name = "ipsec4-input-feature",
936   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
937 };
938
939 VNET_FEATURE_INIT (ip4_vpath, static) =
940 {
941   .arc_name = "ip4-unicast",
942   .node_name = "vpath-input-ip4",
943   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
944 };
945
946 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
947 {
948   .arc_name = "ip4-unicast",
949   .node_name = "ip4-vxlan-bypass",
950   .runs_before = VNET_FEATURES ("ip4-lookup"),
951 };
952
953 VNET_FEATURE_INIT (ip4_not_enabled, static) =
954 {
955   .arc_name = "ip4-unicast",
956   .node_name = "ip4-not-enabled",
957   .runs_before = VNET_FEATURES ("ip4-lookup"),
958 };
959
960 VNET_FEATURE_INIT (ip4_lookup, static) =
961 {
962   .arc_name = "ip4-unicast",
963   .node_name = "ip4-lookup",
964   .runs_before = 0,     /* not before any other features */
965 };
966
967 /* Built-in ip4 multicast rx feature path definition */
968 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
969 {
970   .arc_name = "ip4-multicast",
971   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
972   .last_in_arc = "ip4-mfib-forward-lookup",
973   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
974 };
975
976 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
977 {
978   .arc_name = "ip4-multicast",
979   .node_name = "vpath-input-ip4",
980   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
981 };
982
983 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
984 {
985   .arc_name = "ip4-multicast",
986   .node_name = "ip4-not-enabled",
987   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
988 };
989
990 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
991 {
992   .arc_name = "ip4-multicast",
993   .node_name = "ip4-mfib-forward-lookup",
994   .runs_before = 0,     /* last feature */
995 };
996
997 /* Source and port-range check ip4 tx feature path definition */
998 VNET_FEATURE_ARC_INIT (ip4_output, static) =
999 {
1000   .arc_name = "ip4-output",
1001   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1002   .last_in_arc = "interface-output",
1003   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1004 };
1005
1006 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1007 {
1008   .arc_name = "ip4-output",
1009   .node_name = "ip4-source-and-port-range-check-tx",
1010   .runs_before = VNET_FEATURES ("ip4-outacl"),
1011 };
1012
1013 VNET_FEATURE_INIT (ip4_outacl, static) =
1014 {
1015   .arc_name = "ip4-output",
1016   .node_name = "ip4-outacl",
1017   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1018 };
1019
1020 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1021 {
1022   .arc_name = "ip4-output",
1023   .node_name = "ipsec4-output-feature",
1024   .runs_before = VNET_FEATURES ("interface-output"),
1025 };
1026
1027 /* Built-in ip4 tx feature path definition */
1028 VNET_FEATURE_INIT (ip4_interface_output, static) =
1029 {
1030   .arc_name = "ip4-output",
1031   .node_name = "interface-output",
1032   .runs_before = 0,     /* not before any other features */
1033 };
1034 /* *INDENT-ON* */
1035
1036 static clib_error_t *
1037 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1038 {
1039   ip4_main_t *im = &ip4_main;
1040
1041   /* Fill in lookup tables with default table (0). */
1042   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1043   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1044
1045   if (!is_add)
1046     {
1047       ip4_main_t *im4 = &ip4_main;
1048       ip_lookup_main_t *lm4 = &im4->lookup_main;
1049       ip_interface_address_t *ia = 0;
1050       ip4_address_t *address;
1051       vlib_main_t *vm = vlib_get_main ();
1052
1053       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1054       /* *INDENT-OFF* */
1055       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1056       ({
1057         address = ip_interface_address_get_address (lm4, ia);
1058         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1059       }));
1060       /* *INDENT-ON* */
1061     }
1062
1063   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1064                                is_add, 0, 0);
1065
1066   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1067                                sw_if_index, is_add, 0, 0);
1068
1069   return /* no error */ 0;
1070 }
1071
1072 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1073
1074 /* Global IP4 main. */
1075 #ifndef CLIB_MARCH_VARIANT
1076 ip4_main_t ip4_main;
1077 #endif /* CLIB_MARCH_VARIANT */
1078
1079 static clib_error_t *
1080 ip4_lookup_init (vlib_main_t * vm)
1081 {
1082   ip4_main_t *im = &ip4_main;
1083   clib_error_t *error;
1084   uword i;
1085
1086   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1087     return error;
1088   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1089     return (error);
1090   if ((error = vlib_call_init_function (vm, fib_module_init)))
1091     return error;
1092   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1093     return error;
1094
1095   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1096     {
1097       u32 m;
1098
1099       if (i < 32)
1100         m = pow2_mask (i) << (32 - i);
1101       else
1102         m = ~0;
1103       im->fib_masks[i] = clib_host_to_net_u32 (m);
1104     }
1105
1106   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1107
1108   /* Create FIB with index 0 and table id of 0. */
1109   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1110                                      FIB_SOURCE_DEFAULT_ROUTE);
1111   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1112                                       MFIB_SOURCE_DEFAULT_ROUTE);
1113
1114   {
1115     pg_node_t *pn;
1116     pn = pg_get_node (ip4_lookup_node.index);
1117     pn->unformat_edit = unformat_pg_ip4_header;
1118   }
1119
1120   {
1121     ethernet_arp_header_t h;
1122
1123     clib_memset (&h, 0, sizeof (h));
1124
1125 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1126 #define _8(f,v) h.f = v;
1127     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1128     _16 (l3_type, ETHERNET_TYPE_IP4);
1129     _8 (n_l2_address_bytes, 6);
1130     _8 (n_l3_address_bytes, 4);
1131     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1132 #undef _16
1133 #undef _8
1134
1135     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1136                                /* data */ &h,
1137                                sizeof (h),
1138                                /* alloc chunk size */ 8,
1139                                "ip4 arp");
1140   }
1141
1142   return error;
1143 }
1144
1145 VLIB_INIT_FUNCTION (ip4_lookup_init);
1146
1147 typedef struct
1148 {
1149   /* Adjacency taken. */
1150   u32 dpo_index;
1151   u32 flow_hash;
1152   u32 fib_index;
1153
1154   /* Packet data, possibly *after* rewrite. */
1155   u8 packet_data[64 - 1 * sizeof (u32)];
1156 }
1157 ip4_forward_next_trace_t;
1158
1159 #ifndef CLIB_MARCH_VARIANT
1160 u8 *
1161 format_ip4_forward_next_trace (u8 * s, va_list * args)
1162 {
1163   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1164   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1165   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1166   u32 indent = format_get_indent (s);
1167   s = format (s, "%U%U",
1168               format_white_space, indent,
1169               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1170   return s;
1171 }
1172 #endif
1173
1174 static u8 *
1175 format_ip4_lookup_trace (u8 * s, va_list * args)
1176 {
1177   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1178   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1179   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1180   u32 indent = format_get_indent (s);
1181
1182   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1183               t->fib_index, t->dpo_index, t->flow_hash);
1184   s = format (s, "\n%U%U",
1185               format_white_space, indent,
1186               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1187   return s;
1188 }
1189
1190 static u8 *
1191 format_ip4_rewrite_trace (u8 * s, va_list * args)
1192 {
1193   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1194   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1195   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1196   u32 indent = format_get_indent (s);
1197
1198   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1199               t->fib_index, t->dpo_index, format_ip_adjacency,
1200               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1201   s = format (s, "\n%U%U",
1202               format_white_space, indent,
1203               format_ip_adjacency_packet_data,
1204               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1205   return s;
1206 }
1207
1208 #ifndef CLIB_MARCH_VARIANT
1209 /* Common trace function for all ip4-forward next nodes. */
1210 void
1211 ip4_forward_next_trace (vlib_main_t * vm,
1212                         vlib_node_runtime_t * node,
1213                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1214 {
1215   u32 *from, n_left;
1216   ip4_main_t *im = &ip4_main;
1217
1218   n_left = frame->n_vectors;
1219   from = vlib_frame_vector_args (frame);
1220
1221   while (n_left >= 4)
1222     {
1223       u32 bi0, bi1;
1224       vlib_buffer_t *b0, *b1;
1225       ip4_forward_next_trace_t *t0, *t1;
1226
1227       /* Prefetch next iteration. */
1228       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1229       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1230
1231       bi0 = from[0];
1232       bi1 = from[1];
1233
1234       b0 = vlib_get_buffer (vm, bi0);
1235       b1 = vlib_get_buffer (vm, bi1);
1236
1237       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1238         {
1239           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1240           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1241           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1242           t0->fib_index =
1243             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1244              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1245             vec_elt (im->fib_index_by_sw_if_index,
1246                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1247
1248           clib_memcpy_fast (t0->packet_data,
1249                             vlib_buffer_get_current (b0),
1250                             sizeof (t0->packet_data));
1251         }
1252       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1253         {
1254           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1255           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1256           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1257           t1->fib_index =
1258             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1259              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1260             vec_elt (im->fib_index_by_sw_if_index,
1261                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1262           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1263                             sizeof (t1->packet_data));
1264         }
1265       from += 2;
1266       n_left -= 2;
1267     }
1268
1269   while (n_left >= 1)
1270     {
1271       u32 bi0;
1272       vlib_buffer_t *b0;
1273       ip4_forward_next_trace_t *t0;
1274
1275       bi0 = from[0];
1276
1277       b0 = vlib_get_buffer (vm, bi0);
1278
1279       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1280         {
1281           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1282           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1283           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1284           t0->fib_index =
1285             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1286              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1287             vec_elt (im->fib_index_by_sw_if_index,
1288                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1289           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1290                             sizeof (t0->packet_data));
1291         }
1292       from += 1;
1293       n_left -= 1;
1294     }
1295 }
1296
1297 /* Compute TCP/UDP/ICMP4 checksum in software. */
1298 u16
1299 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1300                               ip4_header_t * ip0)
1301 {
1302   ip_csum_t sum0;
1303   u32 ip_header_length, payload_length_host_byte_order;
1304   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1305   u16 sum16;
1306   void *data_this_buffer;
1307
1308   /* Initialize checksum with ip header. */
1309   ip_header_length = ip4_header_bytes (ip0);
1310   payload_length_host_byte_order =
1311     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1312   sum0 =
1313     clib_host_to_net_u32 (payload_length_host_byte_order +
1314                           (ip0->protocol << 16));
1315
1316   if (BITS (uword) == 32)
1317     {
1318       sum0 =
1319         ip_csum_with_carry (sum0,
1320                             clib_mem_unaligned (&ip0->src_address, u32));
1321       sum0 =
1322         ip_csum_with_carry (sum0,
1323                             clib_mem_unaligned (&ip0->dst_address, u32));
1324     }
1325   else
1326     sum0 =
1327       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1328
1329   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1330   data_this_buffer = (void *) ip0 + ip_header_length;
1331   n_ip_bytes_this_buffer =
1332     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1333   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1334     {
1335       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1336         n_ip_bytes_this_buffer - ip_header_length : 0;
1337     }
1338   while (1)
1339     {
1340       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1341       n_bytes_left -= n_this_buffer;
1342       if (n_bytes_left == 0)
1343         break;
1344
1345       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1346       if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
1347         return 0xfefe;
1348
1349       p0 = vlib_get_buffer (vm, p0->next_buffer);
1350       data_this_buffer = vlib_buffer_get_current (p0);
1351       n_this_buffer = clib_min (p0->current_length, n_bytes_left);
1352     }
1353
1354   sum16 = ~ip_csum_fold (sum0);
1355
1356   return sum16;
1357 }
1358
1359 u32
1360 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1361 {
1362   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1363   udp_header_t *udp0;
1364   u16 sum16;
1365
1366   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1367           || ip0->protocol == IP_PROTOCOL_UDP);
1368
1369   udp0 = (void *) (ip0 + 1);
1370   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1371     {
1372       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1373                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1374       return p0->flags;
1375     }
1376
1377   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1378
1379   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1380                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1381
1382   return p0->flags;
1383 }
1384 #endif
1385
1386 /* *INDENT-OFF* */
1387 VNET_FEATURE_ARC_INIT (ip4_local) =
1388 {
1389   .arc_name  = "ip4-local",
1390   .start_nodes = VNET_FEATURES ("ip4-local"),
1391   .last_in_arc = "ip4-local-end-of-arc",
1392 };
1393 /* *INDENT-ON* */
1394
1395 static inline void
1396 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1397                             ip4_header_t * ip, u8 is_udp, u8 * error,
1398                             u8 * good_tcp_udp)
1399 {
1400   u32 flags0;
1401   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1402   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1403   if (is_udp)
1404     {
1405       udp_header_t *udp;
1406       u32 ip_len, udp_len;
1407       i32 len_diff;
1408       udp = ip4_next_header (ip);
1409       /* Verify UDP length. */
1410       ip_len = clib_net_to_host_u16 (ip->length);
1411       udp_len = clib_net_to_host_u16 (udp->length);
1412
1413       len_diff = ip_len - udp_len;
1414       *good_tcp_udp &= len_diff >= 0;
1415       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1416     }
1417 }
1418
/*
 * Checksum-state predicates on a buffer pointer.
 *
 * Every argument and every expansion is fully parenthesised, so the
 * macros behave like function calls: they can be negated, compared or
 * given expression arguments without operator precedence changing the
 * result.  Each one evaluates to 0 or 1.
 */

/* True when TCP or UDP checksum offload is flagged on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
        || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/*
 * True when a TCP/UDP packet still needs a software checksum check:
 * the checksum is neither already computed nor offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
        && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)         \
            || ip4_local_csum_is_offloaded (_b)))

/* True when the checksum is flagged correct or is offloaded. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
        || ip4_local_csum_is_offloaded (_b)) != 0)
1430
1431 static inline void
1432 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1433                          ip4_header_t * ih, u8 * error)
1434 {
1435   u8 is_udp, is_tcp_udp, good_tcp_udp;
1436
1437   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1438   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1439
1440   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1441     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1442   else
1443     good_tcp_udp = ip4_local_csum_is_valid (b);
1444
1445   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1446   *error = (is_tcp_udp && !good_tcp_udp
1447             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1448 }
1449
1450 static inline void
1451 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1452                             ip4_header_t ** ih, u8 * error)
1453 {
1454   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1455
1456   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1457   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1458
1459   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1460   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1461
1462   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1463   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1464
1465   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1466                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1467     {
1468       if (is_tcp_udp[0])
1469         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1470                                     &good_tcp_udp[0]);
1471       if (is_tcp_udp[1])
1472         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1473                                     &good_tcp_udp[1]);
1474     }
1475
1476   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1477               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1478   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1479               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1480 }
1481
1482 static inline void
1483 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1484                               vlib_buffer_t * b, u16 * next, u8 error,
1485                               u8 head_of_feature_arc)
1486 {
1487   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1488   u32 next_index;
1489
1490   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1491   b->error = error ? error_node->errors[error] : 0;
1492   if (head_of_feature_arc)
1493     {
1494       next_index = *next;
1495       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1496         {
1497           vnet_feature_arc_start (arc_index,
1498                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1499                                   &next_index, b);
1500           *next = next_index;
1501         }
1502     }
1503 }
1504
1505 typedef struct
1506 {
1507   ip4_address_t src;
1508   u32 lbi;
1509   u8 error;
1510   u8 first;
1511 } ip4_local_last_check_t;
1512
1513 static inline void
1514 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1515                      ip4_local_last_check_t * last_check, u8 * error0)
1516 {
1517   ip4_fib_mtrie_leaf_t leaf0;
1518   ip4_fib_mtrie_t *mtrie0;
1519   const dpo_id_t *dpo0;
1520   load_balance_t *lb0;
1521   u32 lbi0;
1522
1523   vnet_buffer (b)->ip.fib_index =
1524     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1525     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1526
1527   /*
1528    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1529    *  adjacency for the destination address (the local interface address).
1530    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1531    *  adjacency for the source address (the remote sender's address)
1532    */
1533   if (PREDICT_FALSE (last_check->first ||
1534                      (last_check->src.as_u32 != ip0->src_address.as_u32)))
1535     {
1536       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1537       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1538       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1539       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1540       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1541
1542       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1543         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1544       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1545
1546       lb0 = load_balance_get (lbi0);
1547       dpo0 = load_balance_get_bucket_i (lb0, 0);
1548
1549       /*
1550        * Must have a route to source otherwise we drop the packet.
1551        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1552        *
1553        * The checks are:
1554        *  - the source is a recieve => it's from us => bogus, do this
1555        *    first since it sets a different error code.
1556        *  - uRPF check for any route to source - accept if passes.
1557        *  - allow packets destined to the broadcast address from unknown sources
1558        */
1559
1560       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1561                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1562                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1563       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1564                   && !fib_urpf_check_size (lb0->lb_urpf)
1565                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1566                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1567
1568       last_check->src.as_u32 = ip0->src_address.as_u32;
1569       last_check->lbi = lbi0;
1570       last_check->error = *error0;
1571     }
1572   else
1573     {
1574       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1575         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1576       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1577       *error0 = last_check->error;
1578       last_check->first = 0;
1579     }
1580 }
1581
1582 static inline void
1583 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1584                         ip4_local_last_check_t * last_check, u8 * error)
1585 {
1586   ip4_fib_mtrie_leaf_t leaf[2];
1587   ip4_fib_mtrie_t *mtrie[2];
1588   const dpo_id_t *dpo[2];
1589   load_balance_t *lb[2];
1590   u32 not_last_hit;
1591   u32 lbi[2];
1592
1593   not_last_hit = last_check->first;
1594   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1595   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1596
1597   vnet_buffer (b[0])->ip.fib_index =
1598     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1599     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1600     vnet_buffer (b[0])->ip.fib_index;
1601
1602   vnet_buffer (b[1])->ip.fib_index =
1603     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1604     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1605     vnet_buffer (b[1])->ip.fib_index;
1606
1607   /*
1608    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1609    *  adjacency for the destination address (the local interface address).
1610    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1611    *  adjacency for the source address (the remote sender's address)
1612    */
1613   if (PREDICT_FALSE (not_last_hit))
1614     {
1615       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1616       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1617
1618       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1619       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1620
1621       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1622                                            &ip[0]->src_address, 2);
1623       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1624                                            &ip[1]->src_address, 2);
1625
1626       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1627                                            &ip[0]->src_address, 3);
1628       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1629                                            &ip[1]->src_address, 3);
1630
1631       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1632       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1633
1634       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1635         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1636       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1637
1638       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1639         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1640       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1641
1642       lb[0] = load_balance_get (lbi[0]);
1643       lb[1] = load_balance_get (lbi[1]);
1644
1645       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1646       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1647
1648       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1649                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1650                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1651       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1652                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1653                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1654                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1655
1656       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1657                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1658                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1659       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1660                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1661                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1662                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1663
1664       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1665       last_check->lbi = lbi[1];
1666       last_check->error = error[1];
1667     }
1668   else
1669     {
1670       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1671         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1672       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1673
1674       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1675         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1676       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1677
1678       error[0] = last_check->error;
1679       error[1] = last_check->error;
1680       last_check->first = 0;
1681     }
1682 }
1683
/** Packet classes returned by ip4_local_classify(). */
enum ip_local_packet_type_e
{
  /** Neither a fragment nor flagged as NATed. */
  IP_LOCAL_PACKET_TYPE_L4 = 0,
  /** Buffer carries VNET_BUFFER_F_IS_NATED. */
  IP_LOCAL_PACKET_TYPE_NAT = 1,
  /** IPv4 fragment. */
  IP_LOCAL_PACKET_TYPE_FRAG = 2,
};
1690
1691 /**
1692  * Determine packet type and next node.
1693  *
1694  * The expectation is that all packets that are not L4 will skip
1695  * checksums and source checks.
1696  */
1697 always_inline u8
1698 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1699 {
1700   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1701
1702   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1703     {
1704       *next = IP_LOCAL_NEXT_REASSEMBLY;
1705       return IP_LOCAL_PACKET_TYPE_FRAG;
1706     }
1707   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1708     {
1709       *next = lm->local_next_by_ip_protocol[ip->protocol];
1710       return IP_LOCAL_PACKET_TYPE_NAT;
1711     }
1712
1713   *next = lm->local_next_by_ip_protocol[ip->protocol];
1714   return IP_LOCAL_PACKET_TYPE_L4;
1715 }
1716
1717 static inline uword
1718 ip4_local_inline (vlib_main_t * vm,
1719                   vlib_node_runtime_t * node,
1720                   vlib_frame_t * frame, int head_of_feature_arc)
1721 {
1722   u32 *from, n_left_from;
1723   vlib_node_runtime_t *error_node =
1724     vlib_node_get_runtime (vm, ip4_input_node.index);
1725   u16 nexts[VLIB_FRAME_SIZE], *next;
1726   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1727   ip4_header_t *ip[2];
1728   u8 error[2], pt[2];
1729
1730   ip4_local_last_check_t last_check = {
1731     /*
1732      * 0.0.0.0 can appear as the source address of an IP packet,
1733      * as can any other address, hence the need to use the 'first'
1734      * member to make sure the .lbi is initialised for the first
1735      * packet.
1736      */
1737     .src = {.as_u32 = 0},
1738     .lbi = ~0,
1739     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1740     .first = 1,
1741   };
1742
1743   from = vlib_frame_vector_args (frame);
1744   n_left_from = frame->n_vectors;
1745
1746   if (node->flags & VLIB_NODE_FLAG_TRACE)
1747     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1748
1749   vlib_get_buffers (vm, from, bufs, n_left_from);
1750   b = bufs;
1751   next = nexts;
1752
1753   while (n_left_from >= 6)
1754     {
1755       u8 not_batch = 0;
1756
1757       /* Prefetch next iteration. */
1758       {
1759         vlib_prefetch_buffer_header (b[4], LOAD);
1760         vlib_prefetch_buffer_header (b[5], LOAD);
1761
1762         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1763         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1764       }
1765
1766       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1767
1768       ip[0] = vlib_buffer_get_current (b[0]);
1769       ip[1] = vlib_buffer_get_current (b[1]);
1770
1771       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1772       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1773
1774       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1775       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1776
1777       not_batch = pt[0] ^ pt[1];
1778
1779       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1780         goto skip_checks;
1781
1782       if (PREDICT_TRUE (not_batch == 0))
1783         {
1784           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1785           ip4_local_check_src_x2 (b, ip, &last_check, error);
1786         }
1787       else
1788         {
1789           if (!pt[0])
1790             {
1791               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1792               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1793             }
1794           if (!pt[1])
1795             {
1796               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1797               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1798             }
1799         }
1800
1801     skip_checks:
1802
1803       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1804                                     head_of_feature_arc);
1805       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1806                                     head_of_feature_arc);
1807
1808       b += 2;
1809       next += 2;
1810       n_left_from -= 2;
1811     }
1812
1813   while (n_left_from > 0)
1814     {
1815       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1816
1817       ip[0] = vlib_buffer_get_current (b[0]);
1818       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1819       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1820
1821       if (head_of_feature_arc == 0 || pt[0])
1822         goto skip_check;
1823
1824       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1825       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1826
1827     skip_check:
1828
1829       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1830                                     head_of_feature_arc);
1831
1832       b += 1;
1833       next += 1;
1834       n_left_from -= 1;
1835     }
1836
1837   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1838   return frame->n_vectors;
1839 }
1840
1841 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1842                                vlib_frame_t * frame)
1843 {
1844   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1845 }
1846
1847 /* *INDENT-OFF* */
1848 VLIB_REGISTER_NODE (ip4_local_node) =
1849 {
1850   .name = "ip4-local",
1851   .vector_size = sizeof (u32),
1852   .format_trace = format_ip4_forward_next_trace,
1853   .n_next_nodes = IP_LOCAL_N_NEXT,
1854   .next_nodes =
1855   {
1856     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1857     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1858     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1859     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1860     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
1861   },
1862 };
1863 /* *INDENT-ON* */
1864
1865
1866 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1867                                           vlib_node_runtime_t * node,
1868                                           vlib_frame_t * frame)
1869 {
1870   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1871 }
1872
1873 /* *INDENT-OFF* */
1874 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1875   .name = "ip4-local-end-of-arc",
1876   .vector_size = sizeof (u32),
1877
1878   .format_trace = format_ip4_forward_next_trace,
1879   .sibling_of = "ip4-local",
1880 };
1881
1882 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1883   .arc_name = "ip4-local",
1884   .node_name = "ip4-local-end-of-arc",
1885   .runs_before = 0, /* not before any other features */
1886 };
1887 /* *INDENT-ON* */
1888
1889 #ifndef CLIB_MARCH_VARIANT
1890 void
1891 ip4_register_protocol (u32 protocol, u32 node_index)
1892 {
1893   vlib_main_t *vm = vlib_get_main ();
1894   ip4_main_t *im = &ip4_main;
1895   ip_lookup_main_t *lm = &im->lookup_main;
1896
1897   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1898   lm->local_next_by_ip_protocol[protocol] =
1899     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1900 }
1901
1902 void
1903 ip4_unregister_protocol (u32 protocol)
1904 {
1905   ip4_main_t *im = &ip4_main;
1906   ip_lookup_main_t *lm = &im->lookup_main;
1907
1908   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1909   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1910 }
1911 #endif
1912
1913 static clib_error_t *
1914 show_ip_local_command_fn (vlib_main_t * vm,
1915                           unformat_input_t * input, vlib_cli_command_t * cmd)
1916 {
1917   ip4_main_t *im = &ip4_main;
1918   ip_lookup_main_t *lm = &im->lookup_main;
1919   int i;
1920
1921   vlib_cli_output (vm, "Protocols handled by ip4_local");
1922   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1923     {
1924       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1925         {
1926           u32 node_index = vlib_get_node (vm,
1927                                           ip4_local_node.index)->
1928             next_nodes[lm->local_next_by_ip_protocol[i]];
1929           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1930                            format_vlib_node_name, vm, node_index);
1931         }
1932     }
1933   return 0;
1934 }
1935
1936
1937
1938 /*?
1939  * Display the set of protocols handled by the local IPv4 stack.
1940  *
1941  * @cliexpar
1942  * Example of how to display local protocol table:
1943  * @cliexstart{show ip local}
1944  * Protocols handled by ip4_local
1945  * 1
1946  * 17
1947  * 47
1948  * @cliexend
1949 ?*/
1950 /* *INDENT-OFF* */
1951 VLIB_CLI_COMMAND (show_ip_local, static) =
1952 {
1953   .path = "show ip local",
1954   .function = show_ip_local_command_fn,
1955   .short_help = "show ip local",
1956 };
1957 /* *INDENT-ON* */
1958
1959 always_inline uword
1960 ip4_arp_inline (vlib_main_t * vm,
1961                 vlib_node_runtime_t * node,
1962                 vlib_frame_t * frame, int is_glean)
1963 {
1964   vnet_main_t *vnm = vnet_get_main ();
1965   ip4_main_t *im = &ip4_main;
1966   ip_lookup_main_t *lm = &im->lookup_main;
1967   u32 *from, *to_next_drop;
1968   uword n_left_from, n_left_to_next_drop, next_index;
1969   u32 thread_index = vm->thread_index;
1970   u64 seed;
1971
1972   if (node->flags & VLIB_NODE_FLAG_TRACE)
1973     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1974
1975   seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1976
1977   from = vlib_frame_vector_args (frame);
1978   n_left_from = frame->n_vectors;
1979   next_index = node->cached_next_index;
1980   if (next_index == IP4_ARP_NEXT_DROP)
1981     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1982
1983   while (n_left_from > 0)
1984     {
1985       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1986                            to_next_drop, n_left_to_next_drop);
1987
1988       while (n_left_from > 0 && n_left_to_next_drop > 0)
1989         {
1990           u32 pi0, bi0, adj_index0, sw_if_index0;
1991           ip_adjacency_t *adj0;
1992           vlib_buffer_t *p0, *b0;
1993           ip4_address_t resolve0;
1994           ethernet_arp_header_t *h0;
1995           vnet_hw_interface_t *hw_if0;
1996           u64 r0;
1997
1998           pi0 = from[0];
1999           p0 = vlib_get_buffer (vm, pi0);
2000
2001           from += 1;
2002           n_left_from -= 1;
2003           to_next_drop[0] = pi0;
2004           to_next_drop += 1;
2005           n_left_to_next_drop -= 1;
2006
2007           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2008           adj0 = adj_get (adj_index0);
2009
2010           if (is_glean)
2011             {
2012               /* resolve the packet's destination */
2013               ip4_header_t *ip0 = vlib_buffer_get_current (p0);
2014               resolve0 = ip0->dst_address;
2015             }
2016           else
2017             {
2018               /* resolve the incomplete adj */
2019               resolve0 = adj0->sub_type.nbr.next_hop.ip4;
2020             }
2021
2022           /* combine the address and interface for the hash key */
2023           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2024           r0 = (u64) resolve0.data_u32 << 32;
2025           r0 |= sw_if_index0;
2026
2027           if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
2028             {
2029               p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
2030               continue;
2031             }
2032
2033           /*
2034            * the adj has been updated to a rewrite but the node the DPO that got
2035            * us here hasn't - yet. no big deal. we'll drop while we wait.
2036            */
2037           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2038             {
2039               p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
2040               continue;
2041             }
2042
2043           /*
2044            * Can happen if the control-plane is programming tables
2045            * with traffic flowing; at least that's today's lame excuse.
2046            */
2047           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2048               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2049             {
2050               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2051               continue;
2052             }
2053           /* Send ARP request. */
2054           h0 =
2055             vlib_packet_template_get_packet (vm,
2056                                              &im->ip4_arp_request_packet_template,
2057                                              &bi0);
2058           /* Seems we're out of buffers */
2059           if (PREDICT_FALSE (!h0))
2060             {
2061               p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
2062               continue;
2063             }
2064
2065           b0 = vlib_get_buffer (vm, bi0);
2066
2067           /* copy the persistent fields from the original */
2068           clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
2069
2070           /* Add rewrite/encap string for ARP packet. */
2071           vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2072
2073           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2074
2075           /* Src ethernet address in ARP header. */
2076           mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
2077                                   hw_if0->hw_address);
2078           if (is_glean)
2079             {
2080               /* The interface's source address is stashed in the Glean Adj */
2081               h0->ip4_over_ethernet[0].ip4 =
2082                 adj0->sub_type.glean.receive_addr.ip4;
2083             }
2084           else
2085             {
2086               /* Src IP address in ARP header. */
2087               if (ip4_src_address_for_packet (lm, sw_if_index0,
2088                                               &h0->ip4_over_ethernet[0].ip4))
2089                 {
2090                   /* No source address available */
2091                   p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2092                   vlib_buffer_free (vm, &bi0, 1);
2093                   continue;
2094                 }
2095             }
2096           h0->ip4_over_ethernet[1].ip4 = resolve0;
2097
2098           p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
2099
2100           vlib_buffer_copy_trace_flag (vm, p0, bi0);
2101           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
2102           vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2103
2104           vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2105
2106           vlib_set_next_frame_buffer (vm, node,
2107                                       adj0->rewrite_header.next_index, bi0);
2108         }
2109
2110       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2111     }
2112
2113   return frame->n_vectors;
2114 }
2115
2116 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2117                              vlib_frame_t * frame)
2118 {
2119   return (ip4_arp_inline (vm, node, frame, 0));
2120 }
2121
2122 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2123                                vlib_frame_t * frame)
2124 {
2125   return (ip4_arp_inline (vm, node, frame, 1));
2126 }
2127
2128 static char *ip4_arp_error_strings[] = {
2129   [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
2130   [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
2131   [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
2132   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2133   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2134   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2135 };
2136
2137 /* *INDENT-OFF* */
2138 VLIB_REGISTER_NODE (ip4_arp_node) =
2139 {
2140   .name = "ip4-arp",
2141   .vector_size = sizeof (u32),
2142   .format_trace = format_ip4_forward_next_trace,
2143   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2144   .error_strings = ip4_arp_error_strings,
2145   .n_next_nodes = IP4_ARP_N_NEXT,
2146   .next_nodes =
2147   {
2148     [IP4_ARP_NEXT_DROP] = "error-drop",
2149   },
2150 };
2151
2152 VLIB_REGISTER_NODE (ip4_glean_node) =
2153 {
2154   .name = "ip4-glean",
2155   .vector_size = sizeof (u32),
2156   .format_trace = format_ip4_forward_next_trace,
2157   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2158   .error_strings = ip4_arp_error_strings,
2159   .n_next_nodes = IP4_ARP_N_NEXT,
2160   .next_nodes = {
2161   [IP4_ARP_NEXT_DROP] = "error-drop",
2162   },
2163 };
2164 /* *INDENT-ON* */
2165
2166 #define foreach_notrace_ip4_arp_error           \
2167 _(THROTTLED)                                    \
2168 _(RESOLVED)                                     \
2169 _(NO_BUFFERS)                                   \
2170 _(REQUEST_SENT)                                 \
2171 _(NON_ARP_ADJ)                                  \
2172 _(NO_SOURCE_ADDRESS)
2173
2174 static clib_error_t *
2175 arp_notrace_init (vlib_main_t * vm)
2176 {
2177   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2178
2179   /* don't trace ARP request packets */
2180 #define _(a)                                    \
2181     vnet_pcap_drop_trace_filter_add_del         \
2182         (rt->errors[IP4_ARP_ERROR_##a],         \
2183          1 /* is_add */);
2184   foreach_notrace_ip4_arp_error;
2185 #undef _
2186   return 0;
2187 }
2188
2189 VLIB_INIT_FUNCTION (arp_notrace_init);
2190
2191
2192 #ifndef CLIB_MARCH_VARIANT
2193 /* Send an ARP request to see if given destination is reachable on given interface. */
2194 clib_error_t *
2195 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
2196                     u8 refresh)
2197 {
2198   vnet_main_t *vnm = vnet_get_main ();
2199   ip4_main_t *im = &ip4_main;
2200   ethernet_arp_header_t *h;
2201   ip4_address_t *src;
2202   ip_interface_address_t *ia;
2203   ip_adjacency_t *adj;
2204   vnet_hw_interface_t *hi;
2205   vnet_sw_interface_t *si;
2206   vlib_buffer_t *b;
2207   adj_index_t ai;
2208   u32 bi = 0;
2209   u8 unicast_rewrite = 0;
2210
2211   si = vnet_get_sw_interface (vnm, sw_if_index);
2212
2213   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2214     {
2215       return clib_error_return (0, "%U: interface %U down",
2216                                 format_ip4_address, dst,
2217                                 format_vnet_sw_if_index_name, vnm,
2218                                 sw_if_index);
2219     }
2220
2221   src =
2222     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2223   if (!src)
2224     {
2225       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2226       return clib_error_return
2227         (0,
2228          "no matching interface address for destination %U (interface %U)",
2229          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2230          sw_if_index);
2231     }
2232
2233   h = vlib_packet_template_get_packet (vm,
2234                                        &im->ip4_arp_request_packet_template,
2235                                        &bi);
2236
2237   if (!h)
2238     return clib_error_return (0, "ARP request packet allocation failed");
2239
2240   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2241   if (PREDICT_FALSE (!hi->hw_address))
2242     {
2243       return clib_error_return (0, "%U: interface %U do not support ip probe",
2244                                 format_ip4_address, dst,
2245                                 format_vnet_sw_if_index_name, vnm,
2246                                 sw_if_index);
2247     }
2248
2249   mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2250
2251   h->ip4_over_ethernet[0].ip4 = src[0];
2252   h->ip4_over_ethernet[1].ip4 = dst[0];
2253
2254   b = vlib_get_buffer (vm, bi);
2255   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2256     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2257
2258   ip46_address_t nh = {
2259     .ip4 = *dst,
2260   };
2261
2262   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2263                             VNET_LINK_IP4, &nh, sw_if_index);
2264   adj = adj_get (ai);
2265
2266   /* Peer has been previously resolved, retrieve glean adj instead */
2267   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2268     {
2269       if (refresh)
2270         unicast_rewrite = 1;
2271       else
2272         {
2273           adj_unlock (ai);
2274           ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2275                                       VNET_LINK_IP4, sw_if_index, &nh);
2276           adj = adj_get (ai);
2277         }
2278     }
2279
2280   /* Add encapsulation string for software interface (e.g. ethernet header). */
2281   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2282   if (unicast_rewrite)
2283     {
2284       u16 *etype = vlib_buffer_get_current (b) - 2;
2285       etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2286     }
2287   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2288
2289   {
2290     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2291     u32 *to_next = vlib_frame_vector_args (f);
2292     to_next[0] = bi;
2293     f->n_vectors = 1;
2294     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2295   }
2296
2297   adj_unlock (ai);
2298   return /* no error */ 0;
2299 }
2300 #endif
2301
/** Static next nodes of the IPv4 rewrite nodes. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  IP4_REWRITE_N_NEXT = 3,       /* Count of the entries above; keep last */
} ip4_rewrite_next_t;
2309
/**
 * The bits of an IPv4 address that are kept when constructing a
 * multicast MAC address. The constant depends on host byte order
 * because it is applied to the address as stored in the packet.
 */
#if CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0x007fffff
#else
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#endif
2319
2320 always_inline void
2321 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2322                u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2323 {
2324   if (packet_len > adj_packet_bytes)
2325     {
2326       *error = IP4_ERROR_MTU_EXCEEDED;
2327       if (df)
2328         {
2329           icmp4_error_set_vnet_buffer
2330             (b, ICMP4_destination_unreachable,
2331              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2332              adj_packet_bytes);
2333           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2334         }
2335       else
2336         {
2337           /* IP fragmentation */
2338           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2339                                    IP4_FRAG_NEXT_IP4_REWRITE, 0);
2340           *next = IP4_REWRITE_NEXT_FRAGMENT;
2341         }
2342     }
2343 }
2344
2345 /* Decrement TTL & update checksum.
2346    Works either endian, so no need for byte swap. */
2347 static_always_inline void
2348 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2349                             u32 * error)
2350 {
2351   i32 ttl;
2352   u32 checksum;
2353   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2354     {
2355       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2356       return;
2357     }
2358
2359   ttl = ip->ttl;
2360
2361   /* Input node should have reject packets with ttl 0. */
2362   ASSERT (ip->ttl > 0);
2363
2364   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2365   checksum += checksum >= 0xffff;
2366
2367   ip->checksum = checksum;
2368   ttl -= 1;
2369   ip->ttl = ttl;
2370
2371   /*
2372    * If the ttl drops below 1 when forwarding, generate
2373    * an ICMP response.
2374    */
2375   if (PREDICT_FALSE (ttl <= 0))
2376     {
2377       *error = IP4_ERROR_TIME_EXPIRED;
2378       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2379       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2380                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2381                                    0);
2382       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2383     }
2384
2385   /* Verify checksum. */
2386   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2387           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2388 }
2389
2390
2391 always_inline uword
2392 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2393                              vlib_node_runtime_t * node,
2394                              vlib_frame_t * frame,
2395                              int do_counters, int is_midchain, int is_mcast,
2396                              int do_gso)
2397 {
2398   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2399   u32 *from = vlib_frame_vector_args (frame);
2400   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2401   u16 nexts[VLIB_FRAME_SIZE], *next;
2402   u32 n_left_from;
2403   vlib_node_runtime_t *error_node =
2404     vlib_node_get_runtime (vm, ip4_input_node.index);
2405
2406   n_left_from = frame->n_vectors;
2407   u32 thread_index = vm->thread_index;
2408
2409   vlib_get_buffers (vm, from, bufs, n_left_from);
2410   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2411
2412   if (n_left_from >= 6)
2413     {
2414       int i;
2415       for (i = 2; i < 6; i++)
2416         vlib_prefetch_buffer_header (bufs[i], LOAD);
2417     }
2418
2419   next = nexts;
2420   b = bufs;
2421   while (n_left_from >= 8)
2422     {
2423       ip_adjacency_t *adj0, *adj1;
2424       ip4_header_t *ip0, *ip1;
2425       u32 rw_len0, error0, adj_index0;
2426       u32 rw_len1, error1, adj_index1;
2427       u32 tx_sw_if_index0, tx_sw_if_index1;
2428       u8 *p;
2429
2430       vlib_prefetch_buffer_header (b[6], LOAD);
2431       vlib_prefetch_buffer_header (b[7], LOAD);
2432
2433       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2434       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2435
2436       /*
2437        * pre-fetch the per-adjacency counters
2438        */
2439       if (do_counters)
2440         {
2441           vlib_prefetch_combined_counter (&adjacency_counters,
2442                                           thread_index, adj_index0);
2443           vlib_prefetch_combined_counter (&adjacency_counters,
2444                                           thread_index, adj_index1);
2445         }
2446
2447       ip0 = vlib_buffer_get_current (b[0]);
2448       ip1 = vlib_buffer_get_current (b[1]);
2449
2450       error0 = error1 = IP4_ERROR_NONE;
2451
2452       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2453       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2454
2455       /* Rewrite packet header and updates lengths. */
2456       adj0 = adj_get (adj_index0);
2457       adj1 = adj_get (adj_index1);
2458
2459       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2460       rw_len0 = adj0[0].rewrite_header.data_bytes;
2461       rw_len1 = adj1[0].rewrite_header.data_bytes;
2462       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2463       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2464
2465       p = vlib_buffer_get_current (b[2]);
2466       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2467       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2468
2469       p = vlib_buffer_get_current (b[3]);
2470       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2471       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2472
2473       /* Check MTU of outgoing interface. */
2474       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2475       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2476
2477       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2478         ip0_len = gso_mtu_sz (b[0]);
2479       if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
2480         ip1_len = gso_mtu_sz (b[1]);
2481
2482       ip4_mtu_check (b[0], ip0_len,
2483                      adj0[0].rewrite_header.max_l3_packet_bytes,
2484                      ip0->flags_and_fragment_offset &
2485                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2486                      next + 0, &error0);
2487       ip4_mtu_check (b[1], ip1_len,
2488                      adj1[0].rewrite_header.max_l3_packet_bytes,
2489                      ip1->flags_and_fragment_offset &
2490                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2491                      next + 1, &error1);
2492
2493       if (is_mcast)
2494         {
2495           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2496                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2497                     IP4_ERROR_SAME_INTERFACE : error0);
2498           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2499                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2500                     IP4_ERROR_SAME_INTERFACE : error1);
2501         }
2502
2503       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2504        * to see the IP header */
2505       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2506         {
2507           u32 next_index = adj0[0].rewrite_header.next_index;
2508           b[0]->current_data -= rw_len0;
2509           b[0]->current_length += rw_len0;
2510           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2511           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2512
2513           if (PREDICT_FALSE
2514               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2515             vnet_feature_arc_start (lm->output_feature_arc_index,
2516                                     tx_sw_if_index0, &next_index, b[0]);
2517           next[0] = next_index;
2518         }
2519       else
2520         {
2521           b[0]->error = error_node->errors[error0];
2522         }
2523       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2524         {
2525           u32 next_index = adj1[0].rewrite_header.next_index;
2526           b[1]->current_data -= rw_len1;
2527           b[1]->current_length += rw_len1;
2528
2529           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2530           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2531
2532           if (PREDICT_FALSE
2533               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2534             vnet_feature_arc_start (lm->output_feature_arc_index,
2535                                     tx_sw_if_index1, &next_index, b[1]);
2536           next[1] = next_index;
2537         }
2538       else
2539         {
2540           b[1]->error = error_node->errors[error1];
2541         }
2542       if (is_midchain)
2543         {
2544           calc_checksums (vm, b[0]);
2545           calc_checksums (vm, b[1]);
2546         }
2547       /* Guess we are only writing on simple Ethernet header. */
2548       vnet_rewrite_two_headers (adj0[0], adj1[0],
2549                                 ip0, ip1, sizeof (ethernet_header_t));
2550
2551       /*
2552        * Bump the per-adjacency counters
2553        */
2554       if (do_counters)
2555         {
2556           vlib_increment_combined_counter
2557             (&adjacency_counters,
2558              thread_index,
2559              adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2560
2561           vlib_increment_combined_counter
2562             (&adjacency_counters,
2563              thread_index,
2564              adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2565         }
2566
2567       if (is_midchain)
2568         {
2569           if (adj0->sub_type.midchain.fixup_func)
2570             adj0->sub_type.midchain.fixup_func
2571               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2572           if (adj1->sub_type.midchain.fixup_func)
2573             adj1->sub_type.midchain.fixup_func
2574               (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2575         }
2576
2577       if (is_mcast)
2578         {
2579           /*
2580            * copy bytes from the IP address into the MAC rewrite
2581            */
2582           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2583                                       adj0->rewrite_header.dst_mcast_offset,
2584                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2585           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2586                                       adj1->rewrite_header.dst_mcast_offset,
2587                                       &ip1->dst_address.as_u32, (u8 *) ip1);
2588         }
2589
2590       next += 2;
2591       b += 2;
2592       n_left_from -= 2;
2593     }
2594
2595   while (n_left_from > 0)
2596     {
2597       ip_adjacency_t *adj0;
2598       ip4_header_t *ip0;
2599       u32 rw_len0, adj_index0, error0;
2600       u32 tx_sw_if_index0;
2601
2602       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2603
2604       adj0 = adj_get (adj_index0);
2605
2606       if (do_counters)
2607         vlib_prefetch_combined_counter (&adjacency_counters,
2608                                         thread_index, adj_index0);
2609
2610       ip0 = vlib_buffer_get_current (b[0]);
2611
2612       error0 = IP4_ERROR_NONE;
2613
2614       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2615
2616
2617       /* Update packet buffer attributes/set output interface. */
2618       rw_len0 = adj0[0].rewrite_header.data_bytes;
2619       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2620
2621       /* Check MTU of outgoing interface. */
2622       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2623       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2624         ip0_len = gso_mtu_sz (b[0]);
2625
2626       ip4_mtu_check (b[0], ip0_len,
2627                      adj0[0].rewrite_header.max_l3_packet_bytes,
2628                      ip0->flags_and_fragment_offset &
2629                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2630                      next + 0, &error0);
2631
2632       if (is_mcast)
2633         {
2634           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2635                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2636                     IP4_ERROR_SAME_INTERFACE : error0);
2637         }
2638
2639       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2640        * to see the IP header */
2641       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2642         {
2643           u32 next_index = adj0[0].rewrite_header.next_index;
2644           b[0]->current_data -= rw_len0;
2645           b[0]->current_length += rw_len0;
2646           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2647           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2648
2649           if (PREDICT_FALSE
2650               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2651             vnet_feature_arc_start (lm->output_feature_arc_index,
2652                                     tx_sw_if_index0, &next_index, b[0]);
2653           next[0] = next_index;
2654         }
2655       else
2656         {
2657           b[0]->error = error_node->errors[error0];
2658         }
2659       if (is_midchain)
2660         {
2661           calc_checksums (vm, b[0]);
2662         }
2663       /* Guess we are only writing on simple Ethernet header. */
2664       vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2665
2666       if (do_counters)
2667         vlib_increment_combined_counter
2668           (&adjacency_counters,
2669            thread_index, adj_index0, 1,
2670            vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2671
2672       if (is_midchain)
2673         {
2674           if (adj0->sub_type.midchain.fixup_func)
2675             adj0->sub_type.midchain.fixup_func
2676               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2677         }
2678
2679       if (is_mcast)
2680         {
2681           /*
2682            * copy bytes from the IP address into the MAC rewrite
2683            */
2684           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2685                                       adj0->rewrite_header.dst_mcast_offset,
2686                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2687         }
2688
2689       next += 1;
2690       b += 1;
2691       n_left_from -= 1;
2692     }
2693
2694
2695   /* Need to do trace after rewrites to pick up new packet data. */
2696   if (node->flags & VLIB_NODE_FLAG_TRACE)
2697     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2698
2699   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2700   return frame->n_vectors;
2701 }
2702
2703 always_inline uword
2704 ip4_rewrite_inline (vlib_main_t * vm,
2705                     vlib_node_runtime_t * node,
2706                     vlib_frame_t * frame,
2707                     int do_counters, int is_midchain, int is_mcast)
2708 {
2709   vnet_main_t *vnm = vnet_get_main ();
2710   if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2711     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2712                                         is_midchain, is_mcast,
2713                                         1 /* do_gso */ );
2714   else
2715     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2716                                         is_midchain, is_mcast,
2717                                         0 /* no do_gso */ );
2718 }
2719
2720
2721 /** @brief IPv4 rewrite node.
2722     @node ip4-rewrite
2723
2724     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2725     header checksum, fetch the ip adjacency, check the outbound mtu,
2726     apply the adjacency rewrite, and send pkts to the adjacency
2727     rewrite header's rewrite_next_index.
2728
2729     @param vm vlib_main_t corresponding to the current thread
2730     @param node vlib_node_runtime_t
2731     @param frame vlib_frame_t whose contents should be dispatched
2732
2733     @par Graph mechanics: buffer metadata, next index usage
2734
2735     @em Uses:
2736     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2737         - the rewrite adjacency index
2738     - <code>adj->lookup_next_index</code>
2739         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2740           the packet will be dropped.
2741     - <code>adj->rewrite_header</code>
2742         - Rewrite string length, rewrite string, next_index
2743
2744     @em Sets:
2745     - <code>b->current_data, b->current_length</code>
2746         - Updated net of applying the rewrite string
2747
2748     <em>Next Indices:</em>
2749     - <code> adj->rewrite_header.next_index </code>
2750       or @c ip4-drop
2751 */
2752
2753 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2754                                  vlib_frame_t * frame)
2755 {
2756   if (adj_are_counters_enabled ())
2757     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2758   else
2759     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2760 }
2761
2762 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2763                                        vlib_node_runtime_t * node,
2764                                        vlib_frame_t * frame)
2765 {
2766   if (adj_are_counters_enabled ())
2767     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2768   else
2769     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2770 }
2771
2772 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2773                                   vlib_node_runtime_t * node,
2774                                   vlib_frame_t * frame)
2775 {
2776   if (adj_are_counters_enabled ())
2777     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2778   else
2779     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2780 }
2781
2782 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2783                                        vlib_node_runtime_t * node,
2784                                        vlib_frame_t * frame)
2785 {
2786   if (adj_are_counters_enabled ())
2787     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2788   else
2789     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2790 }
2791
2792 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2793                                         vlib_node_runtime_t * node,
2794                                         vlib_frame_t * frame)
2795 {
2796   if (adj_are_counters_enabled ())
2797     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2798   else
2799     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2800 }
2801
2802 /* *INDENT-OFF* */
2803 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2804   .name = "ip4-rewrite",
2805   .vector_size = sizeof (u32),
2806
2807   .format_trace = format_ip4_rewrite_trace,
2808
2809   .n_next_nodes = IP4_REWRITE_N_NEXT,
2810   .next_nodes = {
2811     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2812     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2813     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2814   },
2815 };
2816
2817 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2818   .name = "ip4-rewrite-bcast",
2819   .vector_size = sizeof (u32),
2820
2821   .format_trace = format_ip4_rewrite_trace,
2822   .sibling_of = "ip4-rewrite",
2823 };
2824
2825 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2826   .name = "ip4-rewrite-mcast",
2827   .vector_size = sizeof (u32),
2828
2829   .format_trace = format_ip4_rewrite_trace,
2830   .sibling_of = "ip4-rewrite",
2831 };
2832
2833 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2834   .name = "ip4-mcast-midchain",
2835   .vector_size = sizeof (u32),
2836
2837   .format_trace = format_ip4_rewrite_trace,
2838   .sibling_of = "ip4-rewrite",
2839 };
2840
2841 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2842   .name = "ip4-midchain",
2843   .vector_size = sizeof (u32),
2844   .format_trace = format_ip4_forward_next_trace,
2845   .sibling_of =  "ip4-rewrite",
2846 };
/* *INDENT-ON* */
2848
2849 static int
2850 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2851 {
2852   ip4_fib_mtrie_t *mtrie0;
2853   ip4_fib_mtrie_leaf_t leaf0;
2854   u32 lbi0;
2855
2856   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2857
2858   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2859   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2860   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2861
2862   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2863
2864   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2865 }
2866
2867 static clib_error_t *
2868 test_lookup_command_fn (vlib_main_t * vm,
2869                         unformat_input_t * input, vlib_cli_command_t * cmd)
2870 {
2871   ip4_fib_t *fib;
2872   u32 table_id = 0;
2873   f64 count = 1;
2874   u32 n;
2875   int i;
2876   ip4_address_t ip4_base_address;
2877   u64 errors = 0;
2878
2879   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2880     {
2881       if (unformat (input, "table %d", &table_id))
2882         {
2883           /* Make sure the entry exists. */
2884           fib = ip4_fib_get (table_id);
2885           if ((fib) && (fib->index != table_id))
2886             return clib_error_return (0, "<fib-index> %d does not exist",
2887                                       table_id);
2888         }
2889       else if (unformat (input, "count %f", &count))
2890         ;
2891
2892       else if (unformat (input, "%U",
2893                          unformat_ip4_address, &ip4_base_address))
2894         ;
2895       else
2896         return clib_error_return (0, "unknown input `%U'",
2897                                   format_unformat_error, input);
2898     }
2899
2900   n = count;
2901
2902   for (i = 0; i < n; i++)
2903     {
2904       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2905         errors++;
2906
2907       ip4_base_address.as_u32 =
2908         clib_host_to_net_u32 (1 +
2909                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2910     }
2911
2912   if (errors)
2913     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2914   else
2915     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2916
2917   return 0;
2918 }
2919
2920 /*?
2921  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2922  * given FIB table to determine if there is a conflict with the
2923  * adjacency table. The fib-id can be determined by using the
2924  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2925  * of 0 is used.
2926  *
2927  * @todo This command uses fib-id, other commands use table-id (not
2928  * just a name, they are different indexes). Would like to change this
2929  * to table-id for consistency.
2930  *
2931  * @cliexpar
2932  * Example of how to run the test lookup command:
2933  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2934  * No errors in 2 lookups
2935  * @cliexend
2936 ?*/
2937 /* *INDENT-OFF* */
2938 VLIB_CLI_COMMAND (lookup_test_command, static) =
2939 {
2940   .path = "test lookup",
2941   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2942   .function = test_lookup_command_fn,
2943 };
2944 /* *INDENT-ON* */
2945
2946 #ifndef CLIB_MARCH_VARIANT
2947 int
2948 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2949 {
2950   u32 fib_index;
2951
2952   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2953
2954   if (~0 == fib_index)
2955     return VNET_API_ERROR_NO_SUCH_FIB;
2956
2957   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2958                                   flow_hash_config);
2959
2960   return 0;
2961 }
2962 #endif
2963
2964 static clib_error_t *
2965 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2966                              unformat_input_t * input,
2967                              vlib_cli_command_t * cmd)
2968 {
2969   int matched = 0;
2970   u32 table_id = 0;
2971   u32 flow_hash_config = 0;
2972   int rv;
2973
2974   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2975     {
2976       if (unformat (input, "table %d", &table_id))
2977         matched = 1;
2978 #define _(a,v) \
2979     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2980       foreach_flow_hash_bit
2981 #undef _
2982         else
2983         break;
2984     }
2985
2986   if (matched == 0)
2987     return clib_error_return (0, "unknown input `%U'",
2988                               format_unformat_error, input);
2989
2990   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2991   switch (rv)
2992     {
2993     case 0:
2994       break;
2995
2996     case VNET_API_ERROR_NO_SUCH_FIB:
2997       return clib_error_return (0, "no such FIB table %d", table_id);
2998
2999     default:
3000       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3001       break;
3002     }
3003
3004   return 0;
3005 }
3006
3007 /*?
3008  * Configure the set of IPv4 fields used by the flow hash.
3009  *
3010  * @cliexpar
3011  * Example of how to set the flow hash on a given table:
3012  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
3014  * @cliexstart{show ip fib}
3015  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3016  * 0.0.0.0/0
3017  *   unicast-ip4-chain
3018  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3019  *     [0] [@0]: dpo-drop ip6
3020  * 0.0.0.0/32
3021  *   unicast-ip4-chain
3022  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3023  *     [0] [@0]: dpo-drop ip6
3024  * 224.0.0.0/8
3025  *   unicast-ip4-chain
3026  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3027  *     [0] [@0]: dpo-drop ip6
3028  * 6.0.1.2/32
3029  *   unicast-ip4-chain
3030  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3031  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3032  * 7.0.0.1/32
3033  *   unicast-ip4-chain
3034  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3035  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3036  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3037  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3038  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3039  * 240.0.0.0/8
3040  *   unicast-ip4-chain
3041  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3042  *     [0] [@0]: dpo-drop ip6
3043  * 255.255.255.255/32
3044  *   unicast-ip4-chain
3045  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3046  *     [0] [@0]: dpo-drop ip6
3047  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3048  * 0.0.0.0/0
3049  *   unicast-ip4-chain
3050  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3051  *     [0] [@0]: dpo-drop ip6
3052  * 0.0.0.0/32
3053  *   unicast-ip4-chain
3054  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3055  *     [0] [@0]: dpo-drop ip6
3056  * 172.16.1.0/24
3057  *   unicast-ip4-chain
3058  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3059  *     [0] [@4]: ipv4-glean: af_packet0
3060  * 172.16.1.1/32
3061  *   unicast-ip4-chain
3062  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3063  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3064  * 172.16.1.2/32
3065  *   unicast-ip4-chain
3066  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3067  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3068  * 172.16.2.0/24
3069  *   unicast-ip4-chain
3070  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3071  *     [0] [@4]: ipv4-glean: af_packet1
3072  * 172.16.2.1/32
3073  *   unicast-ip4-chain
3074  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3075  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3076  * 224.0.0.0/8
3077  *   unicast-ip4-chain
3078  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3079  *     [0] [@0]: dpo-drop ip6
3080  * 240.0.0.0/8
3081  *   unicast-ip4-chain
3082  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3083  *     [0] [@0]: dpo-drop ip6
3084  * 255.255.255.255/32
3085  *   unicast-ip4-chain
3086  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3087  *     [0] [@0]: dpo-drop ip6
3088  * @cliexend
3089 ?*/
3090 /* *INDENT-OFF* */
3091 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3092 {
3093   .path = "set ip flow-hash",
3094   .short_help =
3095   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3096   .function = set_ip_flow_hash_command_fn,
3097 };
3098 /* *INDENT-ON* */
3099
3100 #ifndef CLIB_MARCH_VARIANT
3101 int
3102 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3103                              u32 table_index)
3104 {
3105   vnet_main_t *vnm = vnet_get_main ();
3106   vnet_interface_main_t *im = &vnm->interface_main;
3107   ip4_main_t *ipm = &ip4_main;
3108   ip_lookup_main_t *lm = &ipm->lookup_main;
3109   vnet_classify_main_t *cm = &vnet_classify_main;
3110   ip4_address_t *if_addr;
3111
3112   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3113     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3114
3115   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3116     return VNET_API_ERROR_NO_SUCH_ENTRY;
3117
3118   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3119   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3120
3121   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3122
3123   if (NULL != if_addr)
3124     {
3125       fib_prefix_t pfx = {
3126         .fp_len = 32,
3127         .fp_proto = FIB_PROTOCOL_IP4,
3128         .fp_addr.ip4 = *if_addr,
3129       };
3130       u32 fib_index;
3131
3132       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3133                                                        sw_if_index);
3134
3135
3136       if (table_index != (u32) ~ 0)
3137         {
3138           dpo_id_t dpo = DPO_INVALID;
3139
3140           dpo_set (&dpo,
3141                    DPO_CLASSIFY,
3142                    DPO_PROTO_IP4,
3143                    classify_dpo_create (DPO_PROTO_IP4, table_index));
3144
3145           fib_table_entry_special_dpo_add (fib_index,
3146                                            &pfx,
3147                                            FIB_SOURCE_CLASSIFY,
3148                                            FIB_ENTRY_FLAG_NONE, &dpo);
3149           dpo_reset (&dpo);
3150         }
3151       else
3152         {
3153           fib_table_entry_special_remove (fib_index,
3154                                           &pfx, FIB_SOURCE_CLASSIFY);
3155         }
3156     }
3157
3158   return 0;
3159 }
3160 #endif
3161
3162 static clib_error_t *
3163 set_ip_classify_command_fn (vlib_main_t * vm,
3164                             unformat_input_t * input,
3165                             vlib_cli_command_t * cmd)
3166 {
3167   u32 table_index = ~0;
3168   int table_index_set = 0;
3169   u32 sw_if_index = ~0;
3170   int rv;
3171
3172   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3173     {
3174       if (unformat (input, "table-index %d", &table_index))
3175         table_index_set = 1;
3176       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3177                          vnet_get_main (), &sw_if_index))
3178         ;
3179       else
3180         break;
3181     }
3182
3183   if (table_index_set == 0)
3184     return clib_error_return (0, "classify table-index must be specified");
3185
3186   if (sw_if_index == ~0)
3187     return clib_error_return (0, "interface / subif must be specified");
3188
3189   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3190
3191   switch (rv)
3192     {
3193     case 0:
3194       break;
3195
3196     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3197       return clib_error_return (0, "No such interface");
3198
3199     case VNET_API_ERROR_NO_SUCH_ENTRY:
3200       return clib_error_return (0, "No such classifier table");
3201     }
3202   return 0;
3203 }
3204
3205 /*?
3206  * Assign a classification table to an interface. The classification
3207  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
3209  * on an interface.
3210  *
3211  * @cliexpar
3212  * Example of how to assign a classification table to an interface:
3213  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3214 ?*/
3215 /* *INDENT-OFF* */
3216 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3217 {
3218     .path = "set ip classify",
3219     .short_help =
3220     "set ip classify intfc <interface> table-index <classify-idx>",
3221     .function = set_ip_classify_command_fn,
3222 };
3223 /* *INDENT-ON* */
3224
3225 static clib_error_t *
3226 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3227 {
3228   ip4_main_t *im = &ip4_main;
3229   uword heapsize = 0;
3230
3231   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3232     {
3233       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3234         ;
3235       else
3236         return clib_error_return (0,
3237                                   "invalid heap-size parameter `%U'",
3238                                   format_unformat_error, input);
3239     }
3240
3241   im->mtrie_heap_size = heapsize;
3242
3243   return 0;
3244 }
3245
3246 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3247
3248 /*
3249  * fd.io coding-style-patch-verification: ON
3250  *
3251  * Local Variables:
3252  * eval: (c-set-style "gnu")
3253  * End:
3254  */