ip: fix udp/tcp checksum corner cases
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
59
60 /** @brief IPv4 lookup node.
61     @node ip4-lookup
62
63     This is the main IPv4 lookup dispatch node.
64
65     @param vm vlib_main_t corresponding to the current thread
66     @param node vlib_node_runtime_t
67     @param frame vlib_frame_t whose contents should be dispatched
68
69     @par Graph mechanics: buffer metadata, next index usage
70
71     @em Uses:
72     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73         - Indicates the @c sw_if_index value of the interface that the
74           packet was received on.
75     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76         - When the value is @c ~0 then the node performs a longest prefix
77           match (LPM) for the packet destination address in the FIB attached
78           to the receive interface.
79         - Otherwise perform LPM for the packet destination address in the
80           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81           value (0, 1, ...) and not a VRF id.
82
83     @em Sets:
84     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85         - The lookup result adjacency index.
86
87     <em>Next Index:</em>
88     - Dispatches the packet to the node index found in
89       ip_adjacency_t @c adj->lookup_next_index
90       (where @c adj is the lookup result adjacency).
91 */
92 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
93                                 vlib_frame_t * frame)
94 {
95   return ip4_lookup_inline (vm, node, frame);
96 }
97
98 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
99
100 /* *INDENT-OFF* */
101 VLIB_REGISTER_NODE (ip4_lookup_node) =
102 {
103   .name = "ip4-lookup",
104   .vector_size = sizeof (u32),
105   .format_trace = format_ip4_lookup_trace,
106   .n_next_nodes = IP_LOOKUP_N_NEXT,
107   .next_nodes = IP4_LOOKUP_NEXT_NODES,
108 };
109 /* *INDENT-ON* */
110
111 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
112                                       vlib_node_runtime_t * node,
113                                       vlib_frame_t * frame)
114 {
115   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
116   u32 n_left, *from;
117   u32 thread_index = vm->thread_index;
118   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
119   u16 nexts[VLIB_FRAME_SIZE], *next;
120
121   from = vlib_frame_vector_args (frame);
122   n_left = frame->n_vectors;
123   next = nexts;
124
125   vlib_get_buffers (vm, from, bufs, n_left);
126
127   while (n_left >= 4)
128     {
129       const load_balance_t *lb0, *lb1;
130       const ip4_header_t *ip0, *ip1;
131       u32 lbi0, hc0, lbi1, hc1;
132       const dpo_id_t *dpo0, *dpo1;
133
134       /* Prefetch next iteration. */
135       {
136         vlib_prefetch_buffer_header (b[2], LOAD);
137         vlib_prefetch_buffer_header (b[3], LOAD);
138
139         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
140         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
141       }
142
143       ip0 = vlib_buffer_get_current (b[0]);
144       ip1 = vlib_buffer_get_current (b[1]);
145       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
146       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
147
148       lb0 = load_balance_get (lbi0);
149       lb1 = load_balance_get (lbi1);
150
151       /*
152        * this node is for via FIBs we can re-use the hash value from the
153        * to node if present.
154        * We don't want to use the same hash value at each level in the recursion
155        * graph as that would lead to polarisation
156        */
157       hc0 = hc1 = 0;
158
159       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
160         {
161           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
162             {
163               hc0 = vnet_buffer (b[0])->ip.flow_hash =
164                 vnet_buffer (b[0])->ip.flow_hash >> 1;
165             }
166           else
167             {
168               hc0 = vnet_buffer (b[0])->ip.flow_hash =
169                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
170             }
171           dpo0 = load_balance_get_fwd_bucket
172             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
173         }
174       else
175         {
176           dpo0 = load_balance_get_bucket_i (lb0, 0);
177         }
178       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
179         {
180           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
181             {
182               hc1 = vnet_buffer (b[1])->ip.flow_hash =
183                 vnet_buffer (b[1])->ip.flow_hash >> 1;
184             }
185           else
186             {
187               hc1 = vnet_buffer (b[1])->ip.flow_hash =
188                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
189             }
190           dpo1 = load_balance_get_fwd_bucket
191             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
192         }
193       else
194         {
195           dpo1 = load_balance_get_bucket_i (lb1, 0);
196         }
197
198       next[0] = dpo0->dpoi_next_node;
199       next[1] = dpo1->dpoi_next_node;
200
201       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
202       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
203
204       vlib_increment_combined_counter
205         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
206       vlib_increment_combined_counter
207         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
208
209       b += 2;
210       next += 2;
211       n_left -= 2;
212     }
213
214   while (n_left > 0)
215     {
216       const load_balance_t *lb0;
217       const ip4_header_t *ip0;
218       const dpo_id_t *dpo0;
219       u32 lbi0, hc0;
220
221       ip0 = vlib_buffer_get_current (b[0]);
222       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
223
224       lb0 = load_balance_get (lbi0);
225
226       hc0 = 0;
227       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
228         {
229           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
230             {
231               hc0 = vnet_buffer (b[0])->ip.flow_hash =
232                 vnet_buffer (b[0])->ip.flow_hash >> 1;
233             }
234           else
235             {
236               hc0 = vnet_buffer (b[0])->ip.flow_hash =
237                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
238             }
239           dpo0 = load_balance_get_fwd_bucket
240             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
241         }
242       else
243         {
244           dpo0 = load_balance_get_bucket_i (lb0, 0);
245         }
246
247       next[0] = dpo0->dpoi_next_node;
248       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
249
250       vlib_increment_combined_counter
251         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
252
253       b += 1;
254       next += 1;
255       n_left -= 1;
256     }
257
258   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
259   if (node->flags & VLIB_NODE_FLAG_TRACE)
260     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
261
262   return frame->n_vectors;
263 }
264
265 /* *INDENT-OFF* */
266 VLIB_REGISTER_NODE (ip4_load_balance_node) =
267 {
268   .name = "ip4-load-balance",
269   .vector_size = sizeof (u32),
270   .sibling_of = "ip4-lookup",
271   .format_trace = format_ip4_lookup_trace,
272 };
273 /* *INDENT-ON* */
274
275 #ifndef CLIB_MARCH_VARIANT
276 /* get first interface address */
277 ip4_address_t *
278 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
279                              ip_interface_address_t ** result_ia)
280 {
281   ip_lookup_main_t *lm = &im->lookup_main;
282   ip_interface_address_t *ia = 0;
283   ip4_address_t *result = 0;
284
285   /* *INDENT-OFF* */
286   foreach_ip_interface_address
287     (lm, ia, sw_if_index,
288      1 /* honor unnumbered */ ,
289      ({
290        ip4_address_t * a =
291          ip_interface_address_get_address (lm, ia);
292        result = a;
293        break;
294      }));
295   /* *INDENT-OFF* */
296   if (result_ia)
297     *result_ia = result ? ia : 0;
298   return result;
299 }
300 #endif
301
302 static void
303 ip4_add_subnet_bcast_route (u32 fib_index,
304                             fib_prefix_t *pfx,
305                             u32 sw_if_index)
306 {
307   vnet_sw_interface_flags_t iflags;
308
309   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
310
311   fib_table_entry_special_remove(fib_index,
312                                  pfx,
313                                  FIB_SOURCE_INTERFACE);
314
315   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
316     {
317       fib_table_entry_update_one_path (fib_index, pfx,
318                                        FIB_SOURCE_INTERFACE,
319                                        FIB_ENTRY_FLAG_NONE,
320                                        DPO_PROTO_IP4,
321                                        /* No next-hop address */
322                                        &ADJ_BCAST_ADDR,
323                                        sw_if_index,
324                                        // invalid FIB index
325                                        ~0,
326                                        1,
327                                        // no out-label stack
328                                        NULL,
329                                        FIB_ROUTE_PATH_FLAG_NONE);
330     }
331   else
332     {
333         fib_table_entry_special_add(fib_index,
334                                     pfx,
335                                     FIB_SOURCE_INTERFACE,
336                                     (FIB_ENTRY_FLAG_DROP |
337                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
338     }
339 }
340
341 static void
342 ip4_add_interface_prefix_routes (ip4_main_t *im,
343                                  u32 sw_if_index,
344                                  u32 fib_index,
345                                  ip_interface_address_t * a)
346 {
347   ip_lookup_main_t *lm = &im->lookup_main;
348   ip_interface_prefix_t *if_prefix;
349   ip4_address_t *address = ip_interface_address_get_address (lm, a);
350
351   ip_interface_prefix_key_t key = {
352     .prefix = {
353       .fp_len = a->address_length,
354       .fp_proto = FIB_PROTOCOL_IP4,
355       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
356     },
357     .sw_if_index = sw_if_index,
358   };
359
360   fib_prefix_t pfx_special = {
361     .fp_proto = FIB_PROTOCOL_IP4,
362   };
363
364   /* If prefix already set on interface, just increment ref count & return */
365   if_prefix = ip_get_interface_prefix (lm, &key);
366   if (if_prefix)
367     {
368       if_prefix->ref_count += 1;
369       return;
370     }
371
372   /* New prefix - allocate a pool entry, initialize it, add to the hash */
373   pool_get (lm->if_prefix_pool, if_prefix);
374   if_prefix->ref_count = 1;
375   if_prefix->src_ia_index = a - lm->if_address_pool;
376   clib_memcpy (&if_prefix->key, &key, sizeof (key));
377   mhash_set (&lm->prefix_to_if_prefix_index, &key,
378              if_prefix - lm->if_prefix_pool, 0 /* old value */);
379
380   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
381   if (a->address_length <= 30)
382     {
383       pfx_special.fp_len = a->address_length;
384       pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
385
386       /* set the glean route for the prefix */
387       fib_table_entry_update_one_path (fib_index, &pfx_special,
388                                        FIB_SOURCE_INTERFACE,
389                                        (FIB_ENTRY_FLAG_CONNECTED |
390                                         FIB_ENTRY_FLAG_ATTACHED),
391                                        DPO_PROTO_IP4,
392                                        /* No next-hop address */
393                                        NULL,
394                                        sw_if_index,
395                                        /* invalid FIB index */
396                                        ~0,
397                                        1,
398                                        /* no out-label stack */
399                                        NULL,
400                                        FIB_ROUTE_PATH_FLAG_NONE);
401
402       /* set a drop route for the base address of the prefix */
403       pfx_special.fp_len = 32;
404       pfx_special.fp_addr.ip4.as_u32 =
405         address->as_u32 & im->fib_masks[a->address_length];
406
407       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
408         fib_table_entry_special_add (fib_index, &pfx_special,
409                                      FIB_SOURCE_INTERFACE,
410                                      (FIB_ENTRY_FLAG_DROP |
411                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
412
413       /* set a route for the broadcast address of the prefix */
414       pfx_special.fp_len = 32;
415       pfx_special.fp_addr.ip4.as_u32 =
416         address->as_u32 | ~im->fib_masks[a->address_length];
417       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
418         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
419
420
421     }
422   /* length == 31 - add an attached route for the other address */
423   else if (a->address_length == 31)
424     {
425       pfx_special.fp_len = 32;
426       pfx_special.fp_addr.ip4.as_u32 =
427         address->as_u32 ^ clib_host_to_net_u32(1);
428
429       fib_table_entry_update_one_path (fib_index, &pfx_special,
430                                        FIB_SOURCE_INTERFACE,
431                                        (FIB_ENTRY_FLAG_ATTACHED),
432                                        DPO_PROTO_IP4,
433                                        &pfx_special.fp_addr,
434                                        sw_if_index,
435                                        /* invalid FIB index */
436                                        ~0,
437                                        1,
438                                        NULL,
439                                        FIB_ROUTE_PATH_FLAG_NONE);
440     }
441 }
442
443 static void
444 ip4_add_interface_routes (u32 sw_if_index,
445                           ip4_main_t * im, u32 fib_index,
446                           ip_interface_address_t * a)
447 {
448   ip_lookup_main_t *lm = &im->lookup_main;
449   ip4_address_t *address = ip_interface_address_get_address (lm, a);
450   fib_prefix_t pfx = {
451     .fp_len = 32,
452     .fp_proto = FIB_PROTOCOL_IP4,
453     .fp_addr.ip4 = *address,
454   };
455
456   /* set special routes for the prefix if needed */
457   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
458
459   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
460     {
461       u32 classify_table_index =
462         lm->classify_table_index_by_sw_if_index[sw_if_index];
463       if (classify_table_index != (u32) ~ 0)
464         {
465           dpo_id_t dpo = DPO_INVALID;
466
467           dpo_set (&dpo,
468                    DPO_CLASSIFY,
469                    DPO_PROTO_IP4,
470                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
471
472           fib_table_entry_special_dpo_add (fib_index,
473                                            &pfx,
474                                            FIB_SOURCE_CLASSIFY,
475                                            FIB_ENTRY_FLAG_NONE, &dpo);
476           dpo_reset (&dpo);
477         }
478     }
479
480   fib_table_entry_update_one_path (fib_index, &pfx,
481                                    FIB_SOURCE_INTERFACE,
482                                    (FIB_ENTRY_FLAG_CONNECTED |
483                                     FIB_ENTRY_FLAG_LOCAL),
484                                    DPO_PROTO_IP4,
485                                    &pfx.fp_addr,
486                                    sw_if_index,
487                                    // invalid FIB index
488                                    ~0,
489                                    1, NULL,
490                                    FIB_ROUTE_PATH_FLAG_NONE);
491 }
492
493 static void
494 ip4_del_interface_prefix_routes (ip4_main_t * im,
495                                  u32 sw_if_index,
496                                  u32 fib_index,
497                                  ip4_address_t * address,
498                                  u32 address_length)
499 {
500   ip_lookup_main_t *lm = &im->lookup_main;
501   ip_interface_prefix_t *if_prefix;
502
503   ip_interface_prefix_key_t key = {
504     .prefix = {
505       .fp_len = address_length,
506       .fp_proto = FIB_PROTOCOL_IP4,
507       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
508     },
509     .sw_if_index = sw_if_index,
510   };
511
512   fib_prefix_t pfx_special = {
513     .fp_len = 32,
514     .fp_proto = FIB_PROTOCOL_IP4,
515   };
516
517   if_prefix = ip_get_interface_prefix (lm, &key);
518   if (!if_prefix)
519     {
520       clib_warning ("Prefix not found while deleting %U",
521                     format_ip4_address_and_length, address, address_length);
522       return;
523     }
524
525   if_prefix->ref_count -= 1;
526
527   /*
528    * Routes need to be adjusted if:
529    * - deleting last intf addr in prefix
530    * - deleting intf addr used as default source address in glean adjacency
531    *
532    * We're done now otherwise
533    */
534   if ((if_prefix->ref_count > 0) &&
535       !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
536     return;
537
538   /* length <= 30, delete glean route, first address, last address */
539   if (address_length <= 30)
540     {
541
542       /* remove glean route for prefix */
543       pfx_special.fp_addr.ip4 = *address;
544       pfx_special.fp_len = address_length;
545       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
546
547       /* if no more intf addresses in prefix, remove other special routes */
548       if (!if_prefix->ref_count)
549         {
550           /* first address in prefix */
551           pfx_special.fp_addr.ip4.as_u32 =
552             address->as_u32 & im->fib_masks[address_length];
553           pfx_special.fp_len = 32;
554
555           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
556           fib_table_entry_special_remove (fib_index,
557                                           &pfx_special,
558                                           FIB_SOURCE_INTERFACE);
559
560           /* prefix broadcast address */
561           pfx_special.fp_addr.ip4.as_u32 =
562             address->as_u32 | ~im->fib_masks[address_length];
563           pfx_special.fp_len = 32;
564
565           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
566           fib_table_entry_special_remove (fib_index,
567                                           &pfx_special,
568                                           FIB_SOURCE_INTERFACE);
569         }
570       else
571         /* default source addr just got deleted, find another */
572         {
573           ip_interface_address_t *new_src_ia = NULL;
574           ip4_address_t *new_src_addr = NULL;
575
576           new_src_addr =
577             ip4_interface_address_matching_destination
578               (im, address, sw_if_index, &new_src_ia);
579
580           if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
581
582           pfx_special.fp_len = address_length;
583           pfx_special.fp_addr.ip4 = *new_src_addr;
584
585           /* set new glean route for the prefix */
586           fib_table_entry_update_one_path (fib_index, &pfx_special,
587                                            FIB_SOURCE_INTERFACE,
588                                            (FIB_ENTRY_FLAG_CONNECTED |
589                                             FIB_ENTRY_FLAG_ATTACHED),
590                                            DPO_PROTO_IP4,
591                                            /* No next-hop address */
592                                            NULL,
593                                            sw_if_index,
594                                            /* invalid FIB index */
595                                            ~0,
596                                            1,
597                                            /* no out-label stack */
598                                            NULL,
599                                            FIB_ROUTE_PATH_FLAG_NONE);
600           return;
601         }
602     }
603   /* length == 31, delete attached route for the other address */
604   else if (address_length == 31)
605     {
606       pfx_special.fp_addr.ip4.as_u32 =
607         address->as_u32 ^ clib_host_to_net_u32(1);
608
609       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
610     }
611
612   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
613   pool_put (lm->if_prefix_pool, if_prefix);
614 }
615
616 static void
617 ip4_del_interface_routes (u32 sw_if_index,
618                           ip4_main_t * im,
619                           u32 fib_index,
620                           ip4_address_t * address, u32 address_length)
621 {
622   fib_prefix_t pfx = {
623     .fp_len = address_length,
624     .fp_proto = FIB_PROTOCOL_IP4,
625     .fp_addr.ip4 = *address,
626   };
627
628   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
629                                    address, address_length);
630
631   pfx.fp_len = 32;
632   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
633 }
634
635 #ifndef CLIB_MARCH_VARIANT
636 void
637 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
638 {
639   ip4_main_t *im = &ip4_main;
640
641   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
642
643   /*
644    * enable/disable only on the 1<->0 transition
645    */
646   if (is_enable)
647     {
648       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
649         return;
650     }
651   else
652     {
653       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
654       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
655         return;
656     }
657   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
658                                !is_enable, 0, 0);
659
660
661   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
662                                sw_if_index, !is_enable, 0, 0);
663
664   {
665     ip4_enable_disable_interface_callback_t *cb;
666     vec_foreach (cb, im->enable_disable_interface_callbacks)
667       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
668   }
669 }
670
671 static clib_error_t *
672 ip4_add_del_interface_address_internal (vlib_main_t * vm,
673                                         u32 sw_if_index,
674                                         ip4_address_t * address,
675                                         u32 address_length, u32 is_del)
676 {
677   vnet_main_t *vnm = vnet_get_main ();
678   ip4_main_t *im = &ip4_main;
679   ip_lookup_main_t *lm = &im->lookup_main;
680   clib_error_t *error = 0;
681   u32 if_address_index, elts_before;
682   ip4_address_fib_t ip4_af, *addr_fib = 0;
683
684   /* local0 interface doesn't support IP addressing  */
685   if (sw_if_index == 0)
686     {
687       return
688        clib_error_create ("local0 interface doesn't support IP addressing");
689     }
690
691   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
692   ip4_addr_fib_init (&ip4_af, address,
693                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
694   vec_add1 (addr_fib, ip4_af);
695
696   /*
697    * there is no support for adj-fib handling in the presence of overlapping
698    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
699    * most routers do.
700    */
701   /* *INDENT-OFF* */
702   if (!is_del)
703     {
704       /* When adding an address check that it does not conflict
705          with an existing address on any interface in this table. */
706       ip_interface_address_t *ia;
707       vnet_sw_interface_t *sif;
708
709       pool_foreach(sif, vnm->interface_main.sw_interfaces,
710       ({
711           if (im->fib_index_by_sw_if_index[sw_if_index] ==
712               im->fib_index_by_sw_if_index[sif->sw_if_index])
713             {
714               foreach_ip_interface_address
715                 (&im->lookup_main, ia, sif->sw_if_index,
716                  0 /* honor unnumbered */ ,
717                  ({
718                    ip4_address_t * x =
719                      ip_interface_address_get_address
720                      (&im->lookup_main, ia);
721                    if (ip4_destination_matches_route
722                        (im, address, x, ia->address_length) ||
723                        ip4_destination_matches_route (im,
724                                                       x,
725                                                       address,
726                                                       address_length))
727                      {
728                        /* an intf may have >1 addr from the same prefix */
729                        if ((sw_if_index == sif->sw_if_index) &&
730                            (ia->address_length == address_length) &&
731                            (x->as_u32 != address->as_u32))
732                          continue;
733
734                        /* error if the length or intf was different */
735                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
736
737                        return
738                          clib_error_create
739                          ("failed to add %U which conflicts with %U for interface %U",
740                           format_ip4_address_and_length, address,
741                           address_length,
742                           format_ip4_address_and_length, x,
743                           ia->address_length,
744                           format_vnet_sw_if_index_name, vnm,
745                           sif->sw_if_index);
746                      }
747                  }));
748             }
749       }));
750     }
751   /* *INDENT-ON* */
752
753   elts_before = pool_elts (lm->if_address_pool);
754
755   error = ip_interface_address_add_del
756     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
757   if (error)
758     goto done;
759
760   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
761
762   /* intf addr routes are added/deleted on admin up/down */
763   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
764     {
765       if (is_del)
766         ip4_del_interface_routes (sw_if_index,
767                                   im, ip4_af.fib_index, address,
768                                   address_length);
769       else
770         ip4_add_interface_routes (sw_if_index,
771                                   im, ip4_af.fib_index,
772                                   pool_elt_at_index
773                                   (lm->if_address_pool, if_address_index));
774     }
775
776   /* If pool did not grow/shrink: add duplicate address. */
777   if (elts_before != pool_elts (lm->if_address_pool))
778     {
779       ip4_add_del_interface_address_callback_t *cb;
780       vec_foreach (cb, im->add_del_interface_address_callbacks)
781         cb->function (im, cb->function_opaque, sw_if_index,
782                       address, address_length, if_address_index, is_del);
783     }
784
785 done:
786   vec_free (addr_fib);
787   return error;
788 }
789
790 clib_error_t *
791 ip4_add_del_interface_address (vlib_main_t * vm,
792                                u32 sw_if_index,
793                                ip4_address_t * address,
794                                u32 address_length, u32 is_del)
795 {
796   return ip4_add_del_interface_address_internal
797     (vm, sw_if_index, address, address_length, is_del);
798 }
799
800 void
801 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
802 {
803   ip_interface_address_t *ia;
804   ip4_main_t *im;
805
806   im = &ip4_main;
807
808   /*
809    * when directed broadcast is enabled, the subnet braodcast route will forward
810    * packets using an adjacency with a broadcast MAC. otherwise it drops
811    */
812   /* *INDENT-OFF* */
813   foreach_ip_interface_address(&im->lookup_main, ia,
814                                sw_if_index, 0,
815      ({
816        if (ia->address_length <= 30)
817          {
818            ip4_address_t *ipa;
819
820            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
821
822            fib_prefix_t pfx = {
823              .fp_len = 32,
824              .fp_proto = FIB_PROTOCOL_IP4,
825              .fp_addr = {
826                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
827              },
828            };
829
830            ip4_add_subnet_bcast_route
831              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
832                                                   sw_if_index),
833               &pfx, sw_if_index);
834          }
835      }));
836   /* *INDENT-ON* */
837 }
838 #endif
839
840 static clib_error_t *
841 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
842 {
843   ip4_main_t *im = &ip4_main;
844   ip_interface_address_t *ia;
845   ip4_address_t *a;
846   u32 is_admin_up, fib_index;
847
848   /* Fill in lookup tables with default table (0). */
849   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
850
851   vec_validate_init_empty (im->
852                            lookup_main.if_address_pool_index_by_sw_if_index,
853                            sw_if_index, ~0);
854
855   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
856
857   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
858
859   /* *INDENT-OFF* */
860   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
861                                 0 /* honor unnumbered */,
862   ({
863     a = ip_interface_address_get_address (&im->lookup_main, ia);
864     if (is_admin_up)
865       ip4_add_interface_routes (sw_if_index,
866                                 im, fib_index,
867                                 ia);
868     else
869       ip4_del_interface_routes (sw_if_index,
870                                 im, fib_index,
871                                 a, ia->address_length);
872   }));
873   /* *INDENT-ON* */
874
875   return 0;
876 }
877
878 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
879
/*
 * Built-in ip4 unicast rx feature path definition.
 *
 * The arc starts at "ip4-input" / "ip4-input-no-checksum" and its last node
 * is "ip4-lookup".  The .runs_before constraints declared below chain the
 * features as:
 *
 *   ip4-flow-classify -> ip4-inacl -> ip4-source-check-via-rx ->
 *   ip4-source-check-via-any -> ip4-policer-classify ->
 *   ipsec4-input-feature -> vpath-input-ip4 -> ip4-vxlan-bypass -> ip4-lookup
 *
 * In addition "ip4-source-and-port-range-check-rx" is declared ahead of
 * "ip4-policer-classify" and "ip4-not-enabled" ahead of "ip4-lookup".
 */
/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
{
  .arc_name = "ip4-unicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .last_in_arc = "ip4-lookup",
  /* The arc index is stored in the ip4 lookup main. */
  .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_flow_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-flow-classify",
  .runs_before = VNET_FEATURES ("ip4-inacl"),
};

VNET_FEATURE_INIT (ip4_inacl, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-inacl",
  .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
};

VNET_FEATURE_INIT (ip4_source_check_1, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-check-via-rx",
  .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
};

VNET_FEATURE_INIT (ip4_source_check_2, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-check-via-any",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-and-port-range-check-rx",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_policer_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-policer-classify",
  .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
};

VNET_FEATURE_INIT (ip4_ipsec, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ipsec4-input-feature",
  .runs_before = VNET_FEATURES ("vpath-input-ip4"),
};

VNET_FEATURE_INIT (ip4_vpath, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
};

VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-vxlan-bypass",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

/*
 * "ip4-not-enabled" is switched per interface by ip4_sw_interface_add_del()
 * through vnet_feature_enable_disable().
 */
VNET_FEATURE_INIT (ip4_not_enabled, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

VNET_FEATURE_INIT (ip4_lookup, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-lookup",
  .runs_before = 0,     /* not before any other features */
};

966
/*
 * Built-in ip4 multicast rx feature path definition.
 *
 * The arc starts at "ip4-input" / "ip4-input-no-checksum" and its last node
 * is "ip4-mfib-forward-lookup".  Both "vpath-input-ip4" and
 * "ip4-not-enabled" are declared ahead of that last node.
 */
VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
{
  .arc_name = "ip4-multicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .last_in_arc = "ip4-mfib-forward-lookup",
  /* The arc index is stored in the ip4 lookup main. */
  .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_vpath_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

/*
 * "ip4-not-enabled" is switched per interface by ip4_sw_interface_add_del()
 * through vnet_feature_enable_disable().
 */
VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

VNET_FEATURE_INIT (ip4_lookup_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-mfib-forward-lookup",
  .runs_before = 0,     /* last feature */
};

996
/*
 * ip4 tx (output) feature path definition.
 *
 * The arc starts at "ip4-rewrite", "ip4-midchain" or "ip4-dvr-dpo" and its
 * last node is "interface-output".  The .runs_before constraints declared
 * below chain the features as:
 *
 *   ip4-source-and-port-range-check-tx -> ip4-outacl ->
 *   ipsec4-output-feature -> interface-output
 */
VNET_FEATURE_ARC_INIT (ip4_output, static) =
{
  .arc_name = "ip4-output",
  .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
  .last_in_arc = "interface-output",
  /* The arc index is stored in the ip4 lookup main. */
  .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
};

/* Source and port-range check on the tx path. */
VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-source-and-port-range-check-tx",
  .runs_before = VNET_FEATURES ("ip4-outacl"),
};

VNET_FEATURE_INIT (ip4_outacl, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-outacl",
  .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
};

VNET_FEATURE_INIT (ip4_ipsec_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ipsec4-output-feature",
  .runs_before = VNET_FEATURES ("interface-output"),
};

/* Built-in ip4 tx feature path definition */
VNET_FEATURE_INIT (ip4_interface_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "interface-output",
  .runs_before = 0,     /* not before any other features */
};
/* *INDENT-ON* */
1035
1036 static clib_error_t *
1037 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1038 {
1039   ip4_main_t *im = &ip4_main;
1040
1041   /* Fill in lookup tables with default table (0). */
1042   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1043   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1044
1045   if (!is_add)
1046     {
1047       ip4_main_t *im4 = &ip4_main;
1048       ip_lookup_main_t *lm4 = &im4->lookup_main;
1049       ip_interface_address_t *ia = 0;
1050       ip4_address_t *address;
1051       vlib_main_t *vm = vlib_get_main ();
1052
1053       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1054       /* *INDENT-OFF* */
1055       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1056       ({
1057         address = ip_interface_address_get_address (lm4, ia);
1058         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1059       }));
1060       /* *INDENT-ON* */
1061     }
1062
1063   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1064                                is_add, 0, 0);
1065
1066   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1067                                sw_if_index, is_add, 0, 0);
1068
1069   return /* no error */ 0;
1070 }
1071
1072 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1073
/*
 * Global IP4 main.
 *
 * The object is only defined when CLIB_MARCH_VARIANT is not set --
 * presumably so that per-march variant builds of this file share a single
 * definition (TODO confirm against the build system).
 */
#ifndef CLIB_MARCH_VARIANT
ip4_main_t ip4_main;
#endif /* CLIB_MARCH_VARIANT */
1078
1079 static clib_error_t *
1080 ip4_lookup_init (vlib_main_t * vm)
1081 {
1082   ip4_main_t *im = &ip4_main;
1083   clib_error_t *error;
1084   uword i;
1085
1086   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1087     return error;
1088   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1089     return (error);
1090   if ((error = vlib_call_init_function (vm, fib_module_init)))
1091     return error;
1092   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1093     return error;
1094
1095   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1096     {
1097       u32 m;
1098
1099       if (i < 32)
1100         m = pow2_mask (i) << (32 - i);
1101       else
1102         m = ~0;
1103       im->fib_masks[i] = clib_host_to_net_u32 (m);
1104     }
1105
1106   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1107
1108   /* Create FIB with index 0 and table id of 0. */
1109   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1110                                      FIB_SOURCE_DEFAULT_ROUTE);
1111   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1112                                       MFIB_SOURCE_DEFAULT_ROUTE);
1113
1114   {
1115     pg_node_t *pn;
1116     pn = pg_get_node (ip4_lookup_node.index);
1117     pn->unformat_edit = unformat_pg_ip4_header;
1118   }
1119
1120   {
1121     ethernet_arp_header_t h;
1122
1123     clib_memset (&h, 0, sizeof (h));
1124
1125 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1126 #define _8(f,v) h.f = v;
1127     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1128     _16 (l3_type, ETHERNET_TYPE_IP4);
1129     _8 (n_l2_address_bytes, 6);
1130     _8 (n_l3_address_bytes, 4);
1131     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1132 #undef _16
1133 #undef _8
1134
1135     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1136                                /* data */ &h,
1137                                sizeof (h),
1138                                /* alloc chunk size */ 8,
1139                                "ip4 arp");
1140   }
1141
1142   return error;
1143 }
1144
1145 VLIB_INIT_FUNCTION (ip4_lookup_init);
1146
/*
 * Per-packet trace record filled in by ip4_forward_next_trace() and printed
 * by the format_ip4_*_trace() functions.
 */
typedef struct
{
  /* Adjacency taken. */
  u32 dpo_index;
  /* Copy of the buffer's ip.flow_hash. */
  u32 flow_hash;
  /*
   * The buffer's sw_if_index[VLIB_TX] when that is set (not ~0), otherwise
   * the FIB index of the RX interface.
   */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. */
  u8 packet_data[64 - 1 * sizeof (u32)];
}
ip4_forward_next_trace_t;
1158
1159 #ifndef CLIB_MARCH_VARIANT
1160 u8 *
1161 format_ip4_forward_next_trace (u8 * s, va_list * args)
1162 {
1163   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1164   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1165   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1166   u32 indent = format_get_indent (s);
1167   s = format (s, "%U%U",
1168               format_white_space, indent,
1169               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1170   return s;
1171 }
1172 #endif
1173
1174 static u8 *
1175 format_ip4_lookup_trace (u8 * s, va_list * args)
1176 {
1177   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1178   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1179   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1180   u32 indent = format_get_indent (s);
1181
1182   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1183               t->fib_index, t->dpo_index, t->flow_hash);
1184   s = format (s, "\n%U%U",
1185               format_white_space, indent,
1186               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1187   return s;
1188 }
1189
1190 static u8 *
1191 format_ip4_rewrite_trace (u8 * s, va_list * args)
1192 {
1193   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1194   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1195   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1196   u32 indent = format_get_indent (s);
1197
1198   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1199               t->fib_index, t->dpo_index, format_ip_adjacency,
1200               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1201   s = format (s, "\n%U%U",
1202               format_white_space, indent,
1203               format_ip_adjacency_packet_data,
1204               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1205   return s;
1206 }
1207
1208 #ifndef CLIB_MARCH_VARIANT
1209 /* Common trace function for all ip4-forward next nodes. */
1210 void
1211 ip4_forward_next_trace (vlib_main_t * vm,
1212                         vlib_node_runtime_t * node,
1213                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1214 {
1215   u32 *from, n_left;
1216   ip4_main_t *im = &ip4_main;
1217
1218   n_left = frame->n_vectors;
1219   from = vlib_frame_vector_args (frame);
1220
1221   while (n_left >= 4)
1222     {
1223       u32 bi0, bi1;
1224       vlib_buffer_t *b0, *b1;
1225       ip4_forward_next_trace_t *t0, *t1;
1226
1227       /* Prefetch next iteration. */
1228       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1229       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1230
1231       bi0 = from[0];
1232       bi1 = from[1];
1233
1234       b0 = vlib_get_buffer (vm, bi0);
1235       b1 = vlib_get_buffer (vm, bi1);
1236
1237       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1238         {
1239           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1240           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1241           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1242           t0->fib_index =
1243             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1244              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1245             vec_elt (im->fib_index_by_sw_if_index,
1246                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1247
1248           clib_memcpy_fast (t0->packet_data,
1249                             vlib_buffer_get_current (b0),
1250                             sizeof (t0->packet_data));
1251         }
1252       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1253         {
1254           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1255           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1256           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1257           t1->fib_index =
1258             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1259              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1260             vec_elt (im->fib_index_by_sw_if_index,
1261                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1262           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1263                             sizeof (t1->packet_data));
1264         }
1265       from += 2;
1266       n_left -= 2;
1267     }
1268
1269   while (n_left >= 1)
1270     {
1271       u32 bi0;
1272       vlib_buffer_t *b0;
1273       ip4_forward_next_trace_t *t0;
1274
1275       bi0 = from[0];
1276
1277       b0 = vlib_get_buffer (vm, bi0);
1278
1279       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1280         {
1281           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1282           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1283           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1284           t0->fib_index =
1285             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1286              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1287             vec_elt (im->fib_index_by_sw_if_index,
1288                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1289           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1290                             sizeof (t0->packet_data));
1291         }
1292       from += 1;
1293       n_left -= 1;
1294     }
1295 }
1296
/*
 * Compute TCP/UDP/ICMP4 checksum in software.
 *
 * The sum covers a pseudo header (L4 length, protocol, source and
 * destination address) followed by everything after the IP header, for as
 * many bytes as ip0->length announces.  Chained buffers are followed when
 * the payload does not fit in p0.
 *
 * vm  - vlib main, used to resolve next_buffer indices
 * p0  - buffer holding ip0
 * ip0 - IPv4 header inside p0
 *
 * Returns the one's complement of the folded sum.  When the checksum field
 * of a received packet is included in the data, a result of 0 means the
 * checksum verifies (see ip4_tcp_udp_validate_checksum).  Returns 0xfefe if
 * the buffer chain ends before all announced bytes were summed (debug
 * images hit the ASSERT first).
 *
 * Side effect: when the span summed from one buffer has odd length, a zero
 * byte is written immediately in front of the next buffer's current data.
 */
u16
ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
                              ip4_header_t * ip0)
{
  ip_csum_t sum0;
  u32 ip_header_length, payload_length_host_byte_order;
  u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
  u16 sum16;
  u8 *data_this_buffer;
  u8 length_odd;

  /* Initialize checksum with ip header. */
  ip_header_length = ip4_header_bytes (ip0);
  payload_length_host_byte_order =
    clib_net_to_host_u16 (ip0->length) - ip_header_length;
  /* Pseudo header: L4 length and protocol, in network byte order. */
  sum0 =
    clib_host_to_net_u32 (payload_length_host_byte_order +
                          (ip0->protocol << 16));

  /*
   * Pseudo header: source and destination address.  With a 32-bit uword
   * they are added one at a time, otherwise a single 64-bit read starting
   * at src_address picks up both.
   */
  if (BITS (uword) == 32)
    {
      sum0 =
        ip_csum_with_carry (sum0,
                            clib_mem_unaligned (&ip0->src_address, u32));
      sum0 =
        ip_csum_with_carry (sum0,
                            clib_mem_unaligned (&ip0->dst_address, u32));
    }
  else
    sum0 =
      ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));

  n_bytes_left = n_this_buffer = payload_length_host_byte_order;
  data_this_buffer = (u8 *) ip0 + ip_header_length;
  /* Bytes available in p0 counted from the start of the IP header. */
  n_ip_bytes_this_buffer =
    p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
  /* Clamp the first span to what this buffer actually holds. */
  if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
    {
      n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
        n_ip_bytes_this_buffer - ip_header_length : 0;
    }

  while (1)
    {
      sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
      n_bytes_left -= n_this_buffer;
      if (n_bytes_left == 0)
        break;

      /* More bytes announced than the chain provides: give up. */
      ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
      if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
        return 0xfefe;

      /* Remember the parity of the span just summed. */
      length_odd = (n_this_buffer & 1);

      p0 = vlib_get_buffer (vm, p0->next_buffer);
      data_this_buffer = vlib_buffer_get_current (p0);
      n_this_buffer = clib_min (p0->current_length, n_bytes_left);

      if (PREDICT_FALSE (length_odd))
        {
          /* Prepend a 0 or the resulting checksum will be incorrect. */
          /*
           * The span grows by the extra byte, so n_bytes_left is bumped as
           * well to keep the remaining-byte accounting unchanged.
           */
          data_this_buffer--;
          n_this_buffer++;
          n_bytes_left++;
          data_this_buffer[0] = 0;
        }
    }

  sum16 = ~ip_csum_fold (sum0);
  return sum16;
}
1370
1371 u32
1372 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1373 {
1374   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1375   udp_header_t *udp0;
1376   u16 sum16;
1377
1378   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1379           || ip0->protocol == IP_PROTOCOL_UDP);
1380
1381   udp0 = (void *) (ip0 + 1);
1382   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1383     {
1384       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1385                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1386       return p0->flags;
1387     }
1388
1389   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1390
1391   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1392                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1393
1394   return p0->flags;
1395 }
1396 #endif
1397
/*
 * Feature arc for locally destined ip4 packets: it starts at "ip4-local"
 * and its last node is "ip4-local-end-of-arc".  The generated
 * vnet_feat_arc_ip4_local object is read by ip4_local_set_next_and_error().
 */
/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_local) =
{
  .arc_name  = "ip4-local",
  .start_nodes = VNET_FEATURES ("ip4-local"),
  .last_in_arc = "ip4-local-end-of-arc",
};
/* *INDENT-ON* */
1406
1407 static inline void
1408 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1409                             ip4_header_t * ip, u8 is_udp, u8 * error,
1410                             u8 * good_tcp_udp)
1411 {
1412   u32 flags0;
1413   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1414   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1415   if (is_udp)
1416     {
1417       udp_header_t *udp;
1418       u32 ip_len, udp_len;
1419       i32 len_diff;
1420       udp = ip4_next_header (ip);
1421       /* Verify UDP length. */
1422       ip_len = clib_net_to_host_u16 (ip->length);
1423       udp_len = clib_net_to_host_u16 (udp->length);
1424
1425       len_diff = ip_len - udp_len;
1426       *good_tcp_udp &= len_diff >= 0;
1427       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1428     }
1429 }
1430
/*
 * Checksum state predicates on a buffer pointer.  Every argument and every
 * expansion is fully parenthesized so the macros can be negated or combined
 * with other operators without changing their meaning.
 */

/* True when the buffer is flagged for TCP or UDP checksum offload. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
        || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/*
 * True when a TCP/UDP packet still needs a software checksum check, i.e.
 * its checksum has neither been computed already nor been offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
        && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)         \
            || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is known correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
        || ip4_local_csum_is_offloaded (_b)) != 0)
1442
1443 static inline void
1444 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1445                          ip4_header_t * ih, u8 * error)
1446 {
1447   u8 is_udp, is_tcp_udp, good_tcp_udp;
1448
1449   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1450   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1451
1452   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1453     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1454   else
1455     good_tcp_udp = ip4_local_csum_is_valid (b);
1456
1457   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1458   *error = (is_tcp_udp && !good_tcp_udp
1459             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1460 }
1461
1462 static inline void
1463 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1464                             ip4_header_t ** ih, u8 * error)
1465 {
1466   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1467
1468   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1469   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1470
1471   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1472   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1473
1474   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1475   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1476
1477   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1478                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1479     {
1480       if (is_tcp_udp[0])
1481         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1482                                     &good_tcp_udp[0]);
1483       if (is_tcp_udp[1])
1484         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1485                                     &good_tcp_udp[1]);
1486     }
1487
1488   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1489               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1490   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1491               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1492 }
1493
1494 static inline void
1495 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1496                               vlib_buffer_t * b, u16 * next, u8 error,
1497                               u8 head_of_feature_arc)
1498 {
1499   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1500   u32 next_index;
1501
1502   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1503   b->error = error ? error_node->errors[error] : 0;
1504   if (head_of_feature_arc)
1505     {
1506       next_index = *next;
1507       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1508         {
1509           vnet_feature_arc_start (arc_index,
1510                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1511                                   &next_index, b);
1512           *next = next_index;
1513         }
1514     }
1515 }
1516
/*
 * Result of the most recent source-address lookup, carried across packets
 * by ip4_local_check_src() and ip4_local_check_src_x2().
 */
typedef struct
{
  /* Source address the cached result belongs to. */
  ip4_address_t src;
  /* Load-balance index found for that source address. */
  u32 lbi;
  /* Error code the packet had after the source checks. */
  u8 error;
  /* Non-zero forces a lookup regardless of src. */
  u8 first;
} ip4_local_last_check_t;
1524
1525 static inline void
1526 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1527                      ip4_local_last_check_t * last_check, u8 * error0)
1528 {
1529   ip4_fib_mtrie_leaf_t leaf0;
1530   ip4_fib_mtrie_t *mtrie0;
1531   const dpo_id_t *dpo0;
1532   load_balance_t *lb0;
1533   u32 lbi0;
1534
1535   vnet_buffer (b)->ip.fib_index =
1536     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1537     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1538
1539   /*
1540    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1541    *  adjacency for the destination address (the local interface address).
1542    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1543    *  adjacency for the source address (the remote sender's address)
1544    */
1545   if (PREDICT_FALSE (last_check->first ||
1546                      (last_check->src.as_u32 != ip0->src_address.as_u32)))
1547     {
1548       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1549       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1550       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1551       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1552       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1553
1554       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1555         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1556       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1557
1558       lb0 = load_balance_get (lbi0);
1559       dpo0 = load_balance_get_bucket_i (lb0, 0);
1560
1561       /*
1562        * Must have a route to source otherwise we drop the packet.
1563        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1564        *
1565        * The checks are:
1566        *  - the source is a recieve => it's from us => bogus, do this
1567        *    first since it sets a different error code.
1568        *  - uRPF check for any route to source - accept if passes.
1569        *  - allow packets destined to the broadcast address from unknown sources
1570        */
1571
1572       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1573                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1574                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1575       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1576                   && !fib_urpf_check_size (lb0->lb_urpf)
1577                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1578                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1579
1580       last_check->src.as_u32 = ip0->src_address.as_u32;
1581       last_check->lbi = lbi0;
1582       last_check->error = *error0;
1583     }
1584   else
1585     {
1586       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1587         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1588       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1589       *error0 = last_check->error;
1590       last_check->first = 0;
1591     }
1592 }
1593
/*
 * Source-address check for two locally destined packets at once.
 *
 * Per-packet effects are the same as in ip4_local_check_src(); the work for
 * the two packets is interleaved step by step.  The cached result is reused
 * only when BOTH source addresses equal the cached one, and after a lookup
 * the cache holds the result of the second packet.
 *
 * NOTE(review): last_check->first is only cleared on the reuse branch, and
 * that branch cannot be taken while the flag is still set.  Unless a caller
 * clears the flag elsewhere, every call performs the lookup.  Before
 * changing this, note that the reuse branch overwrites both error codes
 * with the cached one.
 *
 * b          - the two buffers
 * ip         - the two IPv4 headers
 * last_check - in/out cache of the previous lookup
 * error      - in/out error codes, one per packet
 */
static inline void
ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
                        ip4_local_last_check_t * last_check, u8 * error)
{
  ip4_fib_mtrie_leaf_t leaf[2];
  ip4_fib_mtrie_t *mtrie[2];
  const dpo_id_t *dpo[2];
  load_balance_t *lb[2];
  u32 not_last_hit;
  u32 lbi[2];

  /* Non-zero when the cache cannot be used for this pair. */
  not_last_hit = last_check->first;
  not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
  not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;

  /* An explicit TX fib index overrides the one already in the buffer. */
  vnet_buffer (b[0])->ip.fib_index =
    vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
    vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
    vnet_buffer (b[0])->ip.fib_index;

  vnet_buffer (b[1])->ip.fib_index =
    vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
    vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
    vnet_buffer (b[1])->ip.fib_index;

  /*
   * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
   *  adjacency for the destination address (the local interface address).
   * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
   *  adjacency for the source address (the remote sender's address)
   */
  if (PREDICT_FALSE (not_last_hit))
    {
      mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
      mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;

      leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
      leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);

      leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
                                           &ip[0]->src_address, 2);
      leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
                                           &ip[1]->src_address, 2);

      leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
                                           &ip[0]->src_address, 3);
      leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
                                           &ip[1]->src_address, 3);

      lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
      lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);

      vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
        vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];

      vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
        vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];

      lb[0] = load_balance_get (lbi[0]);
      lb[1] = load_balance_get (lbi[1]);

      dpo[0] = load_balance_get_bucket_i (lb[0], 0);
      dpo[1] = load_balance_get_bucket_i (lb[1], 0);

      /*
       * Only packets without an error so far are examined: a source that
       * resolves to a receive DPO is spoofed; otherwise a failed uRPF size
       * check is a lookup miss unless the destination is 255.255.255.255.
       */
      error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   dpo[0]->dpoi_type == DPO_RECEIVE) ?
                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
      error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   !fib_urpf_check_size (lb[0]->lb_urpf) &&
                   ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
                  ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);

      error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   dpo[1]->dpoi_type == DPO_RECEIVE) ?
                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
      error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   !fib_urpf_check_size (lb[1]->lb_urpf) &&
                   ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
                  ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);

      /* Remember the second packet's result for the next call. */
      last_check->src.as_u32 = ip[1]->src_address.as_u32;
      last_check->lbi = lbi[1];
      last_check->error = error[1];
    }
  else
    {
      /* Both sources match the cache: reuse the cached result. */
      vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
        vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;

      vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
        vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;

      error[0] = last_check->error;
      error[1] = last_check->error;
      last_check->first = 0;
    }
}
1695
/* Packet classes returned by ip4_local_classify(). */
enum ip_local_packet_type_e
{
  /* Neither a fragment nor marked as NATed. */
  IP_LOCAL_PACKET_TYPE_L4,
  /* Buffer has VNET_BUFFER_F_IS_NATED set. */
  IP_LOCAL_PACKET_TYPE_NAT,
  /* IPv4 fragment; sent to reassembly. */
  IP_LOCAL_PACKET_TYPE_FRAG,
};
1702
1703 /**
1704  * Determine packet type and next node.
1705  *
1706  * The expectation is that all packets that are not L4 will skip
1707  * checksums and source checks.
1708  */
1709 always_inline u8
1710 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1711 {
1712   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1713
1714   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1715     {
1716       *next = IP_LOCAL_NEXT_REASSEMBLY;
1717       return IP_LOCAL_PACKET_TYPE_FRAG;
1718     }
1719   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1720     {
1721       *next = lm->local_next_by_ip_protocol[ip->protocol];
1722       return IP_LOCAL_PACKET_TYPE_NAT;
1723     }
1724
1725   *next = lm->local_next_by_ip_protocol[ip->protocol];
1726   return IP_LOCAL_PACKET_TYPE_L4;
1727 }
1728
1729 static inline uword
1730 ip4_local_inline (vlib_main_t * vm,
1731                   vlib_node_runtime_t * node,
1732                   vlib_frame_t * frame, int head_of_feature_arc)
1733 {
1734   u32 *from, n_left_from;
1735   vlib_node_runtime_t *error_node =
1736     vlib_node_get_runtime (vm, ip4_input_node.index);
1737   u16 nexts[VLIB_FRAME_SIZE], *next;
1738   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1739   ip4_header_t *ip[2];
1740   u8 error[2], pt[2];
1741
1742   ip4_local_last_check_t last_check = {
1743     /*
1744      * 0.0.0.0 can appear as the source address of an IP packet,
1745      * as can any other address, hence the need to use the 'first'
1746      * member to make sure the .lbi is initialised for the first
1747      * packet.
1748      */
1749     .src = {.as_u32 = 0},
1750     .lbi = ~0,
1751     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1752     .first = 1,
1753   };
1754
1755   from = vlib_frame_vector_args (frame);
1756   n_left_from = frame->n_vectors;
1757
1758   if (node->flags & VLIB_NODE_FLAG_TRACE)
1759     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1760
1761   vlib_get_buffers (vm, from, bufs, n_left_from);
1762   b = bufs;
1763   next = nexts;
1764
1765   while (n_left_from >= 6)
1766     {
1767       u8 not_batch = 0;
1768
1769       /* Prefetch next iteration. */
1770       {
1771         vlib_prefetch_buffer_header (b[4], LOAD);
1772         vlib_prefetch_buffer_header (b[5], LOAD);
1773
1774         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1775         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1776       }
1777
1778       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1779
1780       ip[0] = vlib_buffer_get_current (b[0]);
1781       ip[1] = vlib_buffer_get_current (b[1]);
1782
1783       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1784       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1785
1786       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1787       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1788
1789       not_batch = pt[0] ^ pt[1];
1790
1791       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1792         goto skip_checks;
1793
1794       if (PREDICT_TRUE (not_batch == 0))
1795         {
1796           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1797           ip4_local_check_src_x2 (b, ip, &last_check, error);
1798         }
1799       else
1800         {
1801           if (!pt[0])
1802             {
1803               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1804               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1805             }
1806           if (!pt[1])
1807             {
1808               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1809               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1810             }
1811         }
1812
1813     skip_checks:
1814
1815       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1816                                     head_of_feature_arc);
1817       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1818                                     head_of_feature_arc);
1819
1820       b += 2;
1821       next += 2;
1822       n_left_from -= 2;
1823     }
1824
1825   while (n_left_from > 0)
1826     {
1827       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1828
1829       ip[0] = vlib_buffer_get_current (b[0]);
1830       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1831       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1832
1833       if (head_of_feature_arc == 0 || pt[0])
1834         goto skip_check;
1835
1836       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1837       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1838
1839     skip_check:
1840
1841       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1842                                     head_of_feature_arc);
1843
1844       b += 1;
1845       next += 1;
1846       n_left_from -= 1;
1847     }
1848
1849   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1850   return frame->n_vectors;
1851 }
1852
1853 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1854                                vlib_frame_t * frame)
1855 {
1856   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1857 }
1858
1859 /* *INDENT-OFF* */
1860 VLIB_REGISTER_NODE (ip4_local_node) =
1861 {
1862   .name = "ip4-local",
1863   .vector_size = sizeof (u32),
1864   .format_trace = format_ip4_forward_next_trace,
1865   .n_next_nodes = IP_LOCAL_N_NEXT,
1866   .next_nodes =
1867   {
1868     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1869     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1870     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1871     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1872     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
1873   },
1874 };
1875 /* *INDENT-ON* */
1876
1877
1878 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1879                                           vlib_node_runtime_t * node,
1880                                           vlib_frame_t * frame)
1881 {
1882   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1883 }
1884
1885 /* *INDENT-OFF* */
1886 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1887   .name = "ip4-local-end-of-arc",
1888   .vector_size = sizeof (u32),
1889
1890   .format_trace = format_ip4_forward_next_trace,
1891   .sibling_of = "ip4-local",
1892 };
1893
1894 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1895   .arc_name = "ip4-local",
1896   .node_name = "ip4-local-end-of-arc",
1897   .runs_before = 0, /* not before any other features */
1898 };
1899 /* *INDENT-ON* */
1900
1901 #ifndef CLIB_MARCH_VARIANT
1902 void
1903 ip4_register_protocol (u32 protocol, u32 node_index)
1904 {
1905   vlib_main_t *vm = vlib_get_main ();
1906   ip4_main_t *im = &ip4_main;
1907   ip_lookup_main_t *lm = &im->lookup_main;
1908
1909   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1910   lm->local_next_by_ip_protocol[protocol] =
1911     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1912 }
1913
1914 void
1915 ip4_unregister_protocol (u32 protocol)
1916 {
1917   ip4_main_t *im = &ip4_main;
1918   ip_lookup_main_t *lm = &im->lookup_main;
1919
1920   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1921   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1922 }
1923 #endif
1924
1925 static clib_error_t *
1926 show_ip_local_command_fn (vlib_main_t * vm,
1927                           unformat_input_t * input, vlib_cli_command_t * cmd)
1928 {
1929   ip4_main_t *im = &ip4_main;
1930   ip_lookup_main_t *lm = &im->lookup_main;
1931   int i;
1932
1933   vlib_cli_output (vm, "Protocols handled by ip4_local");
1934   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1935     {
1936       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1937         {
1938           u32 node_index = vlib_get_node (vm,
1939                                           ip4_local_node.index)->
1940             next_nodes[lm->local_next_by_ip_protocol[i]];
1941           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1942                            format_vlib_node_name, vm, node_index);
1943         }
1944     }
1945   return 0;
1946 }
1947
1948
1949
1950 /*?
1951  * Display the set of protocols handled by the local IPv4 stack.
1952  *
1953  * @cliexpar
1954  * Example of how to display local protocol table:
1955  * @cliexstart{show ip local}
1956  * Protocols handled by ip4_local
1957  * 1
1958  * 17
1959  * 47
1960  * @cliexend
1961 ?*/
1962 /* *INDENT-OFF* */
1963 VLIB_CLI_COMMAND (show_ip_local, static) =
1964 {
1965   .path = "show ip local",
1966   .function = show_ip_local_command_fn,
1967   .short_help = "show ip local",
1968 };
1969 /* *INDENT-ON* */
1970
1971 always_inline uword
1972 ip4_arp_inline (vlib_main_t * vm,
1973                 vlib_node_runtime_t * node,
1974                 vlib_frame_t * frame, int is_glean)
1975 {
1976   vnet_main_t *vnm = vnet_get_main ();
1977   ip4_main_t *im = &ip4_main;
1978   ip_lookup_main_t *lm = &im->lookup_main;
1979   u32 *from, *to_next_drop;
1980   uword n_left_from, n_left_to_next_drop, next_index;
1981   u32 thread_index = vm->thread_index;
1982   u64 seed;
1983
1984   if (node->flags & VLIB_NODE_FLAG_TRACE)
1985     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1986
1987   seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1988
1989   from = vlib_frame_vector_args (frame);
1990   n_left_from = frame->n_vectors;
1991   next_index = node->cached_next_index;
1992   if (next_index == IP4_ARP_NEXT_DROP)
1993     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1994
1995   while (n_left_from > 0)
1996     {
1997       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1998                            to_next_drop, n_left_to_next_drop);
1999
2000       while (n_left_from > 0 && n_left_to_next_drop > 0)
2001         {
2002           u32 pi0, bi0, adj_index0, sw_if_index0;
2003           ip_adjacency_t *adj0;
2004           vlib_buffer_t *p0, *b0;
2005           ip4_address_t resolve0;
2006           ethernet_arp_header_t *h0;
2007           vnet_hw_interface_t *hw_if0;
2008           u64 r0;
2009
2010           pi0 = from[0];
2011           p0 = vlib_get_buffer (vm, pi0);
2012
2013           from += 1;
2014           n_left_from -= 1;
2015           to_next_drop[0] = pi0;
2016           to_next_drop += 1;
2017           n_left_to_next_drop -= 1;
2018
2019           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2020           adj0 = adj_get (adj_index0);
2021
2022           if (is_glean)
2023             {
2024               /* resolve the packet's destination */
2025               ip4_header_t *ip0 = vlib_buffer_get_current (p0);
2026               resolve0 = ip0->dst_address;
2027             }
2028           else
2029             {
2030               /* resolve the incomplete adj */
2031               resolve0 = adj0->sub_type.nbr.next_hop.ip4;
2032             }
2033
2034           /* combine the address and interface for the hash key */
2035           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2036           r0 = (u64) resolve0.data_u32 << 32;
2037           r0 |= sw_if_index0;
2038
2039           if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
2040             {
2041               p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
2042               continue;
2043             }
2044
2045           /*
2046            * the adj has been updated to a rewrite but the node the DPO that got
2047            * us here hasn't - yet. no big deal. we'll drop while we wait.
2048            */
2049           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2050             {
2051               p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
2052               continue;
2053             }
2054
2055           /*
2056            * Can happen if the control-plane is programming tables
2057            * with traffic flowing; at least that's today's lame excuse.
2058            */
2059           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2060               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2061             {
2062               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2063               continue;
2064             }
2065           /* Send ARP request. */
2066           h0 =
2067             vlib_packet_template_get_packet (vm,
2068                                              &im->ip4_arp_request_packet_template,
2069                                              &bi0);
2070           /* Seems we're out of buffers */
2071           if (PREDICT_FALSE (!h0))
2072             {
2073               p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
2074               continue;
2075             }
2076
2077           b0 = vlib_get_buffer (vm, bi0);
2078
2079           /* copy the persistent fields from the original */
2080           clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
2081
2082           /* Add rewrite/encap string for ARP packet. */
2083           vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2084
2085           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2086
2087           /* Src ethernet address in ARP header. */
2088           mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
2089                                   hw_if0->hw_address);
2090           if (is_glean)
2091             {
2092               /* The interface's source address is stashed in the Glean Adj */
2093               h0->ip4_over_ethernet[0].ip4 =
2094                 adj0->sub_type.glean.receive_addr.ip4;
2095             }
2096           else
2097             {
2098               /* Src IP address in ARP header. */
2099               if (ip4_src_address_for_packet (lm, sw_if_index0,
2100                                               &h0->ip4_over_ethernet[0].ip4))
2101                 {
2102                   /* No source address available */
2103                   p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2104                   vlib_buffer_free (vm, &bi0, 1);
2105                   continue;
2106                 }
2107             }
2108           h0->ip4_over_ethernet[1].ip4 = resolve0;
2109
2110           p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
2111
2112           vlib_buffer_copy_trace_flag (vm, p0, bi0);
2113           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
2114           vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2115
2116           vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2117
2118           vlib_set_next_frame_buffer (vm, node,
2119                                       adj0->rewrite_header.next_index, bi0);
2120         }
2121
2122       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2123     }
2124
2125   return frame->n_vectors;
2126 }
2127
2128 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2129                              vlib_frame_t * frame)
2130 {
2131   return (ip4_arp_inline (vm, node, frame, 0));
2132 }
2133
2134 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2135                                vlib_frame_t * frame)
2136 {
2137   return (ip4_arp_inline (vm, node, frame, 1));
2138 }
2139
2140 static char *ip4_arp_error_strings[] = {
2141   [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
2142   [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
2143   [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
2144   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2145   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2146   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2147 };
2148
2149 /* *INDENT-OFF* */
2150 VLIB_REGISTER_NODE (ip4_arp_node) =
2151 {
2152   .name = "ip4-arp",
2153   .vector_size = sizeof (u32),
2154   .format_trace = format_ip4_forward_next_trace,
2155   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2156   .error_strings = ip4_arp_error_strings,
2157   .n_next_nodes = IP4_ARP_N_NEXT,
2158   .next_nodes =
2159   {
2160     [IP4_ARP_NEXT_DROP] = "error-drop",
2161   },
2162 };
2163
2164 VLIB_REGISTER_NODE (ip4_glean_node) =
2165 {
2166   .name = "ip4-glean",
2167   .vector_size = sizeof (u32),
2168   .format_trace = format_ip4_forward_next_trace,
2169   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2170   .error_strings = ip4_arp_error_strings,
2171   .n_next_nodes = IP4_ARP_N_NEXT,
2172   .next_nodes = {
2173   [IP4_ARP_NEXT_DROP] = "error-drop",
2174   },
2175 };
2176 /* *INDENT-ON* */
2177
2178 #define foreach_notrace_ip4_arp_error           \
2179 _(THROTTLED)                                    \
2180 _(RESOLVED)                                     \
2181 _(NO_BUFFERS)                                   \
2182 _(REQUEST_SENT)                                 \
2183 _(NON_ARP_ADJ)                                  \
2184 _(NO_SOURCE_ADDRESS)
2185
2186 static clib_error_t *
2187 arp_notrace_init (vlib_main_t * vm)
2188 {
2189   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2190
2191   /* don't trace ARP request packets */
2192 #define _(a)                                    \
2193     vnet_pcap_drop_trace_filter_add_del         \
2194         (rt->errors[IP4_ARP_ERROR_##a],         \
2195          1 /* is_add */);
2196   foreach_notrace_ip4_arp_error;
2197 #undef _
2198   return 0;
2199 }
2200
2201 VLIB_INIT_FUNCTION (arp_notrace_init);
2202
2203
2204 #ifndef CLIB_MARCH_VARIANT
2205 /* Send an ARP request to see if given destination is reachable on given interface. */
2206 clib_error_t *
2207 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
2208                     u8 refresh)
2209 {
2210   vnet_main_t *vnm = vnet_get_main ();
2211   ip4_main_t *im = &ip4_main;
2212   ethernet_arp_header_t *h;
2213   ip4_address_t *src;
2214   ip_interface_address_t *ia;
2215   ip_adjacency_t *adj;
2216   vnet_hw_interface_t *hi;
2217   vnet_sw_interface_t *si;
2218   vlib_buffer_t *b;
2219   adj_index_t ai;
2220   u32 bi = 0;
2221   u8 unicast_rewrite = 0;
2222
2223   si = vnet_get_sw_interface (vnm, sw_if_index);
2224
2225   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2226     {
2227       return clib_error_return (0, "%U: interface %U down",
2228                                 format_ip4_address, dst,
2229                                 format_vnet_sw_if_index_name, vnm,
2230                                 sw_if_index);
2231     }
2232
2233   src =
2234     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2235   if (!src)
2236     {
2237       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2238       return clib_error_return
2239         (0,
2240          "no matching interface address for destination %U (interface %U)",
2241          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2242          sw_if_index);
2243     }
2244
2245   h = vlib_packet_template_get_packet (vm,
2246                                        &im->ip4_arp_request_packet_template,
2247                                        &bi);
2248
2249   if (!h)
2250     return clib_error_return (0, "ARP request packet allocation failed");
2251
2252   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2253   if (PREDICT_FALSE (!hi->hw_address))
2254     {
2255       return clib_error_return (0, "%U: interface %U do not support ip probe",
2256                                 format_ip4_address, dst,
2257                                 format_vnet_sw_if_index_name, vnm,
2258                                 sw_if_index);
2259     }
2260
2261   mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2262
2263   h->ip4_over_ethernet[0].ip4 = src[0];
2264   h->ip4_over_ethernet[1].ip4 = dst[0];
2265
2266   b = vlib_get_buffer (vm, bi);
2267   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2268     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2269
2270   ip46_address_t nh = {
2271     .ip4 = *dst,
2272   };
2273
2274   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2275                             VNET_LINK_IP4, &nh, sw_if_index);
2276   adj = adj_get (ai);
2277
2278   /* Peer has been previously resolved, retrieve glean adj instead */
2279   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2280     {
2281       if (refresh)
2282         unicast_rewrite = 1;
2283       else
2284         {
2285           adj_unlock (ai);
2286           ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2287                                       VNET_LINK_IP4, sw_if_index, &nh);
2288           adj = adj_get (ai);
2289         }
2290     }
2291
2292   /* Add encapsulation string for software interface (e.g. ethernet header). */
2293   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2294   if (unicast_rewrite)
2295     {
2296       u16 *etype = vlib_buffer_get_current (b) - 2;
2297       etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2298     }
2299   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2300
2301   {
2302     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2303     u32 *to_next = vlib_frame_vector_args (f);
2304     to_next[0] = bi;
2305     f->n_vectors = 1;
2306     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2307   }
2308
2309   adj_unlock (ai);
2310   return /* no error */ 0;
2311 }
2312 #endif
2313
/* Next-node indices used by the ip4 rewrite code in this file. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  IP4_REWRITE_N_NEXT            /* Last: number of next nodes */
} ip4_rewrite_next_t;
2321
/**
 * The bits of an IPv4 address to keep when constructing a multicast
 * MAC address: the low-order 23 bits of the address as laid out in
 * network byte order, expressed for the host's endianness.
 */
#if !CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
2331
2332 always_inline void
2333 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2334                u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2335 {
2336   if (packet_len > adj_packet_bytes)
2337     {
2338       *error = IP4_ERROR_MTU_EXCEEDED;
2339       if (df)
2340         {
2341           icmp4_error_set_vnet_buffer
2342             (b, ICMP4_destination_unreachable,
2343              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2344              adj_packet_bytes);
2345           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2346         }
2347       else
2348         {
2349           /* IP fragmentation */
2350           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2351                                    IP4_FRAG_NEXT_IP4_REWRITE, 0);
2352           *next = IP4_REWRITE_NEXT_FRAGMENT;
2353         }
2354     }
2355 }
2356
2357 /* Decrement TTL & update checksum.
2358    Works either endian, so no need for byte swap. */
2359 static_always_inline void
2360 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2361                             u32 * error)
2362 {
2363   i32 ttl;
2364   u32 checksum;
2365   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2366     {
2367       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2368       return;
2369     }
2370
2371   ttl = ip->ttl;
2372
2373   /* Input node should have reject packets with ttl 0. */
2374   ASSERT (ip->ttl > 0);
2375
2376   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2377   checksum += checksum >= 0xffff;
2378
2379   ip->checksum = checksum;
2380   ttl -= 1;
2381   ip->ttl = ttl;
2382
2383   /*
2384    * If the ttl drops below 1 when forwarding, generate
2385    * an ICMP response.
2386    */
2387   if (PREDICT_FALSE (ttl <= 0))
2388     {
2389       *error = IP4_ERROR_TIME_EXPIRED;
2390       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2391       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2392                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2393                                    0);
2394       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2395     }
2396
2397   /* Verify checksum. */
2398   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2399           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2400 }
2401
2402
2403 always_inline uword
2404 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2405                              vlib_node_runtime_t * node,
2406                              vlib_frame_t * frame,
2407                              int do_counters, int is_midchain, int is_mcast,
2408                              int do_gso)
2409 {
2410   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2411   u32 *from = vlib_frame_vector_args (frame);
2412   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2413   u16 nexts[VLIB_FRAME_SIZE], *next;
2414   u32 n_left_from;
2415   vlib_node_runtime_t *error_node =
2416     vlib_node_get_runtime (vm, ip4_input_node.index);
2417
2418   n_left_from = frame->n_vectors;
2419   u32 thread_index = vm->thread_index;
2420
2421   vlib_get_buffers (vm, from, bufs, n_left_from);
2422   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2423
2424   if (n_left_from >= 6)
2425     {
2426       int i;
2427       for (i = 2; i < 6; i++)
2428         vlib_prefetch_buffer_header (bufs[i], LOAD);
2429     }
2430
2431   next = nexts;
2432   b = bufs;
2433   while (n_left_from >= 8)
2434     {
2435       ip_adjacency_t *adj0, *adj1;
2436       ip4_header_t *ip0, *ip1;
2437       u32 rw_len0, error0, adj_index0;
2438       u32 rw_len1, error1, adj_index1;
2439       u32 tx_sw_if_index0, tx_sw_if_index1;
2440       u8 *p;
2441
2442       vlib_prefetch_buffer_header (b[6], LOAD);
2443       vlib_prefetch_buffer_header (b[7], LOAD);
2444
2445       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2446       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2447
2448       /*
2449        * pre-fetch the per-adjacency counters
2450        */
2451       if (do_counters)
2452         {
2453           vlib_prefetch_combined_counter (&adjacency_counters,
2454                                           thread_index, adj_index0);
2455           vlib_prefetch_combined_counter (&adjacency_counters,
2456                                           thread_index, adj_index1);
2457         }
2458
2459       ip0 = vlib_buffer_get_current (b[0]);
2460       ip1 = vlib_buffer_get_current (b[1]);
2461
2462       error0 = error1 = IP4_ERROR_NONE;
2463
2464       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2465       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2466
2467       /* Rewrite packet header and updates lengths. */
2468       adj0 = adj_get (adj_index0);
2469       adj1 = adj_get (adj_index1);
2470
2471       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2472       rw_len0 = adj0[0].rewrite_header.data_bytes;
2473       rw_len1 = adj1[0].rewrite_header.data_bytes;
2474       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2475       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2476
2477       p = vlib_buffer_get_current (b[2]);
2478       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2479       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2480
2481       p = vlib_buffer_get_current (b[3]);
2482       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2483       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2484
2485       /* Check MTU of outgoing interface. */
2486       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2487       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2488
2489       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2490         ip0_len = gso_mtu_sz (b[0]);
2491       if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
2492         ip1_len = gso_mtu_sz (b[1]);
2493
2494       ip4_mtu_check (b[0], ip0_len,
2495                      adj0[0].rewrite_header.max_l3_packet_bytes,
2496                      ip0->flags_and_fragment_offset &
2497                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2498                      next + 0, &error0);
2499       ip4_mtu_check (b[1], ip1_len,
2500                      adj1[0].rewrite_header.max_l3_packet_bytes,
2501                      ip1->flags_and_fragment_offset &
2502                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2503                      next + 1, &error1);
2504
2505       if (is_mcast)
2506         {
2507           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2508                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2509                     IP4_ERROR_SAME_INTERFACE : error0);
2510           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2511                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2512                     IP4_ERROR_SAME_INTERFACE : error1);
2513         }
2514
2515       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2516        * to see the IP header */
2517       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2518         {
2519           u32 next_index = adj0[0].rewrite_header.next_index;
2520           vlib_buffer_advance (b[0], -(word) rw_len0);
2521           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2522           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2523
2524           if (PREDICT_FALSE
2525               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2526             vnet_feature_arc_start (lm->output_feature_arc_index,
2527                                     tx_sw_if_index0, &next_index, b[0]);
2528           next[0] = next_index;
2529         }
2530       else
2531         {
2532           b[0]->error = error_node->errors[error0];
2533         }
2534       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2535         {
2536           u32 next_index = adj1[0].rewrite_header.next_index;
2537           vlib_buffer_advance (b[1], -(word) rw_len1);
2538
2539           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2540           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2541
2542           if (PREDICT_FALSE
2543               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2544             vnet_feature_arc_start (lm->output_feature_arc_index,
2545                                     tx_sw_if_index1, &next_index, b[1]);
2546           next[1] = next_index;
2547         }
2548       else
2549         {
2550           b[1]->error = error_node->errors[error1];
2551         }
2552       if (is_midchain)
2553         {
2554           calc_checksums (vm, b[0]);
2555           calc_checksums (vm, b[1]);
2556         }
2557       /* Guess we are only writing on simple Ethernet header. */
2558       vnet_rewrite_two_headers (adj0[0], adj1[0],
2559                                 ip0, ip1, sizeof (ethernet_header_t));
2560
2561       /*
2562        * Bump the per-adjacency counters
2563        */
2564       if (do_counters)
2565         {
2566           vlib_increment_combined_counter
2567             (&adjacency_counters,
2568              thread_index,
2569              adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2570
2571           vlib_increment_combined_counter
2572             (&adjacency_counters,
2573              thread_index,
2574              adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2575         }
2576
2577       if (is_midchain)
2578         {
2579           if (adj0->sub_type.midchain.fixup_func)
2580             adj0->sub_type.midchain.fixup_func
2581               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2582           if (adj1->sub_type.midchain.fixup_func)
2583             adj1->sub_type.midchain.fixup_func
2584               (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2585         }
2586
2587       if (is_mcast)
2588         {
2589           /*
2590            * copy bytes from the IP address into the MAC rewrite
2591            */
2592           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2593                                       adj0->rewrite_header.dst_mcast_offset,
2594                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2595           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2596                                       adj1->rewrite_header.dst_mcast_offset,
2597                                       &ip1->dst_address.as_u32, (u8 *) ip1);
2598         }
2599
2600       next += 2;
2601       b += 2;
2602       n_left_from -= 2;
2603     }
2604
2605   while (n_left_from > 0)
2606     {
2607       ip_adjacency_t *adj0;
2608       ip4_header_t *ip0;
2609       u32 rw_len0, adj_index0, error0;
2610       u32 tx_sw_if_index0;
2611
2612       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2613
2614       adj0 = adj_get (adj_index0);
2615
2616       if (do_counters)
2617         vlib_prefetch_combined_counter (&adjacency_counters,
2618                                         thread_index, adj_index0);
2619
2620       ip0 = vlib_buffer_get_current (b[0]);
2621
2622       error0 = IP4_ERROR_NONE;
2623
2624       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2625
2626
2627       /* Update packet buffer attributes/set output interface. */
2628       rw_len0 = adj0[0].rewrite_header.data_bytes;
2629       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2630
2631       /* Check MTU of outgoing interface. */
2632       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2633       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2634         ip0_len = gso_mtu_sz (b[0]);
2635
2636       ip4_mtu_check (b[0], ip0_len,
2637                      adj0[0].rewrite_header.max_l3_packet_bytes,
2638                      ip0->flags_and_fragment_offset &
2639                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2640                      next + 0, &error0);
2641
2642       if (is_mcast)
2643         {
2644           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2645                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2646                     IP4_ERROR_SAME_INTERFACE : error0);
2647         }
2648
2649       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2650        * to see the IP header */
2651       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2652         {
2653           u32 next_index = adj0[0].rewrite_header.next_index;
2654           vlib_buffer_advance (b[0], -(word) rw_len0);
2655           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2656           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2657
2658           if (PREDICT_FALSE
2659               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2660             vnet_feature_arc_start (lm->output_feature_arc_index,
2661                                     tx_sw_if_index0, &next_index, b[0]);
2662           next[0] = next_index;
2663         }
2664       else
2665         {
2666           b[0]->error = error_node->errors[error0];
2667         }
2668       if (is_midchain)
2669         {
2670           calc_checksums (vm, b[0]);
2671         }
2672       /* Guess we are only writing on simple Ethernet header. */
2673       vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2674
2675       if (do_counters)
2676         vlib_increment_combined_counter
2677           (&adjacency_counters,
2678            thread_index, adj_index0, 1,
2679            vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2680
2681       if (is_midchain)
2682         {
2683           if (adj0->sub_type.midchain.fixup_func)
2684             adj0->sub_type.midchain.fixup_func
2685               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2686         }
2687
2688       if (is_mcast)
2689         {
2690           /*
2691            * copy bytes from the IP address into the MAC rewrite
2692            */
2693           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2694                                       adj0->rewrite_header.dst_mcast_offset,
2695                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2696         }
2697
2698       next += 1;
2699       b += 1;
2700       n_left_from -= 1;
2701     }
2702
2703
2704   /* Need to do trace after rewrites to pick up new packet data. */
2705   if (node->flags & VLIB_NODE_FLAG_TRACE)
2706     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2707
2708   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2709   return frame->n_vectors;
2710 }
2711
2712 always_inline uword
2713 ip4_rewrite_inline (vlib_main_t * vm,
2714                     vlib_node_runtime_t * node,
2715                     vlib_frame_t * frame,
2716                     int do_counters, int is_midchain, int is_mcast)
2717 {
2718   vnet_main_t *vnm = vnet_get_main ();
2719   if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2720     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2721                                         is_midchain, is_mcast,
2722                                         1 /* do_gso */ );
2723   else
2724     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2725                                         is_midchain, is_mcast,
2726                                         0 /* no do_gso */ );
2727 }
2728
2729
2730 /** @brief IPv4 rewrite node.
2731     @node ip4-rewrite
2732
2733     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2734     header checksum, fetch the ip adjacency, check the outbound mtu,
2735     apply the adjacency rewrite, and send pkts to the adjacency
2736     rewrite header's rewrite_next_index.
2737
2738     @param vm vlib_main_t corresponding to the current thread
2739     @param node vlib_node_runtime_t
2740     @param frame vlib_frame_t whose contents should be dispatched
2741
2742     @par Graph mechanics: buffer metadata, next index usage
2743
2744     @em Uses:
2745     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2746         - the rewrite adjacency index
2747     - <code>adj->lookup_next_index</code>
2748         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2749           the packet will be dropped.
2750     - <code>adj->rewrite_header</code>
2751         - Rewrite string length, rewrite string, next_index
2752
2753     @em Sets:
2754     - <code>b->current_data, b->current_length</code>
2755         - Updated net of applying the rewrite string
2756
2757     <em>Next Indices:</em>
2758     - <code> adj->rewrite_header.next_index </code>
2759       or @c ip4-drop
2760 */
2761
2762 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2763                                  vlib_frame_t * frame)
2764 {
2765   if (adj_are_counters_enabled ())
2766     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2767   else
2768     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2769 }
2770
2771 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2772                                        vlib_node_runtime_t * node,
2773                                        vlib_frame_t * frame)
2774 {
2775   if (adj_are_counters_enabled ())
2776     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2777   else
2778     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2779 }
2780
2781 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2782                                   vlib_node_runtime_t * node,
2783                                   vlib_frame_t * frame)
2784 {
2785   if (adj_are_counters_enabled ())
2786     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2787   else
2788     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2789 }
2790
2791 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2792                                        vlib_node_runtime_t * node,
2793                                        vlib_frame_t * frame)
2794 {
2795   if (adj_are_counters_enabled ())
2796     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2797   else
2798     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2799 }
2800
2801 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2802                                         vlib_node_runtime_t * node,
2803                                         vlib_frame_t * frame)
2804 {
2805   if (adj_are_counters_enabled ())
2806     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2807   else
2808     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2809 }
2810
2811 /* *INDENT-OFF* */
2812 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2813   .name = "ip4-rewrite",
2814   .vector_size = sizeof (u32),
2815
2816   .format_trace = format_ip4_rewrite_trace,
2817
2818   .n_next_nodes = IP4_REWRITE_N_NEXT,
2819   .next_nodes = {
2820     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2821     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2822     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2823   },
2824 };
2825
2826 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2827   .name = "ip4-rewrite-bcast",
2828   .vector_size = sizeof (u32),
2829
2830   .format_trace = format_ip4_rewrite_trace,
2831   .sibling_of = "ip4-rewrite",
2832 };
2833
2834 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2835   .name = "ip4-rewrite-mcast",
2836   .vector_size = sizeof (u32),
2837
2838   .format_trace = format_ip4_rewrite_trace,
2839   .sibling_of = "ip4-rewrite",
2840 };
2841
2842 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2843   .name = "ip4-mcast-midchain",
2844   .vector_size = sizeof (u32),
2845
2846   .format_trace = format_ip4_rewrite_trace,
2847   .sibling_of = "ip4-rewrite",
2848 };
2849
2850 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2851   .name = "ip4-midchain",
2852   .vector_size = sizeof (u32),
2853   .format_trace = format_ip4_forward_next_trace,
2854   .sibling_of =  "ip4-rewrite",
2855 };
2856 /* *INDENT-ON* */
2857
2858 static int
2859 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2860 {
2861   ip4_fib_mtrie_t *mtrie0;
2862   ip4_fib_mtrie_leaf_t leaf0;
2863   u32 lbi0;
2864
2865   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2866
2867   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2868   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2869   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2870
2871   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2872
2873   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2874 }
2875
2876 static clib_error_t *
2877 test_lookup_command_fn (vlib_main_t * vm,
2878                         unformat_input_t * input, vlib_cli_command_t * cmd)
2879 {
2880   ip4_fib_t *fib;
2881   u32 table_id = 0;
2882   f64 count = 1;
2883   u32 n;
2884   int i;
2885   ip4_address_t ip4_base_address;
2886   u64 errors = 0;
2887
2888   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2889     {
2890       if (unformat (input, "table %d", &table_id))
2891         {
2892           /* Make sure the entry exists. */
2893           fib = ip4_fib_get (table_id);
2894           if ((fib) && (fib->index != table_id))
2895             return clib_error_return (0, "<fib-index> %d does not exist",
2896                                       table_id);
2897         }
2898       else if (unformat (input, "count %f", &count))
2899         ;
2900
2901       else if (unformat (input, "%U",
2902                          unformat_ip4_address, &ip4_base_address))
2903         ;
2904       else
2905         return clib_error_return (0, "unknown input `%U'",
2906                                   format_unformat_error, input);
2907     }
2908
2909   n = count;
2910
2911   for (i = 0; i < n; i++)
2912     {
2913       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2914         errors++;
2915
2916       ip4_base_address.as_u32 =
2917         clib_host_to_net_u32 (1 +
2918                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2919     }
2920
2921   if (errors)
2922     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2923   else
2924     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2925
2926   return 0;
2927 }
2928
2929 /*?
2930  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2931  * given FIB table to determine if there is a conflict with the
2932  * adjacency table. The fib-id can be determined by using the
2933  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2934  * of 0 is used.
2935  *
2936  * @todo This command uses fib-id, other commands use table-id (not
2937  * just a name, they are different indexes). Would like to change this
2938  * to table-id for consistency.
2939  *
2940  * @cliexpar
2941  * Example of how to run the test lookup command:
2942  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2943  * No errors in 2 lookups
2944  * @cliexend
2945 ?*/
2946 /* *INDENT-OFF* */
2947 VLIB_CLI_COMMAND (lookup_test_command, static) =
2948 {
2949   .path = "test lookup",
2950   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2951   .function = test_lookup_command_fn,
2952 };
2953 /* *INDENT-ON* */
2954
2955 #ifndef CLIB_MARCH_VARIANT
2956 int
2957 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2958 {
2959   u32 fib_index;
2960
2961   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2962
2963   if (~0 == fib_index)
2964     return VNET_API_ERROR_NO_SUCH_FIB;
2965
2966   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2967                                   flow_hash_config);
2968
2969   return 0;
2970 }
2971 #endif
2972
2973 static clib_error_t *
2974 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2975                              unformat_input_t * input,
2976                              vlib_cli_command_t * cmd)
2977 {
2978   int matched = 0;
2979   u32 table_id = 0;
2980   u32 flow_hash_config = 0;
2981   int rv;
2982
2983   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2984     {
2985       if (unformat (input, "table %d", &table_id))
2986         matched = 1;
2987 #define _(a,v) \
2988     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2989       foreach_flow_hash_bit
2990 #undef _
2991         else
2992         break;
2993     }
2994
2995   if (matched == 0)
2996     return clib_error_return (0, "unknown input `%U'",
2997                               format_unformat_error, input);
2998
2999   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3000   switch (rv)
3001     {
3002     case 0:
3003       break;
3004
3005     case VNET_API_ERROR_NO_SUCH_FIB:
3006       return clib_error_return (0, "no such FIB table %d", table_id);
3007
3008     default:
3009       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3010       break;
3011     }
3012
3013   return 0;
3014 }
3015
3016 /*?
3017  * Configure the set of IPv4 fields used by the flow hash.
3018  *
3019  * @cliexpar
3020  * Example of how to set the flow hash on a given table:
3021  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3022  * Example of displaying the configured flow hash:
3023  * @cliexstart{show ip fib}
3024  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3025  * 0.0.0.0/0
3026  *   unicast-ip4-chain
3027  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3028  *     [0] [@0]: dpo-drop ip6
3029  * 0.0.0.0/32
3030  *   unicast-ip4-chain
3031  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3032  *     [0] [@0]: dpo-drop ip6
3033  * 224.0.0.0/8
3034  *   unicast-ip4-chain
3035  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3036  *     [0] [@0]: dpo-drop ip6
3037  * 6.0.1.2/32
3038  *   unicast-ip4-chain
3039  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3040  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3041  * 7.0.0.1/32
3042  *   unicast-ip4-chain
3043  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3044  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3045  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3046  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3047  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3048  * 240.0.0.0/8
3049  *   unicast-ip4-chain
3050  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3051  *     [0] [@0]: dpo-drop ip6
3052  * 255.255.255.255/32
3053  *   unicast-ip4-chain
3054  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3055  *     [0] [@0]: dpo-drop ip6
3056  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3057  * 0.0.0.0/0
3058  *   unicast-ip4-chain
3059  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3060  *     [0] [@0]: dpo-drop ip6
3061  * 0.0.0.0/32
3062  *   unicast-ip4-chain
3063  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3064  *     [0] [@0]: dpo-drop ip6
3065  * 172.16.1.0/24
3066  *   unicast-ip4-chain
3067  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3068  *     [0] [@4]: ipv4-glean: af_packet0
3069  * 172.16.1.1/32
3070  *   unicast-ip4-chain
3071  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3072  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3073  * 172.16.1.2/32
3074  *   unicast-ip4-chain
3075  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3076  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3077  * 172.16.2.0/24
3078  *   unicast-ip4-chain
3079  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3080  *     [0] [@4]: ipv4-glean: af_packet1
3081  * 172.16.2.1/32
3082  *   unicast-ip4-chain
3083  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3084  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3085  * 224.0.0.0/8
3086  *   unicast-ip4-chain
3087  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3088  *     [0] [@0]: dpo-drop ip6
3089  * 240.0.0.0/8
3090  *   unicast-ip4-chain
3091  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3092  *     [0] [@0]: dpo-drop ip6
3093  * 255.255.255.255/32
3094  *   unicast-ip4-chain
3095  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3096  *     [0] [@0]: dpo-drop ip6
3097  * @cliexend
3098 ?*/
3099 /* *INDENT-OFF* */
3100 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3101 {
3102   .path = "set ip flow-hash",
3103   .short_help =
3104   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3105   .function = set_ip_flow_hash_command_fn,
3106 };
3107 /* *INDENT-ON* */
3108
3109 #ifndef CLIB_MARCH_VARIANT
3110 int
3111 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3112                              u32 table_index)
3113 {
3114   vnet_main_t *vnm = vnet_get_main ();
3115   vnet_interface_main_t *im = &vnm->interface_main;
3116   ip4_main_t *ipm = &ip4_main;
3117   ip_lookup_main_t *lm = &ipm->lookup_main;
3118   vnet_classify_main_t *cm = &vnet_classify_main;
3119   ip4_address_t *if_addr;
3120
3121   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3122     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3123
3124   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3125     return VNET_API_ERROR_NO_SUCH_ENTRY;
3126
3127   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3128   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3129
3130   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3131
3132   if (NULL != if_addr)
3133     {
3134       fib_prefix_t pfx = {
3135         .fp_len = 32,
3136         .fp_proto = FIB_PROTOCOL_IP4,
3137         .fp_addr.ip4 = *if_addr,
3138       };
3139       u32 fib_index;
3140
3141       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3142                                                        sw_if_index);
3143
3144
3145       if (table_index != (u32) ~ 0)
3146         {
3147           dpo_id_t dpo = DPO_INVALID;
3148
3149           dpo_set (&dpo,
3150                    DPO_CLASSIFY,
3151                    DPO_PROTO_IP4,
3152                    classify_dpo_create (DPO_PROTO_IP4, table_index));
3153
3154           fib_table_entry_special_dpo_add (fib_index,
3155                                            &pfx,
3156                                            FIB_SOURCE_CLASSIFY,
3157                                            FIB_ENTRY_FLAG_NONE, &dpo);
3158           dpo_reset (&dpo);
3159         }
3160       else
3161         {
3162           fib_table_entry_special_remove (fib_index,
3163                                           &pfx, FIB_SOURCE_CLASSIFY);
3164         }
3165     }
3166
3167   return 0;
3168 }
3169 #endif
3170
3171 static clib_error_t *
3172 set_ip_classify_command_fn (vlib_main_t * vm,
3173                             unformat_input_t * input,
3174                             vlib_cli_command_t * cmd)
3175 {
3176   u32 table_index = ~0;
3177   int table_index_set = 0;
3178   u32 sw_if_index = ~0;
3179   int rv;
3180
3181   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3182     {
3183       if (unformat (input, "table-index %d", &table_index))
3184         table_index_set = 1;
3185       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3186                          vnet_get_main (), &sw_if_index))
3187         ;
3188       else
3189         break;
3190     }
3191
3192   if (table_index_set == 0)
3193     return clib_error_return (0, "classify table-index must be specified");
3194
3195   if (sw_if_index == ~0)
3196     return clib_error_return (0, "interface / subif must be specified");
3197
3198   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3199
3200   switch (rv)
3201     {
3202     case 0:
3203       break;
3204
3205     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3206       return clib_error_return (0, "No such interface");
3207
3208     case VNET_API_ERROR_NO_SUCH_ENTRY:
3209       return clib_error_return (0, "No such classifier table");
3210     }
3211   return 0;
3212 }
3213
3214 /*?
3215  * Assign a classification table to an interface. The classification
3216  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3217  * commands. Once the table is created, use this command to filter packets
3218  * on an interface.
3219  *
3220  * @cliexpar
3221  * Example of how to assign a classification table to an interface:
3222  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3223 ?*/
3224 /* *INDENT-OFF* */
3225 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3226 {
3227     .path = "set ip classify",
3228     .short_help =
3229     "set ip classify intfc <interface> table-index <classify-idx>",
3230     .function = set_ip_classify_command_fn,
3231 };
3232 /* *INDENT-ON* */
3233
3234 static clib_error_t *
3235 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3236 {
3237   ip4_main_t *im = &ip4_main;
3238   uword heapsize = 0;
3239
3240   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3241     {
3242       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3243         ;
3244       else
3245         return clib_error_return (0,
3246                                   "invalid heap-size parameter `%U'",
3247                                   format_unformat_error, input);
3248     }
3249
3250   im->mtrie_heap_size = heapsize;
3251
3252   return 0;
3253 }
3254
3255 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3256
3257 /*
3258  * fd.io coding-style-patch-verification: ON
3259  *
3260  * Local Variables:
3261  * eval: (c-set-style "gnu")
3262  * End:
3263  */