fib: midchain adjacency optimisations
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/classify_dpo.h>
56 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
57 #include <vnet/adj/adj_dp.h>
58
59 #include <vnet/ip/ip4_forward.h>
60 #include <vnet/interface_output.h>
61 #include <vnet/classify/vnet_classify.h>
62
63 /** @brief IPv4 lookup node.
64     @node ip4-lookup
65
66     This is the main IPv4 lookup dispatch node.
67
68     @param vm vlib_main_t corresponding to the current thread
69     @param node vlib_node_runtime_t
70     @param frame vlib_frame_t whose contents should be dispatched
71
72     @par Graph mechanics: buffer metadata, next index usage
73
74     @em Uses:
75     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
76         - Indicates the @c sw_if_index value of the interface that the
77           packet was received on.
78     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
79         - When the value is @c ~0 then the node performs a longest prefix
80           match (LPM) for the packet destination address in the FIB attached
81           to the receive interface.
82         - Otherwise perform LPM for the packet destination address in the
83           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
84           value (0, 1, ...) and not a VRF id.
85
86     @em Sets:
87     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
88         - The lookup result adjacency index.
89
90     <em>Next Index:</em>
91     - Dispatches the packet to the node index found in
92       ip_adjacency_t @c adj->lookup_next_index
93       (where @c adj is the lookup result adjacency).
94 */
95 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
96                                 vlib_frame_t * frame)
97 {
98   return ip4_lookup_inline (vm, node, frame);
99 }
100
101 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
102
103 /* *INDENT-OFF* */
104 VLIB_REGISTER_NODE (ip4_lookup_node) =
105 {
106   .name = "ip4-lookup",
107   .vector_size = sizeof (u32),
108   .format_trace = format_ip4_lookup_trace,
109   .n_next_nodes = IP_LOOKUP_N_NEXT,
110   .next_nodes = IP4_LOOKUP_NEXT_NODES,
111 };
112 /* *INDENT-ON* */
113
114 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
115                                       vlib_node_runtime_t * node,
116                                       vlib_frame_t * frame)
117 {
118   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
119   u32 n_left, *from;
120   u32 thread_index = vm->thread_index;
121   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
122   u16 nexts[VLIB_FRAME_SIZE], *next;
123
124   from = vlib_frame_vector_args (frame);
125   n_left = frame->n_vectors;
126   next = nexts;
127
128   vlib_get_buffers (vm, from, bufs, n_left);
129
130   while (n_left >= 4)
131     {
132       const load_balance_t *lb0, *lb1;
133       const ip4_header_t *ip0, *ip1;
134       u32 lbi0, hc0, lbi1, hc1;
135       const dpo_id_t *dpo0, *dpo1;
136
137       /* Prefetch next iteration. */
138       {
139         vlib_prefetch_buffer_header (b[2], LOAD);
140         vlib_prefetch_buffer_header (b[3], LOAD);
141
142         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
143         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
144       }
145
146       ip0 = vlib_buffer_get_current (b[0]);
147       ip1 = vlib_buffer_get_current (b[1]);
148       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
149       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
150
151       lb0 = load_balance_get (lbi0);
152       lb1 = load_balance_get (lbi1);
153
154       /*
155        * this node is for via FIBs we can re-use the hash value from the
156        * to node if present.
157        * We don't want to use the same hash value at each level in the recursion
158        * graph as that would lead to polarisation
159        */
160       hc0 = hc1 = 0;
161
162       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
163         {
164           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
165             {
166               hc0 = vnet_buffer (b[0])->ip.flow_hash =
167                 vnet_buffer (b[0])->ip.flow_hash >> 1;
168             }
169           else
170             {
171               hc0 = vnet_buffer (b[0])->ip.flow_hash =
172                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
173             }
174           dpo0 = load_balance_get_fwd_bucket
175             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
176         }
177       else
178         {
179           dpo0 = load_balance_get_bucket_i (lb0, 0);
180         }
181       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
182         {
183           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
184             {
185               hc1 = vnet_buffer (b[1])->ip.flow_hash =
186                 vnet_buffer (b[1])->ip.flow_hash >> 1;
187             }
188           else
189             {
190               hc1 = vnet_buffer (b[1])->ip.flow_hash =
191                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
192             }
193           dpo1 = load_balance_get_fwd_bucket
194             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
195         }
196       else
197         {
198           dpo1 = load_balance_get_bucket_i (lb1, 0);
199         }
200
201       next[0] = dpo0->dpoi_next_node;
202       next[1] = dpo1->dpoi_next_node;
203
204       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
205       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
206
207       vlib_increment_combined_counter
208         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
209       vlib_increment_combined_counter
210         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
211
212       b += 2;
213       next += 2;
214       n_left -= 2;
215     }
216
217   while (n_left > 0)
218     {
219       const load_balance_t *lb0;
220       const ip4_header_t *ip0;
221       const dpo_id_t *dpo0;
222       u32 lbi0, hc0;
223
224       ip0 = vlib_buffer_get_current (b[0]);
225       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
226
227       lb0 = load_balance_get (lbi0);
228
229       hc0 = 0;
230       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
231         {
232           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
233             {
234               hc0 = vnet_buffer (b[0])->ip.flow_hash =
235                 vnet_buffer (b[0])->ip.flow_hash >> 1;
236             }
237           else
238             {
239               hc0 = vnet_buffer (b[0])->ip.flow_hash =
240                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
241             }
242           dpo0 = load_balance_get_fwd_bucket
243             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
244         }
245       else
246         {
247           dpo0 = load_balance_get_bucket_i (lb0, 0);
248         }
249
250       next[0] = dpo0->dpoi_next_node;
251       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
252
253       vlib_increment_combined_counter
254         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
255
256       b += 1;
257       next += 1;
258       n_left -= 1;
259     }
260
261   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
262   if (node->flags & VLIB_NODE_FLAG_TRACE)
263     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
264
265   return frame->n_vectors;
266 }
267
268 /* *INDENT-OFF* */
269 VLIB_REGISTER_NODE (ip4_load_balance_node) =
270 {
271   .name = "ip4-load-balance",
272   .vector_size = sizeof (u32),
273   .sibling_of = "ip4-lookup",
274   .format_trace = format_ip4_lookup_trace,
275 };
276 /* *INDENT-ON* */
277
278 #ifndef CLIB_MARCH_VARIANT
279 /* get first interface address */
280 ip4_address_t *
281 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
282                              ip_interface_address_t ** result_ia)
283 {
284   ip_lookup_main_t *lm = &im->lookup_main;
285   ip_interface_address_t *ia = 0;
286   ip4_address_t *result = 0;
287
288   /* *INDENT-OFF* */
289   foreach_ip_interface_address
290     (lm, ia, sw_if_index,
291      1 /* honor unnumbered */ ,
292      ({
293        ip4_address_t * a =
294          ip_interface_address_get_address (lm, ia);
295        result = a;
296        break;
297      }));
298   /* *INDENT-OFF* */
299   if (result_ia)
300     *result_ia = result ? ia : 0;
301   return result;
302 }
303 #endif
304
305 static void
306 ip4_add_subnet_bcast_route (u32 fib_index,
307                             fib_prefix_t *pfx,
308                             u32 sw_if_index)
309 {
310   vnet_sw_interface_flags_t iflags;
311
312   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
313
314   fib_table_entry_special_remove(fib_index,
315                                  pfx,
316                                  FIB_SOURCE_INTERFACE);
317
318   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
319     {
320       fib_table_entry_update_one_path (fib_index, pfx,
321                                        FIB_SOURCE_INTERFACE,
322                                        FIB_ENTRY_FLAG_NONE,
323                                        DPO_PROTO_IP4,
324                                        /* No next-hop address */
325                                        &ADJ_BCAST_ADDR,
326                                        sw_if_index,
327                                        // invalid FIB index
328                                        ~0,
329                                        1,
330                                        // no out-label stack
331                                        NULL,
332                                        FIB_ROUTE_PATH_FLAG_NONE);
333     }
334   else
335     {
336         fib_table_entry_special_add(fib_index,
337                                     pfx,
338                                     FIB_SOURCE_INTERFACE,
339                                     (FIB_ENTRY_FLAG_DROP |
340                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
341     }
342 }
343
344 static void
345 ip4_add_interface_prefix_routes (ip4_main_t *im,
346                                  u32 sw_if_index,
347                                  u32 fib_index,
348                                  ip_interface_address_t * a)
349 {
350   ip_lookup_main_t *lm = &im->lookup_main;
351   ip_interface_prefix_t *if_prefix;
352   ip4_address_t *address = ip_interface_address_get_address (lm, a);
353
354   ip_interface_prefix_key_t key = {
355     .prefix = {
356       .fp_len = a->address_length,
357       .fp_proto = FIB_PROTOCOL_IP4,
358       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
359     },
360     .sw_if_index = sw_if_index,
361   };
362
363   fib_prefix_t pfx_special = {
364     .fp_proto = FIB_PROTOCOL_IP4,
365   };
366
367   /* If prefix already set on interface, just increment ref count & return */
368   if_prefix = ip_get_interface_prefix (lm, &key);
369   if (if_prefix)
370     {
371       if_prefix->ref_count += 1;
372       return;
373     }
374
375   /* New prefix - allocate a pool entry, initialize it, add to the hash */
376   pool_get (lm->if_prefix_pool, if_prefix);
377   if_prefix->ref_count = 1;
378   if_prefix->src_ia_index = a - lm->if_address_pool;
379   clib_memcpy (&if_prefix->key, &key, sizeof (key));
380   mhash_set (&lm->prefix_to_if_prefix_index, &key,
381              if_prefix - lm->if_prefix_pool, 0 /* old value */);
382
383   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
384   if (a->address_length <= 30)
385     {
386       pfx_special.fp_len = a->address_length;
387       pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
388
389       /* set the glean route for the prefix */
390       fib_table_entry_update_one_path (fib_index, &pfx_special,
391                                        FIB_SOURCE_INTERFACE,
392                                        (FIB_ENTRY_FLAG_CONNECTED |
393                                         FIB_ENTRY_FLAG_ATTACHED),
394                                        DPO_PROTO_IP4,
395                                        /* No next-hop address */
396                                        NULL,
397                                        sw_if_index,
398                                        /* invalid FIB index */
399                                        ~0,
400                                        1,
401                                        /* no out-label stack */
402                                        NULL,
403                                        FIB_ROUTE_PATH_FLAG_NONE);
404
405       /* set a drop route for the base address of the prefix */
406       pfx_special.fp_len = 32;
407       pfx_special.fp_addr.ip4.as_u32 =
408         address->as_u32 & im->fib_masks[a->address_length];
409
410       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
411         fib_table_entry_special_add (fib_index, &pfx_special,
412                                      FIB_SOURCE_INTERFACE,
413                                      (FIB_ENTRY_FLAG_DROP |
414                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
415
416       /* set a route for the broadcast address of the prefix */
417       pfx_special.fp_len = 32;
418       pfx_special.fp_addr.ip4.as_u32 =
419         address->as_u32 | ~im->fib_masks[a->address_length];
420       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
421         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
422
423
424     }
425   /* length == 31 - add an attached route for the other address */
426   else if (a->address_length == 31)
427     {
428       pfx_special.fp_len = 32;
429       pfx_special.fp_addr.ip4.as_u32 =
430         address->as_u32 ^ clib_host_to_net_u32(1);
431
432       fib_table_entry_update_one_path (fib_index, &pfx_special,
433                                        FIB_SOURCE_INTERFACE,
434                                        (FIB_ENTRY_FLAG_ATTACHED),
435                                        DPO_PROTO_IP4,
436                                        &pfx_special.fp_addr,
437                                        sw_if_index,
438                                        /* invalid FIB index */
439                                        ~0,
440                                        1,
441                                        NULL,
442                                        FIB_ROUTE_PATH_FLAG_NONE);
443     }
444 }
445
446 static void
447 ip4_add_interface_routes (u32 sw_if_index,
448                           ip4_main_t * im, u32 fib_index,
449                           ip_interface_address_t * a)
450 {
451   ip_lookup_main_t *lm = &im->lookup_main;
452   ip4_address_t *address = ip_interface_address_get_address (lm, a);
453   fib_prefix_t pfx = {
454     .fp_len = 32,
455     .fp_proto = FIB_PROTOCOL_IP4,
456     .fp_addr.ip4 = *address,
457   };
458
459   /* set special routes for the prefix if needed */
460   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
461
462   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
463     {
464       u32 classify_table_index =
465         lm->classify_table_index_by_sw_if_index[sw_if_index];
466       if (classify_table_index != (u32) ~ 0)
467         {
468           dpo_id_t dpo = DPO_INVALID;
469
470           dpo_set (&dpo,
471                    DPO_CLASSIFY,
472                    DPO_PROTO_IP4,
473                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
474
475           fib_table_entry_special_dpo_add (fib_index,
476                                            &pfx,
477                                            FIB_SOURCE_CLASSIFY,
478                                            FIB_ENTRY_FLAG_NONE, &dpo);
479           dpo_reset (&dpo);
480         }
481     }
482
483   fib_table_entry_update_one_path (fib_index, &pfx,
484                                    FIB_SOURCE_INTERFACE,
485                                    (FIB_ENTRY_FLAG_CONNECTED |
486                                     FIB_ENTRY_FLAG_LOCAL),
487                                    DPO_PROTO_IP4,
488                                    &pfx.fp_addr,
489                                    sw_if_index,
490                                    // invalid FIB index
491                                    ~0,
492                                    1, NULL,
493                                    FIB_ROUTE_PATH_FLAG_NONE);
494 }
495
496 static void
497 ip4_del_interface_prefix_routes (ip4_main_t * im,
498                                  u32 sw_if_index,
499                                  u32 fib_index,
500                                  ip4_address_t * address,
501                                  u32 address_length)
502 {
503   ip_lookup_main_t *lm = &im->lookup_main;
504   ip_interface_prefix_t *if_prefix;
505
506   ip_interface_prefix_key_t key = {
507     .prefix = {
508       .fp_len = address_length,
509       .fp_proto = FIB_PROTOCOL_IP4,
510       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
511     },
512     .sw_if_index = sw_if_index,
513   };
514
515   fib_prefix_t pfx_special = {
516     .fp_len = 32,
517     .fp_proto = FIB_PROTOCOL_IP4,
518   };
519
520   if_prefix = ip_get_interface_prefix (lm, &key);
521   if (!if_prefix)
522     {
523       clib_warning ("Prefix not found while deleting %U",
524                     format_ip4_address_and_length, address, address_length);
525       return;
526     }
527
528   if_prefix->ref_count -= 1;
529
530   /*
531    * Routes need to be adjusted if:
532    * - deleting last intf addr in prefix
533    * - deleting intf addr used as default source address in glean adjacency
534    *
535    * We're done now otherwise
536    */
537   if ((if_prefix->ref_count > 0) &&
538       !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
539     return;
540
541   /* length <= 30, delete glean route, first address, last address */
542   if (address_length <= 30)
543     {
544
545       /* remove glean route for prefix */
546       pfx_special.fp_addr.ip4 = *address;
547       pfx_special.fp_len = address_length;
548       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
549
550       /* if no more intf addresses in prefix, remove other special routes */
551       if (!if_prefix->ref_count)
552         {
553           /* first address in prefix */
554           pfx_special.fp_addr.ip4.as_u32 =
555             address->as_u32 & im->fib_masks[address_length];
556           pfx_special.fp_len = 32;
557
558           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
559           fib_table_entry_special_remove (fib_index,
560                                           &pfx_special,
561                                           FIB_SOURCE_INTERFACE);
562
563           /* prefix broadcast address */
564           pfx_special.fp_addr.ip4.as_u32 =
565             address->as_u32 | ~im->fib_masks[address_length];
566           pfx_special.fp_len = 32;
567
568           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
569           fib_table_entry_special_remove (fib_index,
570                                           &pfx_special,
571                                           FIB_SOURCE_INTERFACE);
572         }
573       else
574         /* default source addr just got deleted, find another */
575         {
576           ip_interface_address_t *new_src_ia = NULL;
577           ip4_address_t *new_src_addr = NULL;
578
579           new_src_addr =
580             ip4_interface_address_matching_destination
581               (im, address, sw_if_index, &new_src_ia);
582
583           if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
584
585           pfx_special.fp_len = address_length;
586           pfx_special.fp_addr.ip4 = *new_src_addr;
587
588           /* set new glean route for the prefix */
589           fib_table_entry_update_one_path (fib_index, &pfx_special,
590                                            FIB_SOURCE_INTERFACE,
591                                            (FIB_ENTRY_FLAG_CONNECTED |
592                                             FIB_ENTRY_FLAG_ATTACHED),
593                                            DPO_PROTO_IP4,
594                                            /* No next-hop address */
595                                            NULL,
596                                            sw_if_index,
597                                            /* invalid FIB index */
598                                            ~0,
599                                            1,
600                                            /* no out-label stack */
601                                            NULL,
602                                            FIB_ROUTE_PATH_FLAG_NONE);
603           return;
604         }
605     }
606   /* length == 31, delete attached route for the other address */
607   else if (address_length == 31)
608     {
609       pfx_special.fp_addr.ip4.as_u32 =
610         address->as_u32 ^ clib_host_to_net_u32(1);
611
612       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
613     }
614
615   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
616   pool_put (lm->if_prefix_pool, if_prefix);
617 }
618
619 static void
620 ip4_del_interface_routes (u32 sw_if_index,
621                           ip4_main_t * im,
622                           u32 fib_index,
623                           ip4_address_t * address, u32 address_length)
624 {
625   fib_prefix_t pfx = {
626     .fp_len = address_length,
627     .fp_proto = FIB_PROTOCOL_IP4,
628     .fp_addr.ip4 = *address,
629   };
630
631   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
632                                    address, address_length);
633
634   pfx.fp_len = 32;
635   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
636 }
637
638 #ifndef CLIB_MARCH_VARIANT
639 void
640 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
641 {
642   ip4_main_t *im = &ip4_main;
643
644   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
645
646   /*
647    * enable/disable only on the 1<->0 transition
648    */
649   if (is_enable)
650     {
651       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
652         return;
653     }
654   else
655     {
656       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
657       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
658         return;
659     }
660   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
661                                !is_enable, 0, 0);
662
663
664   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
665                                sw_if_index, !is_enable, 0, 0);
666
667   {
668     ip4_enable_disable_interface_callback_t *cb;
669     vec_foreach (cb, im->enable_disable_interface_callbacks)
670       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
671   }
672 }
673
674 static clib_error_t *
675 ip4_add_del_interface_address_internal (vlib_main_t * vm,
676                                         u32 sw_if_index,
677                                         ip4_address_t * address,
678                                         u32 address_length, u32 is_del)
679 {
680   vnet_main_t *vnm = vnet_get_main ();
681   ip4_main_t *im = &ip4_main;
682   ip_lookup_main_t *lm = &im->lookup_main;
683   clib_error_t *error = 0;
684   u32 if_address_index;
685   ip4_address_fib_t ip4_af, *addr_fib = 0;
686
687   /* local0 interface doesn't support IP addressing  */
688   if (sw_if_index == 0)
689     {
690       return
691        clib_error_create ("local0 interface doesn't support IP addressing");
692     }
693
694   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
695   ip4_addr_fib_init (&ip4_af, address,
696                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
697   vec_add1 (addr_fib, ip4_af);
698
699   /*
700    * there is no support for adj-fib handling in the presence of overlapping
701    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
702    * most routers do.
703    */
704   /* *INDENT-OFF* */
705   if (!is_del)
706     {
707       /* When adding an address check that it does not conflict
708          with an existing address on any interface in this table. */
709       ip_interface_address_t *ia;
710       vnet_sw_interface_t *sif;
711
712       pool_foreach(sif, vnm->interface_main.sw_interfaces,
713       ({
714           if (im->fib_index_by_sw_if_index[sw_if_index] ==
715               im->fib_index_by_sw_if_index[sif->sw_if_index])
716             {
717               foreach_ip_interface_address
718                 (&im->lookup_main, ia, sif->sw_if_index,
719                  0 /* honor unnumbered */ ,
720                  ({
721                    ip4_address_t * x =
722                      ip_interface_address_get_address
723                      (&im->lookup_main, ia);
724
725                    if (ip4_destination_matches_route
726                        (im, address, x, ia->address_length) ||
727                        ip4_destination_matches_route (im,
728                                                       x,
729                                                       address,
730                                                       address_length))
731                      {
732                        /* an intf may have >1 addr from the same prefix */
733                        if ((sw_if_index == sif->sw_if_index) &&
734                            (ia->address_length == address_length) &&
735                            (x->as_u32 != address->as_u32))
736                          continue;
737
738                        if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
739                          /* if the address we're comparing against is stale
740                           * then the CP has not added this one back yet, maybe
741                           * it never will, so we have to assume it won't and
742                           * ignore it. if it does add it back, then it will fail
743                           * because this one is now present */
744                          continue;
745
746                        /* error if the length or intf was different */
747                        vnm->api_errno = VNET_API_ERROR_ADDRESS_IN_USE;
748
749                        error = clib_error_create
750                          ("failed to add %U on %U which conflicts with %U for interface %U",
751                           format_ip4_address_and_length, address,
752                           address_length,
753                           format_vnet_sw_if_index_name, vnm,
754                           sw_if_index,
755                           format_ip4_address_and_length, x,
756                           ia->address_length,
757                           format_vnet_sw_if_index_name, vnm,
758                           sif->sw_if_index);
759                        goto done;
760                      }
761                  }));
762             }
763       }));
764     }
765   /* *INDENT-ON* */
766
767   if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
768
769   if (is_del)
770     {
771       if (~0 == if_address_index)
772         {
773           vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
774           error = clib_error_create ("%U not found for interface %U",
775                                      lm->format_address_and_length,
776                                      addr_fib, address_length,
777                                      format_vnet_sw_if_index_name, vnm,
778                                      sw_if_index);
779           goto done;
780         }
781
782       ip_interface_address_del (lm, if_address_index, addr_fib);
783     }
784   else
785     {
786       if (~0 != if_address_index)
787         {
788           ip_interface_address_t *ia;
789
790           ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
791
792           if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
793             {
794               if (ia->sw_if_index == sw_if_index)
795                 {
796                   /* re-adding an address during the replace action.
797                    * consdier this the update. clear the flag and
798                    * we're done */
799                   ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
800                   goto done;
801                 }
802               else
803                 {
804                   /* The prefix is moving from one interface to another.
805                    * delete the stale and add the new */
806                   ip4_add_del_interface_address_internal (vm,
807                                                           ia->sw_if_index,
808                                                           address,
809                                                           address_length, 1);
810                   ia = NULL;
811                   error = ip_interface_address_add (lm, sw_if_index,
812                                                     addr_fib, address_length,
813                                                     &if_address_index);
814                 }
815             }
816           else
817             {
818               vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
819               error = clib_error_create
820                 ("Prefix %U already found on interface %U",
821                  lm->format_address_and_length, addr_fib, address_length,
822                  format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
823             }
824         }
825       else
826         error = ip_interface_address_add (lm, sw_if_index,
827                                           addr_fib, address_length,
828                                           &if_address_index);
829     }
830
831   if (error)
832     goto done;
833
834   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
835   ip4_mfib_interface_enable_disable (sw_if_index, !is_del);
836
837   /* intf addr routes are added/deleted on admin up/down */
838   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
839     {
840       if (is_del)
841         ip4_del_interface_routes (sw_if_index,
842                                   im, ip4_af.fib_index, address,
843                                   address_length);
844       else
845         ip4_add_interface_routes (sw_if_index,
846                                   im, ip4_af.fib_index,
847                                   pool_elt_at_index
848                                   (lm->if_address_pool, if_address_index));
849     }
850
851   ip4_add_del_interface_address_callback_t *cb;
852   vec_foreach (cb, im->add_del_interface_address_callbacks)
853     cb->function (im, cb->function_opaque, sw_if_index,
854                   address, address_length, if_address_index, is_del);
855
856 done:
857   vec_free (addr_fib);
858   return error;
859 }
860
861 clib_error_t *
862 ip4_add_del_interface_address (vlib_main_t * vm,
863                                u32 sw_if_index,
864                                ip4_address_t * address,
865                                u32 address_length, u32 is_del)
866 {
867   return ip4_add_del_interface_address_internal
868     (vm, sw_if_index, address, address_length, is_del);
869 }
870
871 void
872 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
873 {
874   ip_interface_address_t *ia;
875   ip4_main_t *im;
876
877   im = &ip4_main;
878
879   /*
880    * when directed broadcast is enabled, the subnet braodcast route will forward
881    * packets using an adjacency with a broadcast MAC. otherwise it drops
882    */
883   /* *INDENT-OFF* */
884   foreach_ip_interface_address(&im->lookup_main, ia,
885                                sw_if_index, 0,
886      ({
887        if (ia->address_length <= 30)
888          {
889            ip4_address_t *ipa;
890
891            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
892
893            fib_prefix_t pfx = {
894              .fp_len = 32,
895              .fp_proto = FIB_PROTOCOL_IP4,
896              .fp_addr = {
897                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
898              },
899            };
900
901            ip4_add_subnet_bcast_route
902              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
903                                                   sw_if_index),
904               &pfx, sw_if_index);
905          }
906      }));
907   /* *INDENT-ON* */
908 }
909 #endif
910
911 static clib_error_t *
912 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
913 {
914   ip4_main_t *im = &ip4_main;
915   ip_interface_address_t *ia;
916   ip4_address_t *a;
917   u32 is_admin_up, fib_index;
918
919   /* Fill in lookup tables with default table (0). */
920   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
921
922   vec_validate_init_empty (im->
923                            lookup_main.if_address_pool_index_by_sw_if_index,
924                            sw_if_index, ~0);
925
926   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
927
928   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
929
930   /* *INDENT-OFF* */
931   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
932                                 0 /* honor unnumbered */,
933   ({
934     a = ip_interface_address_get_address (&im->lookup_main, ia);
935     if (is_admin_up)
936       ip4_add_interface_routes (sw_if_index,
937                                 im, fib_index,
938                                 ia);
939     else
940       ip4_del_interface_routes (sw_if_index,
941                                 im, fib_index,
942                                 a, ia->address_length);
943   }));
944   /* *INDENT-ON* */
945
946   return 0;
947 }
948
949 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
950
951 /* Built-in ip4 unicast rx feature path definition */
952 /* *INDENT-OFF* */
953 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
954 {
955   .arc_name = "ip4-unicast",
956   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
957   .last_in_arc = "ip4-lookup",
958   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
959 };
960
961 VNET_FEATURE_INIT (ip4_flow_classify, static) =
962 {
963   .arc_name = "ip4-unicast",
964   .node_name = "ip4-flow-classify",
965   .runs_before = VNET_FEATURES ("ip4-inacl"),
966 };
967
968 VNET_FEATURE_INIT (ip4_inacl, static) =
969 {
970   .arc_name = "ip4-unicast",
971   .node_name = "ip4-inacl",
972   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
973 };
974
975 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
976 {
977   .arc_name = "ip4-unicast",
978   .node_name = "ip4-source-and-port-range-check-rx",
979   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
980 };
981
982 VNET_FEATURE_INIT (ip4_policer_classify, static) =
983 {
984   .arc_name = "ip4-unicast",
985   .node_name = "ip4-policer-classify",
986   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
987 };
988
989 VNET_FEATURE_INIT (ip4_ipsec, static) =
990 {
991   .arc_name = "ip4-unicast",
992   .node_name = "ipsec4-input-feature",
993   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
994 };
995
996 VNET_FEATURE_INIT (ip4_vpath, static) =
997 {
998   .arc_name = "ip4-unicast",
999   .node_name = "vpath-input-ip4",
1000   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
1001 };
1002
1003 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
1004 {
1005   .arc_name = "ip4-unicast",
1006   .node_name = "ip4-vxlan-bypass",
1007   .runs_before = VNET_FEATURES ("ip4-lookup"),
1008 };
1009
1010 VNET_FEATURE_INIT (ip4_not_enabled, static) =
1011 {
1012   .arc_name = "ip4-unicast",
1013   .node_name = "ip4-not-enabled",
1014   .runs_before = VNET_FEATURES ("ip4-lookup"),
1015 };
1016
1017 VNET_FEATURE_INIT (ip4_lookup, static) =
1018 {
1019   .arc_name = "ip4-unicast",
1020   .node_name = "ip4-lookup",
1021   .runs_before = 0,     /* not before any other features */
1022 };
1023
1024 /* Built-in ip4 multicast rx feature path definition */
1025 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1026 {
1027   .arc_name = "ip4-multicast",
1028   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1029   .last_in_arc = "ip4-mfib-forward-lookup",
1030   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1031 };
1032
1033 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1034 {
1035   .arc_name = "ip4-multicast",
1036   .node_name = "vpath-input-ip4",
1037   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1038 };
1039
1040 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
1041 {
1042   .arc_name = "ip4-multicast",
1043   .node_name = "ip4-not-enabled",
1044   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1045 };
1046
1047 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1048 {
1049   .arc_name = "ip4-multicast",
1050   .node_name = "ip4-mfib-forward-lookup",
1051   .runs_before = 0,     /* last feature */
1052 };
1053
1054 /* Source and port-range check ip4 tx feature path definition */
1055 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1056 {
1057   .arc_name = "ip4-output",
1058   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1059   .last_in_arc = "interface-output",
1060   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1061 };
1062
1063 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1064 {
1065   .arc_name = "ip4-output",
1066   .node_name = "ip4-source-and-port-range-check-tx",
1067   .runs_before = VNET_FEATURES ("ip4-outacl"),
1068 };
1069
1070 VNET_FEATURE_INIT (ip4_outacl, static) =
1071 {
1072   .arc_name = "ip4-output",
1073   .node_name = "ip4-outacl",
1074   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1075 };
1076
1077 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1078 {
1079   .arc_name = "ip4-output",
1080   .node_name = "ipsec4-output-feature",
1081   .runs_before = VNET_FEATURES ("interface-output"),
1082 };
1083
1084 /* Built-in ip4 tx feature path definition */
1085 VNET_FEATURE_INIT (ip4_interface_output, static) =
1086 {
1087   .arc_name = "ip4-output",
1088   .node_name = "interface-output",
1089   .runs_before = 0,     /* not before any other features */
1090 };
1091 /* *INDENT-ON* */
1092
1093 static clib_error_t *
1094 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1095 {
1096   ip4_main_t *im = &ip4_main;
1097
1098   /* Fill in lookup tables with default table (0). */
1099   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1100   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1101
1102   if (!is_add)
1103     {
1104       ip4_main_t *im4 = &ip4_main;
1105       ip_lookup_main_t *lm4 = &im4->lookup_main;
1106       ip_interface_address_t *ia = 0;
1107       ip4_address_t *address;
1108       vlib_main_t *vm = vlib_get_main ();
1109
1110       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1111       /* *INDENT-OFF* */
1112       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1113       ({
1114         address = ip_interface_address_get_address (lm4, ia);
1115         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1116       }));
1117       /* *INDENT-ON* */
1118       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1119     }
1120
1121   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1122                                is_add, 0, 0);
1123
1124   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1125                                sw_if_index, is_add, 0, 0);
1126
1127   return /* no error */ 0;
1128 }
1129
1130 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1131
1132 /* Global IP4 main. */
1133 #ifndef CLIB_MARCH_VARIANT
1134 ip4_main_t ip4_main;
1135 #endif /* CLIB_MARCH_VARIANT */
1136
1137 static clib_error_t *
1138 ip4_lookup_init (vlib_main_t * vm)
1139 {
1140   ip4_main_t *im = &ip4_main;
1141   clib_error_t *error;
1142   uword i;
1143
1144   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1145     return error;
1146   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1147     return (error);
1148   if ((error = vlib_call_init_function (vm, fib_module_init)))
1149     return error;
1150   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1151     return error;
1152
1153   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1154     {
1155       u32 m;
1156
1157       if (i < 32)
1158         m = pow2_mask (i) << (32 - i);
1159       else
1160         m = ~0;
1161       im->fib_masks[i] = clib_host_to_net_u32 (m);
1162     }
1163
1164   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1165
1166   /* Create FIB with index 0 and table id of 0. */
1167   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1168                                      FIB_SOURCE_DEFAULT_ROUTE);
1169   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1170                                       MFIB_SOURCE_DEFAULT_ROUTE);
1171
1172   {
1173     pg_node_t *pn;
1174     pn = pg_get_node (ip4_lookup_node.index);
1175     pn->unformat_edit = unformat_pg_ip4_header;
1176   }
1177
1178   {
1179     ethernet_arp_header_t h;
1180
1181     clib_memset (&h, 0, sizeof (h));
1182
1183 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1184 #define _8(f,v) h.f = v;
1185     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1186     _16 (l3_type, ETHERNET_TYPE_IP4);
1187     _8 (n_l2_address_bytes, 6);
1188     _8 (n_l3_address_bytes, 4);
1189     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1190 #undef _16
1191 #undef _8
1192
1193     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1194                                /* data */ &h,
1195                                sizeof (h),
1196                                /* alloc chunk size */ 8,
1197                                "ip4 arp");
1198   }
1199
1200   return error;
1201 }
1202
1203 VLIB_INIT_FUNCTION (ip4_lookup_init);
1204
1205 typedef struct
1206 {
1207   /* Adjacency taken. */
1208   u32 dpo_index;
1209   u32 flow_hash;
1210   u32 fib_index;
1211
1212   /* Packet data, possibly *after* rewrite. */
1213   u8 packet_data[64 - 1 * sizeof (u32)];
1214 }
1215 ip4_forward_next_trace_t;
1216
1217 #ifndef CLIB_MARCH_VARIANT
1218 u8 *
1219 format_ip4_forward_next_trace (u8 * s, va_list * args)
1220 {
1221   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1222   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1223   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1224   u32 indent = format_get_indent (s);
1225   s = format (s, "%U%U",
1226               format_white_space, indent,
1227               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1228   return s;
1229 }
1230 #endif
1231
1232 static u8 *
1233 format_ip4_lookup_trace (u8 * s, va_list * args)
1234 {
1235   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1236   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1237   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1238   u32 indent = format_get_indent (s);
1239
1240   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1241               t->fib_index, t->dpo_index, t->flow_hash);
1242   s = format (s, "\n%U%U",
1243               format_white_space, indent,
1244               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1245   return s;
1246 }
1247
1248 static u8 *
1249 format_ip4_rewrite_trace (u8 * s, va_list * args)
1250 {
1251   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1252   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1253   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1254   u32 indent = format_get_indent (s);
1255
1256   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1257               t->fib_index, t->dpo_index, format_ip_adjacency,
1258               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1259   s = format (s, "\n%U%U",
1260               format_white_space, indent,
1261               format_ip_adjacency_packet_data,
1262               t->packet_data, sizeof (t->packet_data));
1263   return s;
1264 }
1265
1266 #ifndef CLIB_MARCH_VARIANT
1267 /* Common trace function for all ip4-forward next nodes. */
1268 void
1269 ip4_forward_next_trace (vlib_main_t * vm,
1270                         vlib_node_runtime_t * node,
1271                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1272 {
1273   u32 *from, n_left;
1274   ip4_main_t *im = &ip4_main;
1275
1276   n_left = frame->n_vectors;
1277   from = vlib_frame_vector_args (frame);
1278
1279   while (n_left >= 4)
1280     {
1281       u32 bi0, bi1;
1282       vlib_buffer_t *b0, *b1;
1283       ip4_forward_next_trace_t *t0, *t1;
1284
1285       /* Prefetch next iteration. */
1286       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1287       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1288
1289       bi0 = from[0];
1290       bi1 = from[1];
1291
1292       b0 = vlib_get_buffer (vm, bi0);
1293       b1 = vlib_get_buffer (vm, bi1);
1294
1295       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1296         {
1297           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1298           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1299           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1300           t0->fib_index =
1301             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1302              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1303             vec_elt (im->fib_index_by_sw_if_index,
1304                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1305
1306           clib_memcpy_fast (t0->packet_data,
1307                             vlib_buffer_get_current (b0),
1308                             sizeof (t0->packet_data));
1309         }
1310       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1311         {
1312           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1313           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1314           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1315           t1->fib_index =
1316             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1317              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1318             vec_elt (im->fib_index_by_sw_if_index,
1319                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1320           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1321                             sizeof (t1->packet_data));
1322         }
1323       from += 2;
1324       n_left -= 2;
1325     }
1326
1327   while (n_left >= 1)
1328     {
1329       u32 bi0;
1330       vlib_buffer_t *b0;
1331       ip4_forward_next_trace_t *t0;
1332
1333       bi0 = from[0];
1334
1335       b0 = vlib_get_buffer (vm, bi0);
1336
1337       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1338         {
1339           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1340           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1341           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1342           t0->fib_index =
1343             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1344              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1345             vec_elt (im->fib_index_by_sw_if_index,
1346                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1347           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1348                             sizeof (t0->packet_data));
1349         }
1350       from += 1;
1351       n_left -= 1;
1352     }
1353 }
1354
1355 /* Compute TCP/UDP/ICMP4 checksum in software. */
1356 u16
1357 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1358                               ip4_header_t * ip0)
1359 {
1360   ip_csum_t sum0;
1361   u32 ip_header_length, payload_length_host_byte_order;
1362
1363   /* Initialize checksum with ip header. */
1364   ip_header_length = ip4_header_bytes (ip0);
1365   payload_length_host_byte_order =
1366     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1367   sum0 =
1368     clib_host_to_net_u32 (payload_length_host_byte_order +
1369                           (ip0->protocol << 16));
1370
1371   if (BITS (uword) == 32)
1372     {
1373       sum0 =
1374         ip_csum_with_carry (sum0,
1375                             clib_mem_unaligned (&ip0->src_address, u32));
1376       sum0 =
1377         ip_csum_with_carry (sum0,
1378                             clib_mem_unaligned (&ip0->dst_address, u32));
1379     }
1380   else
1381     sum0 =
1382       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1383
1384   return ip_calculate_l4_checksum (vm, p0, sum0,
1385                                    payload_length_host_byte_order, (u8 *) ip0,
1386                                    ip_header_length, NULL);
1387 }
1388
1389 u32
1390 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1391 {
1392   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1393   udp_header_t *udp0;
1394   u16 sum16;
1395
1396   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1397           || ip0->protocol == IP_PROTOCOL_UDP);
1398
1399   udp0 = (void *) (ip0 + 1);
1400   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1401     {
1402       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1403                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1404       return p0->flags;
1405     }
1406
1407   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1408
1409   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1410                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1411
1412   return p0->flags;
1413 }
1414 #endif
1415
1416 /* *INDENT-OFF* */
1417 VNET_FEATURE_ARC_INIT (ip4_local) =
1418 {
1419   .arc_name  = "ip4-local",
1420   .start_nodes = VNET_FEATURES ("ip4-local"),
1421   .last_in_arc = "ip4-local-end-of-arc",
1422 };
1423 /* *INDENT-ON* */
1424
1425 static inline void
1426 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1427                             ip4_header_t * ip, u8 is_udp, u8 * error,
1428                             u8 * good_tcp_udp)
1429 {
1430   u32 flags0;
1431   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1432   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1433   if (is_udp)
1434     {
1435       udp_header_t *udp;
1436       u32 ip_len, udp_len;
1437       i32 len_diff;
1438       udp = ip4_next_header (ip);
1439       /* Verify UDP length. */
1440       ip_len = clib_net_to_host_u16 (ip->length);
1441       udp_len = clib_net_to_host_u16 (udp->length);
1442
1443       len_diff = ip_len - udp_len;
1444       *good_tcp_udp &= len_diff >= 0;
1445       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1446     }
1447 }
1448
/*
 * Checksum-state predicates on a buffer pointer.  Each expands to a fully
 * parenthesised expression yielding 0 or 1, so it can be negated or used
 * inside a larger expression.  The buffer argument is evaluated more than
 * once: do not pass an expression with side effects.
 */

/* True when TCP or UDP checksum offload is flagged on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
     || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/* True when a TCP/UDP packet still needs its checksum validated in
 * software: not already computed and not offloaded. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)            \
          || ip4_local_csum_is_offloaded (_b)))

/* True when the checksum is flagged correct or is offloaded. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
      || ip4_local_csum_is_offloaded (_b)) != 0)
1460
1461 static inline void
1462 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1463                          ip4_header_t * ih, u8 * error)
1464 {
1465   u8 is_udp, is_tcp_udp, good_tcp_udp;
1466
1467   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1468   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1469
1470   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1471     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1472   else
1473     good_tcp_udp = ip4_local_csum_is_valid (b);
1474
1475   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1476   *error = (is_tcp_udp && !good_tcp_udp
1477             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1478 }
1479
1480 static inline void
1481 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1482                             ip4_header_t ** ih, u8 * error)
1483 {
1484   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1485
1486   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1487   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1488
1489   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1490   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1491
1492   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1493   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1494
1495   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1496                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1497     {
1498       if (is_tcp_udp[0])
1499         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1500                                     &good_tcp_udp[0]);
1501       if (is_tcp_udp[1])
1502         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1503                                     &good_tcp_udp[1]);
1504     }
1505
1506   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1507               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1508   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1509               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1510 }
1511
1512 static inline void
1513 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1514                               vlib_buffer_t * b, u16 * next, u8 error,
1515                               u8 head_of_feature_arc)
1516 {
1517   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1518   u32 next_index;
1519
1520   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1521   b->error = error ? error_node->errors[error] : 0;
1522   if (head_of_feature_arc)
1523     {
1524       next_index = *next;
1525       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1526         {
1527           vnet_feature_arc_start (arc_index,
1528                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1529                                   &next_index, b);
1530           *next = next_index;
1531         }
1532     }
1533 }
1534
1535 typedef struct
1536 {
1537   ip4_address_t src;
1538   u32 lbi;
1539   u8 error;
1540   u8 first;
1541 } ip4_local_last_check_t;
1542
1543 static inline void
1544 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1545                      ip4_local_last_check_t * last_check, u8 * error0)
1546 {
1547   ip4_fib_mtrie_leaf_t leaf0;
1548   ip4_fib_mtrie_t *mtrie0;
1549   const dpo_id_t *dpo0;
1550   load_balance_t *lb0;
1551   u32 lbi0;
1552
1553   vnet_buffer (b)->ip.fib_index =
1554     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1555     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1556
1557   /*
1558    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1559    *  adjacency for the destination address (the local interface address).
1560    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1561    *  adjacency for the source address (the remote sender's address)
1562    */
1563   if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
1564       last_check->first)
1565     {
1566       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1567       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1568       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1569       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1570       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1571
1572       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1573         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1574       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1575
1576       lb0 = load_balance_get (lbi0);
1577       dpo0 = load_balance_get_bucket_i (lb0, 0);
1578
1579       /*
1580        * Must have a route to source otherwise we drop the packet.
1581        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1582        *
1583        * The checks are:
1584        *  - the source is a recieve => it's from us => bogus, do this
1585        *    first since it sets a different error code.
1586        *  - uRPF check for any route to source - accept if passes.
1587        *  - allow packets destined to the broadcast address from unknown sources
1588        */
1589
1590       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1591                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1592                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1593       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1594                   && !fib_urpf_check_size (lb0->lb_urpf)
1595                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1596                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1597
1598       last_check->src.as_u32 = ip0->src_address.as_u32;
1599       last_check->lbi = lbi0;
1600       last_check->error = *error0;
1601       last_check->first = 0;
1602     }
1603   else
1604     {
1605       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1606         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1607       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1608       *error0 = last_check->error;
1609     }
1610 }
1611
1612 static inline void
1613 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1614                         ip4_local_last_check_t * last_check, u8 * error)
1615 {
1616   ip4_fib_mtrie_leaf_t leaf[2];
1617   ip4_fib_mtrie_t *mtrie[2];
1618   const dpo_id_t *dpo[2];
1619   load_balance_t *lb[2];
1620   u32 not_last_hit;
1621   u32 lbi[2];
1622
1623   not_last_hit = last_check->first;
1624   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1625   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1626
1627   vnet_buffer (b[0])->ip.fib_index =
1628     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1629     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1630     vnet_buffer (b[0])->ip.fib_index;
1631
1632   vnet_buffer (b[1])->ip.fib_index =
1633     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1634     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1635     vnet_buffer (b[1])->ip.fib_index;
1636
1637   /*
1638    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1639    *  adjacency for the destination address (the local interface address).
1640    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1641    *  adjacency for the source address (the remote sender's address)
1642    */
1643   if (PREDICT_TRUE (not_last_hit))
1644     {
1645       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1646       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1647
1648       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1649       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1650
1651       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1652                                            &ip[0]->src_address, 2);
1653       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1654                                            &ip[1]->src_address, 2);
1655
1656       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1657                                            &ip[0]->src_address, 3);
1658       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1659                                            &ip[1]->src_address, 3);
1660
1661       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1662       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1663
1664       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1665         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1666       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1667
1668       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1669         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1670       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1671
1672       lb[0] = load_balance_get (lbi[0]);
1673       lb[1] = load_balance_get (lbi[1]);
1674
1675       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1676       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1677
1678       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1679                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1680                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1681       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1682                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1683                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1684                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1685
1686       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1687                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1688                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1689       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1690                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1691                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1692                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1693
1694       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1695       last_check->lbi = lbi[1];
1696       last_check->error = error[1];
1697       last_check->first = 0;
1698     }
1699   else
1700     {
1701       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1702         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1703       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1704
1705       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1706         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1707       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1708
1709       error[0] = last_check->error;
1710       error[1] = last_check->error;
1711     }
1712 }
1713
/**
 * Classification assigned to a packet by ip4_local_classify().
 *
 * IP_LOCAL_PACKET_TYPE_L4 has to stay zero: ip4_local_inline() tests a
 * classification for truth to decide whether the L4 checks are skipped,
 * and XORs two classifications to detect a mixed pair.
 */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4 = 0,	/* regular packet, subject to L4 checks */
  IP_LOCAL_PACKET_TYPE_NAT = 1,	/* buffer carries VNET_BUFFER_F_IS_NATED */
  IP_LOCAL_PACKET_TYPE_FRAG = 2,	/* IPv4 fragment */
};
1720
1721 /**
1722  * Determine packet type and next node.
1723  *
1724  * The expectation is that all packets that are not L4 will skip
1725  * checksums and source checks.
1726  */
1727 always_inline u8
1728 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1729 {
1730   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1731
1732   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1733     {
1734       *next = IP_LOCAL_NEXT_REASSEMBLY;
1735       return IP_LOCAL_PACKET_TYPE_FRAG;
1736     }
1737   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1738     {
1739       *next = lm->local_next_by_ip_protocol[ip->protocol];
1740       return IP_LOCAL_PACKET_TYPE_NAT;
1741     }
1742
1743   *next = lm->local_next_by_ip_protocol[ip->protocol];
1744   return IP_LOCAL_PACKET_TYPE_L4;
1745 }
1746
1747 static inline uword
1748 ip4_local_inline (vlib_main_t * vm,
1749                   vlib_node_runtime_t * node,
1750                   vlib_frame_t * frame, int head_of_feature_arc)
1751 {
1752   u32 *from, n_left_from;
1753   vlib_node_runtime_t *error_node =
1754     vlib_node_get_runtime (vm, ip4_local_node.index);
1755   u16 nexts[VLIB_FRAME_SIZE], *next;
1756   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1757   ip4_header_t *ip[2];
1758   u8 error[2], pt[2];
1759
1760   ip4_local_last_check_t last_check = {
1761     /*
1762      * 0.0.0.0 can appear as the source address of an IP packet,
1763      * as can any other address, hence the need to use the 'first'
1764      * member to make sure the .lbi is initialised for the first
1765      * packet.
1766      */
1767     .src = {.as_u32 = 0},
1768     .lbi = ~0,
1769     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1770     .first = 1,
1771   };
1772
1773   from = vlib_frame_vector_args (frame);
1774   n_left_from = frame->n_vectors;
1775
1776   if (node->flags & VLIB_NODE_FLAG_TRACE)
1777     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1778
1779   vlib_get_buffers (vm, from, bufs, n_left_from);
1780   b = bufs;
1781   next = nexts;
1782
1783   while (n_left_from >= 6)
1784     {
1785       u8 not_batch = 0;
1786
1787       /* Prefetch next iteration. */
1788       {
1789         vlib_prefetch_buffer_header (b[4], LOAD);
1790         vlib_prefetch_buffer_header (b[5], LOAD);
1791
1792         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1793         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1794       }
1795
1796       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1797
1798       ip[0] = vlib_buffer_get_current (b[0]);
1799       ip[1] = vlib_buffer_get_current (b[1]);
1800
1801       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1802       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1803
1804       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1805       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1806
1807       not_batch = pt[0] ^ pt[1];
1808
1809       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1810         goto skip_checks;
1811
1812       if (PREDICT_TRUE (not_batch == 0))
1813         {
1814           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1815           ip4_local_check_src_x2 (b, ip, &last_check, error);
1816         }
1817       else
1818         {
1819           if (!pt[0])
1820             {
1821               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1822               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1823             }
1824           if (!pt[1])
1825             {
1826               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1827               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1828             }
1829         }
1830
1831     skip_checks:
1832
1833       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1834                                     head_of_feature_arc);
1835       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1836                                     head_of_feature_arc);
1837
1838       b += 2;
1839       next += 2;
1840       n_left_from -= 2;
1841     }
1842
1843   while (n_left_from > 0)
1844     {
1845       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1846
1847       ip[0] = vlib_buffer_get_current (b[0]);
1848       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1849       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1850
1851       if (head_of_feature_arc == 0 || pt[0])
1852         goto skip_check;
1853
1854       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1855       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1856
1857     skip_check:
1858
1859       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1860                                     head_of_feature_arc);
1861
1862       b += 1;
1863       next += 1;
1864       n_left_from -= 1;
1865     }
1866
1867   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1868   return frame->n_vectors;
1869 }
1870
1871 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1872                                vlib_frame_t * frame)
1873 {
1874   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1875 }
1876
1877 /* *INDENT-OFF* */
1878 VLIB_REGISTER_NODE (ip4_local_node) =
1879 {
1880   .name = "ip4-local",
1881   .vector_size = sizeof (u32),
1882   .format_trace = format_ip4_forward_next_trace,
1883   .n_errors = IP4_N_ERROR,
1884   .error_strings = ip4_error_strings,
1885   .n_next_nodes = IP_LOCAL_N_NEXT,
1886   .next_nodes =
1887   {
1888     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1889     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1890     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1891     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1892     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1893   },
1894 };
1895 /* *INDENT-ON* */
1896
1897
1898 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1899                                           vlib_node_runtime_t * node,
1900                                           vlib_frame_t * frame)
1901 {
1902   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1903 }
1904
1905 /* *INDENT-OFF* */
1906 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1907   .name = "ip4-local-end-of-arc",
1908   .vector_size = sizeof (u32),
1909
1910   .format_trace = format_ip4_forward_next_trace,
1911   .sibling_of = "ip4-local",
1912 };
1913
1914 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1915   .arc_name = "ip4-local",
1916   .node_name = "ip4-local-end-of-arc",
1917   .runs_before = 0, /* not before any other features */
1918 };
1919 /* *INDENT-ON* */
1920
1921 #ifndef CLIB_MARCH_VARIANT
1922 void
1923 ip4_register_protocol (u32 protocol, u32 node_index)
1924 {
1925   vlib_main_t *vm = vlib_get_main ();
1926   ip4_main_t *im = &ip4_main;
1927   ip_lookup_main_t *lm = &im->lookup_main;
1928
1929   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1930   lm->local_next_by_ip_protocol[protocol] =
1931     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1932 }
1933
1934 void
1935 ip4_unregister_protocol (u32 protocol)
1936 {
1937   ip4_main_t *im = &ip4_main;
1938   ip_lookup_main_t *lm = &im->lookup_main;
1939
1940   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1941   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1942 }
1943 #endif
1944
1945 static clib_error_t *
1946 show_ip_local_command_fn (vlib_main_t * vm,
1947                           unformat_input_t * input, vlib_cli_command_t * cmd)
1948 {
1949   ip4_main_t *im = &ip4_main;
1950   ip_lookup_main_t *lm = &im->lookup_main;
1951   int i;
1952
1953   vlib_cli_output (vm, "Protocols handled by ip4_local");
1954   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1955     {
1956       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1957         {
1958           u32 node_index = vlib_get_node (vm,
1959                                           ip4_local_node.index)->
1960             next_nodes[lm->local_next_by_ip_protocol[i]];
1961           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1962                            format_vlib_node_name, vm, node_index);
1963         }
1964     }
1965   return 0;
1966 }
1967
1968
1969
1970 /*?
1971  * Display the set of protocols handled by the local IPv4 stack.
1972  *
1973  * @cliexpar
1974  * Example of how to display local protocol table:
1975  * @cliexstart{show ip local}
1976  * Protocols handled by ip4_local
1977  * 1
1978  * 17
1979  * 47
1980  * @cliexend
1981 ?*/
1982 /* *INDENT-OFF* */
1983 VLIB_CLI_COMMAND (show_ip_local, static) =
1984 {
1985   .path = "show ip local",
1986   .function = show_ip_local_command_fn,
1987   .short_help = "show ip local",
1988 };
1989 /* *INDENT-ON* */
1990
/** Next indices used by the ip4 rewrite worker for packets it cannot forward. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,	/* default disposition */
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,	/* TTL expired, or DF set and too big */
  IP4_REWRITE_NEXT_FRAGMENT = 2,	/* too big, fragmentation allowed */
  IP4_REWRITE_N_NEXT = 3,	/* count of the above; keep last */
} ip4_rewrite_next_t;
1998
/**
 * The bits of an IPv4 address that are kept when constructing a
 * multicast MAC address: the low-order 23 bits of the address. The
 * address is held in network byte order, so the constant depends on the
 * host's endianness.
 */
#if !CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
2008
2009 always_inline void
2010 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2011                u16 adj_packet_bytes, bool df, u16 * next,
2012                u8 is_midchain, u32 * error)
2013 {
2014   if (packet_len > adj_packet_bytes)
2015     {
2016       *error = IP4_ERROR_MTU_EXCEEDED;
2017       if (df)
2018         {
2019           icmp4_error_set_vnet_buffer
2020             (b, ICMP4_destination_unreachable,
2021              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2022              adj_packet_bytes);
2023           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2024         }
2025       else
2026         {
2027           /* IP fragmentation */
2028           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2029                                    (is_midchain ?
2030                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
2031                                     IP_FRAG_NEXT_IP_REWRITE), 0);
2032           *next = IP4_REWRITE_NEXT_FRAGMENT;
2033         }
2034     }
2035 }
2036
2037 /* increment TTL & update checksum.
2038    Works either endian, so no need for byte swap. */
2039 static_always_inline void
2040 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
2041 {
2042   i32 ttl;
2043   u32 checksum;
2044   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2045     return;
2046
2047   ttl = ip->ttl;
2048
2049   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
2050   checksum += checksum >= 0xffff;
2051
2052   ip->checksum = checksum;
2053   ttl += 1;
2054   ip->ttl = ttl;
2055
2056   ASSERT (ip->checksum == ip4_header_checksum (ip));
2057 }
2058
2059 /* Decrement TTL & update checksum.
2060    Works either endian, so no need for byte swap. */
2061 static_always_inline void
2062 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2063                             u32 * error)
2064 {
2065   i32 ttl;
2066   u32 checksum;
2067   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2068     return;
2069
2070   ttl = ip->ttl;
2071
2072   /* Input node should have reject packets with ttl 0. */
2073   ASSERT (ip->ttl > 0);
2074
2075   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2076   checksum += checksum >= 0xffff;
2077
2078   ip->checksum = checksum;
2079   ttl -= 1;
2080   ip->ttl = ttl;
2081
2082   /*
2083    * If the ttl drops below 1 when forwarding, generate
2084    * an ICMP response.
2085    */
2086   if (PREDICT_FALSE (ttl <= 0))
2087     {
2088       *error = IP4_ERROR_TIME_EXPIRED;
2089       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2090       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2091                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2092                                    0);
2093       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2094     }
2095
2096   /* Verify checksum. */
2097   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2098           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2099 }
2100
2101
2102 always_inline uword
2103 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2104                              vlib_node_runtime_t * node,
2105                              vlib_frame_t * frame,
2106                              int do_counters, int is_midchain, int is_mcast)
2107 {
2108   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2109   u32 *from = vlib_frame_vector_args (frame);
2110   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2111   u16 nexts[VLIB_FRAME_SIZE], *next;
2112   u32 n_left_from;
2113   vlib_node_runtime_t *error_node =
2114     vlib_node_get_runtime (vm, ip4_input_node.index);
2115
2116   n_left_from = frame->n_vectors;
2117   u32 thread_index = vm->thread_index;
2118
2119   vlib_get_buffers (vm, from, bufs, n_left_from);
2120   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2121
2122 #if (CLIB_N_PREFETCHES >= 8)
2123   if (n_left_from >= 6)
2124     {
2125       int i;
2126       for (i = 2; i < 6; i++)
2127         vlib_prefetch_buffer_header (bufs[i], LOAD);
2128     }
2129
2130   next = nexts;
2131   b = bufs;
2132   while (n_left_from >= 8)
2133     {
2134       const ip_adjacency_t *adj0, *adj1;
2135       ip4_header_t *ip0, *ip1;
2136       u32 rw_len0, error0, adj_index0;
2137       u32 rw_len1, error1, adj_index1;
2138       u32 tx_sw_if_index0, tx_sw_if_index1;
2139       u8 *p;
2140
2141       vlib_prefetch_buffer_header (b[6], LOAD);
2142       vlib_prefetch_buffer_header (b[7], LOAD);
2143
2144       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2145       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2146
2147       /*
2148        * pre-fetch the per-adjacency counters
2149        */
2150       if (do_counters)
2151         {
2152           vlib_prefetch_combined_counter (&adjacency_counters,
2153                                           thread_index, adj_index0);
2154           vlib_prefetch_combined_counter (&adjacency_counters,
2155                                           thread_index, adj_index1);
2156         }
2157
2158       ip0 = vlib_buffer_get_current (b[0]);
2159       ip1 = vlib_buffer_get_current (b[1]);
2160
2161       error0 = error1 = IP4_ERROR_NONE;
2162
2163       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2164       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2165
2166       /* Rewrite packet header and updates lengths. */
2167       adj0 = adj_get (adj_index0);
2168       adj1 = adj_get (adj_index1);
2169
2170       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2171       rw_len0 = adj0[0].rewrite_header.data_bytes;
2172       rw_len1 = adj1[0].rewrite_header.data_bytes;
2173       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2174       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2175
2176       p = vlib_buffer_get_current (b[2]);
2177       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2178       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2179
2180       p = vlib_buffer_get_current (b[3]);
2181       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2182       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2183
2184       /* Check MTU of outgoing interface. */
2185       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2186       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2187
2188       if (b[0]->flags & VNET_BUFFER_F_GSO)
2189         ip0_len = gso_mtu_sz (b[0]);
2190       if (b[1]->flags & VNET_BUFFER_F_GSO)
2191         ip1_len = gso_mtu_sz (b[1]);
2192
2193       ip4_mtu_check (b[0], ip0_len,
2194                      adj0[0].rewrite_header.max_l3_packet_bytes,
2195                      ip0->flags_and_fragment_offset &
2196                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2197                      next + 0, is_midchain, &error0);
2198       ip4_mtu_check (b[1], ip1_len,
2199                      adj1[0].rewrite_header.max_l3_packet_bytes,
2200                      ip1->flags_and_fragment_offset &
2201                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2202                      next + 1, is_midchain, &error1);
2203
2204       if (is_mcast)
2205         {
2206           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2207                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2208                     IP4_ERROR_SAME_INTERFACE : error0);
2209           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2210                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2211                     IP4_ERROR_SAME_INTERFACE : error1);
2212         }
2213
2214       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2215        * to see the IP header */
2216       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2217         {
2218           u32 next_index = adj0[0].rewrite_header.next_index;
2219           vlib_buffer_advance (b[0], -(word) rw_len0);
2220
2221           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2222           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2223
2224           if (PREDICT_FALSE
2225               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2226             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2227                                                 tx_sw_if_index0,
2228                                                 &next_index, b[0],
2229                                                 adj0->ia_cfg_index);
2230
2231           next[0] = next_index;
2232           if (is_midchain)
2233             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2234                                         0 /* is_ip6 */ ,
2235                                         0 /* with gso */ );
2236         }
2237       else
2238         {
2239           b[0]->error = error_node->errors[error0];
2240           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2241             ip4_ttl_inc (b[0], ip0);
2242         }
2243       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2244         {
2245           u32 next_index = adj1[0].rewrite_header.next_index;
2246           vlib_buffer_advance (b[1], -(word) rw_len1);
2247
2248           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2249           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2250
2251           if (PREDICT_FALSE
2252               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2253             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2254                                                 tx_sw_if_index1,
2255                                                 &next_index, b[1],
2256                                                 adj1->ia_cfg_index);
2257           next[1] = next_index;
2258           if (is_midchain)
2259             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2260                                         0 /* is_ip6 */ ,
2261                                         0 /* with gso */ );
2262         }
2263       else
2264         {
2265           b[1]->error = error_node->errors[error1];
2266           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2267             ip4_ttl_inc (b[1], ip1);
2268         }
2269
2270       if (is_midchain)
2271         /* Guess we are only writing on ipv4 header. */
2272         vnet_rewrite_two_headers (adj0[0], adj1[0],
2273                                   ip0, ip1, sizeof (ip4_header_t));
2274       else
2275         /* Guess we are only writing on simple Ethernet header. */
2276         vnet_rewrite_two_headers (adj0[0], adj1[0],
2277                                   ip0, ip1, sizeof (ethernet_header_t));
2278
2279       if (do_counters)
2280         {
2281           if (error0 == IP4_ERROR_NONE)
2282             vlib_increment_combined_counter
2283               (&adjacency_counters,
2284                thread_index,
2285                adj_index0, 1,
2286                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2287
2288           if (error1 == IP4_ERROR_NONE)
2289             vlib_increment_combined_counter
2290               (&adjacency_counters,
2291                thread_index,
2292                adj_index1, 1,
2293                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2294         }
2295
2296       if (is_midchain)
2297         {
2298           if (error0 == IP4_ERROR_NONE)
2299             adj_midchain_fixup (vm, adj0, b[0]);
2300           if (error1 == IP4_ERROR_NONE)
2301             adj_midchain_fixup (vm, adj1, b[1]);
2302         }
2303
2304       if (is_mcast)
2305         {
2306           /* copy bytes from the IP address into the MAC rewrite */
2307           if (error0 == IP4_ERROR_NONE)
2308             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2309                                         adj0->rewrite_header.dst_mcast_offset,
2310                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2311           if (error1 == IP4_ERROR_NONE)
2312             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2313                                         adj1->rewrite_header.dst_mcast_offset,
2314                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2315         }
2316
2317       next += 2;
2318       b += 2;
2319       n_left_from -= 2;
2320     }
2321 #elif (CLIB_N_PREFETCHES >= 4)
2322   next = nexts;
2323   b = bufs;
2324   while (n_left_from >= 1)
2325     {
2326       ip_adjacency_t *adj0;
2327       ip4_header_t *ip0;
2328       u32 rw_len0, error0, adj_index0;
2329       u32 tx_sw_if_index0;
2330       u8 *p;
2331
2332       /* Prefetch next iteration */
2333       if (PREDICT_TRUE (n_left_from >= 4))
2334         {
2335           ip_adjacency_t *adj2;
2336           u32 adj_index2;
2337
2338           vlib_prefetch_buffer_header (b[3], LOAD);
2339           vlib_prefetch_buffer_data (b[2], LOAD);
2340
2341           /* Prefetch adj->rewrite_header */
2342           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2343           adj2 = adj_get (adj_index2);
2344           p = (u8 *) adj2;
2345           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2346                          LOAD);
2347         }
2348
2349       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2350
2351       /*
2352        * Prefetch the per-adjacency counters
2353        */
2354       if (do_counters)
2355         {
2356           vlib_prefetch_combined_counter (&adjacency_counters,
2357                                           thread_index, adj_index0);
2358         }
2359
2360       ip0 = vlib_buffer_get_current (b[0]);
2361
2362       error0 = IP4_ERROR_NONE;
2363
2364       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2365
2366       /* Rewrite packet header and updates lengths. */
2367       adj0 = adj_get (adj_index0);
2368
2369       /* Rewrite header was prefetched. */
2370       rw_len0 = adj0[0].rewrite_header.data_bytes;
2371       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2372
2373       /* Check MTU of outgoing interface. */
2374       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2375
2376       if (b[0]->flags & VNET_BUFFER_F_GSO)
2377         ip0_len = gso_mtu_sz (b[0]);
2378
2379       ip4_mtu_check (b[0], ip0_len,
2380                      adj0[0].rewrite_header.max_l3_packet_bytes,
2381                      ip0->flags_and_fragment_offset &
2382                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2383                      next + 0, is_midchain, &error0);
2384
2385       if (is_mcast)
2386         {
2387           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2388                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2389                     IP4_ERROR_SAME_INTERFACE : error0);
2390         }
2391
2392       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2393        * to see the IP header */
2394       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2395         {
2396           u32 next_index = adj0[0].rewrite_header.next_index;
2397           vlib_buffer_advance (b[0], -(word) rw_len0);
2398           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2399           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2400
2401           if (PREDICT_FALSE
2402               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2403             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2404                                                 tx_sw_if_index0,
2405                                                 &next_index, b[0],
2406                                                 adj0->ia_cfg_index);
2407           next[0] = next_index;
2408
2409           if (is_midchain)
2410             {
2411               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2412                                           0 /* is_ip6 */ ,
2413                                           0 /* with gso */ );
2414
2415               /* Guess we are only writing on ipv4 header. */
2416               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2417             }
2418           else
2419             /* Guess we are only writing on simple Ethernet header. */
2420             vnet_rewrite_one_header (adj0[0], ip0,
2421                                      sizeof (ethernet_header_t));
2422
2423           /*
2424            * Bump the per-adjacency counters
2425            */
2426           if (do_counters)
2427             vlib_increment_combined_counter
2428               (&adjacency_counters,
2429                thread_index,
2430                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2431                                                            b[0]) + rw_len0);
2432
2433           if (is_midchain)
2434             adj_midchain_fixup (vm, adj0, b[0]);
2435
2436           if (is_mcast)
2437             /* copy bytes from the IP address into the MAC rewrite */
2438             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2439                                         adj0->rewrite_header.dst_mcast_offset,
2440                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2441         }
2442       else
2443         {
2444           b[0]->error = error_node->errors[error0];
2445           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2446             ip4_ttl_inc (b[0], ip0);
2447         }
2448
2449       next += 1;
2450       b += 1;
2451       n_left_from -= 1;
2452     }
2453 #endif
2454
2455   while (n_left_from > 0)
2456     {
2457       ip_adjacency_t *adj0;
2458       ip4_header_t *ip0;
2459       u32 rw_len0, adj_index0, error0;
2460       u32 tx_sw_if_index0;
2461
2462       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2463
2464       adj0 = adj_get (adj_index0);
2465
2466       if (do_counters)
2467         vlib_prefetch_combined_counter (&adjacency_counters,
2468                                         thread_index, adj_index0);
2469
2470       ip0 = vlib_buffer_get_current (b[0]);
2471
2472       error0 = IP4_ERROR_NONE;
2473
2474       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2475
2476
2477       /* Update packet buffer attributes/set output interface. */
2478       rw_len0 = adj0[0].rewrite_header.data_bytes;
2479       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2480
2481       /* Check MTU of outgoing interface. */
2482       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2483       if (b[0]->flags & VNET_BUFFER_F_GSO)
2484         ip0_len = gso_mtu_sz (b[0]);
2485
2486       ip4_mtu_check (b[0], ip0_len,
2487                      adj0[0].rewrite_header.max_l3_packet_bytes,
2488                      ip0->flags_and_fragment_offset &
2489                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2490                      next + 0, is_midchain, &error0);
2491
2492       if (is_mcast)
2493         {
2494           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2495                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2496                     IP4_ERROR_SAME_INTERFACE : error0);
2497         }
2498
2499       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2500        * to see the IP header */
2501       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2502         {
2503           u32 next_index = adj0[0].rewrite_header.next_index;
2504           vlib_buffer_advance (b[0], -(word) rw_len0);
2505           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2506           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2507
2508           if (PREDICT_FALSE
2509               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2510             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2511                                                 tx_sw_if_index0,
2512                                                 &next_index, b[0],
2513                                                 adj0->ia_cfg_index);
2514           next[0] = next_index;
2515
2516           if (is_midchain)
2517             {
2518               /* this acts on the packet that is about to be encapped */
2519               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2520                                           0 /* is_ip6 */ ,
2521                                           0 /* with gso */ );
2522
2523               /* Guess we are only writing on ipv4 header. */
2524               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2525             }
2526           else
2527             /* Guess we are only writing on simple Ethernet header. */
2528             vnet_rewrite_one_header (adj0[0], ip0,
2529                                      sizeof (ethernet_header_t));
2530
2531           if (do_counters)
2532             vlib_increment_combined_counter
2533               (&adjacency_counters,
2534                thread_index, adj_index0, 1,
2535                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2536
2537           if (is_midchain && adj0->sub_type.midchain.fixup_func)
2538             adj0->sub_type.midchain.fixup_func
2539               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2540
2541           if (is_mcast)
2542             /* copy bytes from the IP address into the MAC rewrite */
2543             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2544                                         adj0->rewrite_header.dst_mcast_offset,
2545                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2546         }
2547       else
2548         {
2549           b[0]->error = error_node->errors[error0];
2550           /* undo the TTL decrement - we'll be back to do it again */
2551           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2552             ip4_ttl_inc (b[0], ip0);
2553         }
2554
2555       next += 1;
2556       b += 1;
2557       n_left_from -= 1;
2558     }
2559
2560
2561   /* Need to do trace after rewrites to pick up new packet data. */
2562   if (node->flags & VLIB_NODE_FLAG_TRACE)
2563     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2564
2565   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2566   return frame->n_vectors;
2567 }
2568
2569 always_inline uword
2570 ip4_rewrite_inline (vlib_main_t * vm,
2571                     vlib_node_runtime_t * node,
2572                     vlib_frame_t * frame,
2573                     int do_counters, int is_midchain, int is_mcast)
2574 {
2575   return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2576                                       is_midchain, is_mcast);
2577 }
2578
2579
2580 /** @brief IPv4 rewrite node.
2581     @node ip4-rewrite
2582
2583     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2584     header checksum, fetch the ip adjacency, check the outbound mtu,
2585     apply the adjacency rewrite, and send pkts to the adjacency
2586     rewrite header's rewrite_next_index.
2587
2588     @param vm vlib_main_t corresponding to the current thread
2589     @param node vlib_node_runtime_t
2590     @param frame vlib_frame_t whose contents should be dispatched
2591
2592     @par Graph mechanics: buffer metadata, next index usage
2593
2594     @em Uses:
2595     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2596         - the rewrite adjacency index
2597     - <code>adj->lookup_next_index</code>
2598         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2599           the packet will be dropped.
2600     - <code>adj->rewrite_header</code>
2601         - Rewrite string length, rewrite string, next_index
2602
2603     @em Sets:
2604     - <code>b->current_data, b->current_length</code>
2605         - Updated net of applying the rewrite string
2606
2607     <em>Next Indices:</em>
2608     - <code> adj->rewrite_header.next_index </code>
2609       or @c ip4-drop
2610 */
2611
2612 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2613                                  vlib_frame_t * frame)
2614 {
2615   if (adj_are_counters_enabled ())
2616     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2617   else
2618     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2619 }
2620
2621 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2622                                        vlib_node_runtime_t * node,
2623                                        vlib_frame_t * frame)
2624 {
2625   if (adj_are_counters_enabled ())
2626     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2627   else
2628     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2629 }
2630
2631 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2632                                   vlib_node_runtime_t * node,
2633                                   vlib_frame_t * frame)
2634 {
2635   if (adj_are_counters_enabled ())
2636     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2637   else
2638     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2639 }
2640
2641 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2642                                        vlib_node_runtime_t * node,
2643                                        vlib_frame_t * frame)
2644 {
2645   if (adj_are_counters_enabled ())
2646     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2647   else
2648     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2649 }
2650
2651 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2652                                         vlib_node_runtime_t * node,
2653                                         vlib_frame_t * frame)
2654 {
2655   if (adj_are_counters_enabled ())
2656     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2657   else
2658     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2659 }
2660
2661 /* *INDENT-OFF* */
2662 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2663   .name = "ip4-rewrite",
2664   .vector_size = sizeof (u32),
2665
2666   .format_trace = format_ip4_rewrite_trace,
2667
2668   .n_next_nodes = IP4_REWRITE_N_NEXT,
2669   .next_nodes = {
2670     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2671     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2672     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2673   },
2674 };
2675
2676 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2677   .name = "ip4-rewrite-bcast",
2678   .vector_size = sizeof (u32),
2679
2680   .format_trace = format_ip4_rewrite_trace,
2681   .sibling_of = "ip4-rewrite",
2682 };
2683
2684 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2685   .name = "ip4-rewrite-mcast",
2686   .vector_size = sizeof (u32),
2687
2688   .format_trace = format_ip4_rewrite_trace,
2689   .sibling_of = "ip4-rewrite",
2690 };
2691
2692 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2693   .name = "ip4-mcast-midchain",
2694   .vector_size = sizeof (u32),
2695
2696   .format_trace = format_ip4_rewrite_trace,
2697   .sibling_of = "ip4-rewrite",
2698 };
2699
2700 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2701   .name = "ip4-midchain",
2702   .vector_size = sizeof (u32),
2703   .format_trace = format_ip4_rewrite_trace,
2704   .sibling_of = "ip4-rewrite",
2705 };
2706 /* *INDENT-ON* */
2707
2708 static int
2709 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2710 {
2711   ip4_fib_mtrie_t *mtrie0;
2712   ip4_fib_mtrie_leaf_t leaf0;
2713   u32 lbi0;
2714
2715   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2716
2717   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2718   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2719   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2720
2721   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2722
2723   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2724 }
2725
2726 static clib_error_t *
2727 test_lookup_command_fn (vlib_main_t * vm,
2728                         unformat_input_t * input, vlib_cli_command_t * cmd)
2729 {
2730   ip4_fib_t *fib;
2731   u32 table_id = 0;
2732   f64 count = 1;
2733   u32 n;
2734   int i;
2735   ip4_address_t ip4_base_address;
2736   u64 errors = 0;
2737
2738   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2739     {
2740       if (unformat (input, "table %d", &table_id))
2741         {
2742           /* Make sure the entry exists. */
2743           fib = ip4_fib_get (table_id);
2744           if ((fib) && (fib->index != table_id))
2745             return clib_error_return (0, "<fib-index> %d does not exist",
2746                                       table_id);
2747         }
2748       else if (unformat (input, "count %f", &count))
2749         ;
2750
2751       else if (unformat (input, "%U",
2752                          unformat_ip4_address, &ip4_base_address))
2753         ;
2754       else
2755         return clib_error_return (0, "unknown input `%U'",
2756                                   format_unformat_error, input);
2757     }
2758
2759   n = count;
2760
2761   for (i = 0; i < n; i++)
2762     {
2763       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2764         errors++;
2765
2766       ip4_base_address.as_u32 =
2767         clib_host_to_net_u32 (1 +
2768                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2769     }
2770
2771   if (errors)
2772     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2773   else
2774     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2775
2776   return 0;
2777 }
2778
2779 /*?
2780  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2781  * given FIB table to determine if there is a conflict with the
2782  * adjacency table. The fib-id can be determined by using the
2783  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2784  * of 0 is used.
2785  *
2786  * @todo This command uses fib-id, other commands use table-id (not
2787  * just a name, they are different indexes). Would like to change this
2788  * to table-id for consistency.
2789  *
2790  * @cliexpar
2791  * Example of how to run the test lookup command:
2792  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2793  * No errors in 2 lookups
2794  * @cliexend
2795 ?*/
2796 /* *INDENT-OFF* */
2797 VLIB_CLI_COMMAND (lookup_test_command, static) =
2798 {
2799   .path = "test lookup",
2800   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2801   .function = test_lookup_command_fn,
2802 };
2803 /* *INDENT-ON* */
2804
2805 #ifndef CLIB_MARCH_VARIANT
2806 int
2807 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2808 {
2809   u32 fib_index;
2810
2811   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2812
2813   if (~0 == fib_index)
2814     return VNET_API_ERROR_NO_SUCH_FIB;
2815
2816   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2817                                   flow_hash_config);
2818
2819   return 0;
2820 }
2821 #endif
2822
2823 static clib_error_t *
2824 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2825                              unformat_input_t * input,
2826                              vlib_cli_command_t * cmd)
2827 {
2828   int matched = 0;
2829   u32 table_id = 0;
2830   u32 flow_hash_config = 0;
2831   int rv;
2832
2833   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2834     {
2835       if (unformat (input, "table %d", &table_id))
2836         matched = 1;
2837 #define _(a,v) \
2838     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2839       foreach_flow_hash_bit
2840 #undef _
2841         else
2842         break;
2843     }
2844
2845   if (matched == 0)
2846     return clib_error_return (0, "unknown input `%U'",
2847                               format_unformat_error, input);
2848
2849   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2850   switch (rv)
2851     {
2852     case 0:
2853       break;
2854
2855     case VNET_API_ERROR_NO_SUCH_FIB:
2856       return clib_error_return (0, "no such FIB table %d", table_id);
2857
2858     default:
2859       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2860       break;
2861     }
2862
2863   return 0;
2864 }
2865
2866 /*?
2867  * Configure the set of IPv4 fields used by the flow hash.
2868  *
2869  * @cliexpar
2870  * Example of how to set the flow hash on a given table:
2871  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2872  * Example of how to display the configured flow hash:
2873  * @cliexstart{show ip fib}
2874  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2875  * 0.0.0.0/0
2876  *   unicast-ip4-chain
2877  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2878  *     [0] [@0]: dpo-drop ip6
2879  * 0.0.0.0/32
2880  *   unicast-ip4-chain
2881  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2882  *     [0] [@0]: dpo-drop ip6
2883  * 224.0.0.0/8
2884  *   unicast-ip4-chain
2885  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2886  *     [0] [@0]: dpo-drop ip6
2887  * 6.0.1.2/32
2888  *   unicast-ip4-chain
2889  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2890  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2891  * 7.0.0.1/32
2892  *   unicast-ip4-chain
2893  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2894  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2895  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2896  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2897  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2898  * 240.0.0.0/8
2899  *   unicast-ip4-chain
2900  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2901  *     [0] [@0]: dpo-drop ip6
2902  * 255.255.255.255/32
2903  *   unicast-ip4-chain
2904  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2905  *     [0] [@0]: dpo-drop ip6
2906  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2907  * 0.0.0.0/0
2908  *   unicast-ip4-chain
2909  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2910  *     [0] [@0]: dpo-drop ip6
2911  * 0.0.0.0/32
2912  *   unicast-ip4-chain
2913  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2914  *     [0] [@0]: dpo-drop ip6
2915  * 172.16.1.0/24
2916  *   unicast-ip4-chain
2917  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2918  *     [0] [@4]: ipv4-glean: af_packet0
2919  * 172.16.1.1/32
2920  *   unicast-ip4-chain
2921  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2922  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2923  * 172.16.1.2/32
2924  *   unicast-ip4-chain
2925  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2926  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2927  * 172.16.2.0/24
2928  *   unicast-ip4-chain
2929  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2930  *     [0] [@4]: ipv4-glean: af_packet1
2931  * 172.16.2.1/32
2932  *   unicast-ip4-chain
2933  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2934  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2935  * 224.0.0.0/8
2936  *   unicast-ip4-chain
2937  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2938  *     [0] [@0]: dpo-drop ip6
2939  * 240.0.0.0/8
2940  *   unicast-ip4-chain
2941  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2942  *     [0] [@0]: dpo-drop ip6
2943  * 255.255.255.255/32
2944  *   unicast-ip4-chain
2945  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2946  *     [0] [@0]: dpo-drop ip6
2947  * @cliexend
2948 ?*/
2949 /* *INDENT-OFF* */
2950 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2951 {
2952   .path = "set ip flow-hash",
2953   .short_help =
2954   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2955   .function = set_ip_flow_hash_command_fn,
2956 };
2957 /* *INDENT-ON* */
2958
2959 #ifndef CLIB_MARCH_VARIANT
2960 int
2961 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2962                              u32 table_index)
2963 {
2964   vnet_main_t *vnm = vnet_get_main ();
2965   vnet_interface_main_t *im = &vnm->interface_main;
2966   ip4_main_t *ipm = &ip4_main;
2967   ip_lookup_main_t *lm = &ipm->lookup_main;
2968   vnet_classify_main_t *cm = &vnet_classify_main;
2969   ip4_address_t *if_addr;
2970
2971   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2972     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2973
2974   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2975     return VNET_API_ERROR_NO_SUCH_ENTRY;
2976
2977   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2978   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2979
2980   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2981
2982   if (NULL != if_addr)
2983     {
2984       fib_prefix_t pfx = {
2985         .fp_len = 32,
2986         .fp_proto = FIB_PROTOCOL_IP4,
2987         .fp_addr.ip4 = *if_addr,
2988       };
2989       u32 fib_index;
2990
2991       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2992                                                        sw_if_index);
2993
2994
2995       if (table_index != (u32) ~ 0)
2996         {
2997           dpo_id_t dpo = DPO_INVALID;
2998
2999           dpo_set (&dpo,
3000                    DPO_CLASSIFY,
3001                    DPO_PROTO_IP4,
3002                    classify_dpo_create (DPO_PROTO_IP4, table_index));
3003
3004           fib_table_entry_special_dpo_add (fib_index,
3005                                            &pfx,
3006                                            FIB_SOURCE_CLASSIFY,
3007                                            FIB_ENTRY_FLAG_NONE, &dpo);
3008           dpo_reset (&dpo);
3009         }
3010       else
3011         {
3012           fib_table_entry_special_remove (fib_index,
3013                                           &pfx, FIB_SOURCE_CLASSIFY);
3014         }
3015     }
3016
3017   return 0;
3018 }
3019 #endif
3020
3021 static clib_error_t *
3022 set_ip_classify_command_fn (vlib_main_t * vm,
3023                             unformat_input_t * input,
3024                             vlib_cli_command_t * cmd)
3025 {
3026   u32 table_index = ~0;
3027   int table_index_set = 0;
3028   u32 sw_if_index = ~0;
3029   int rv;
3030
3031   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3032     {
3033       if (unformat (input, "table-index %d", &table_index))
3034         table_index_set = 1;
3035       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3036                          vnet_get_main (), &sw_if_index))
3037         ;
3038       else
3039         break;
3040     }
3041
3042   if (table_index_set == 0)
3043     return clib_error_return (0, "classify table-index must be specified");
3044
3045   if (sw_if_index == ~0)
3046     return clib_error_return (0, "interface / subif must be specified");
3047
3048   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3049
3050   switch (rv)
3051     {
3052     case 0:
3053       break;
3054
3055     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3056       return clib_error_return (0, "No such interface");
3057
3058     case VNET_API_ERROR_NO_SUCH_ENTRY:
3059       return clib_error_return (0, "No such classifier table");
3060     }
3061   return 0;
3062 }
3063
3064 /*?
3065  * Assign a classification table to an interface. The classification
3066  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3067  * commands. Once the table is created, use this command to filter packets
3068  * on an interface.
3069  *
3070  * @cliexpar
3071  * Example of how to assign a classification table to an interface:
3072  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3073 ?*/
3074 /* *INDENT-OFF* */
3075 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3076 {
3077     .path = "set ip classify",
3078     .short_help =
3079     "set ip classify intfc <interface> table-index <classify-idx>",
3080     .function = set_ip_classify_command_fn,
3081 };
3082 /* *INDENT-ON* */
3083
3084 static clib_error_t *
3085 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3086 {
3087   ip4_main_t *im = &ip4_main;
3088   uword heapsize = 0;
3089
3090   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3091     {
3092       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3093         ;
3094       else
3095         return clib_error_return (0,
3096                                   "invalid heap-size parameter `%U'",
3097                                   format_unformat_error, input);
3098     }
3099
3100   im->mtrie_heap_size = heapsize;
3101
3102   return 0;
3103 }
3104
3105 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3106
3107 /*
3108  * fd.io coding-style-patch-verification: ON
3109  *
3110  * Local Variables:
3111  * eval: (c-set-style "gnu")
3112  * End:
3113  */