ip: fix interface ip address del sw_if_index check
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/classify_dpo.h>
56 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
57 #include <vnet/adj/adj_dp.h>
58
59 #include <vnet/ip/ip4_forward.h>
60 #include <vnet/interface_output.h>
61 #include <vnet/classify/vnet_classify.h>
62
63 /** @brief IPv4 lookup node.
64     @node ip4-lookup
65
66     This is the main IPv4 lookup dispatch node.
67
68     @param vm vlib_main_t corresponding to the current thread
69     @param node vlib_node_runtime_t
70     @param frame vlib_frame_t whose contents should be dispatched
71
72     @par Graph mechanics: buffer metadata, next index usage
73
74     @em Uses:
75     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
76         - Indicates the @c sw_if_index value of the interface that the
77           packet was received on.
78     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
79         - When the value is @c ~0 then the node performs a longest prefix
80           match (LPM) for the packet destination address in the FIB attached
81           to the receive interface.
82         - Otherwise perform LPM for the packet destination address in the
83           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
84           value (0, 1, ...) and not a VRF id.
85
86     @em Sets:
87     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
88         - The lookup result adjacency index.
89
90     <em>Next Index:</em>
91     - Dispatches the packet to the node index found in
92       ip_adjacency_t @c adj->lookup_next_index
93       (where @c adj is the lookup result adjacency).
94 */
95 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
96                                 vlib_frame_t * frame)
97 {
98   return ip4_lookup_inline (vm, node, frame);
99 }
100
101 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
102
103 /* *INDENT-OFF* */
104 VLIB_REGISTER_NODE (ip4_lookup_node) =
105 {
106   .name = "ip4-lookup",
107   .vector_size = sizeof (u32),
108   .format_trace = format_ip4_lookup_trace,
109   .n_next_nodes = IP_LOOKUP_N_NEXT,
110   .next_nodes = IP4_LOOKUP_NEXT_NODES,
111 };
112 /* *INDENT-ON* */
113
114 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
115                                       vlib_node_runtime_t * node,
116                                       vlib_frame_t * frame)
117 {
118   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
119   u32 n_left, *from;
120   u32 thread_index = vm->thread_index;
121   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
122   u16 nexts[VLIB_FRAME_SIZE], *next;
123
124   from = vlib_frame_vector_args (frame);
125   n_left = frame->n_vectors;
126   next = nexts;
127
128   vlib_get_buffers (vm, from, bufs, n_left);
129
130   while (n_left >= 4)
131     {
132       const load_balance_t *lb0, *lb1;
133       const ip4_header_t *ip0, *ip1;
134       u32 lbi0, hc0, lbi1, hc1;
135       const dpo_id_t *dpo0, *dpo1;
136
137       /* Prefetch next iteration. */
138       {
139         vlib_prefetch_buffer_header (b[2], LOAD);
140         vlib_prefetch_buffer_header (b[3], LOAD);
141
142         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
143         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
144       }
145
146       ip0 = vlib_buffer_get_current (b[0]);
147       ip1 = vlib_buffer_get_current (b[1]);
148       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
149       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
150
151       lb0 = load_balance_get (lbi0);
152       lb1 = load_balance_get (lbi1);
153
154       /*
155        * this node is for via FIBs we can re-use the hash value from the
156        * to node if present.
157        * We don't want to use the same hash value at each level in the recursion
158        * graph as that would lead to polarisation
159        */
160       hc0 = hc1 = 0;
161
162       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
163         {
164           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
165             {
166               hc0 = vnet_buffer (b[0])->ip.flow_hash =
167                 vnet_buffer (b[0])->ip.flow_hash >> 1;
168             }
169           else
170             {
171               hc0 = vnet_buffer (b[0])->ip.flow_hash =
172                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
173             }
174           dpo0 = load_balance_get_fwd_bucket
175             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
176         }
177       else
178         {
179           dpo0 = load_balance_get_bucket_i (lb0, 0);
180         }
181       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
182         {
183           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
184             {
185               hc1 = vnet_buffer (b[1])->ip.flow_hash =
186                 vnet_buffer (b[1])->ip.flow_hash >> 1;
187             }
188           else
189             {
190               hc1 = vnet_buffer (b[1])->ip.flow_hash =
191                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
192             }
193           dpo1 = load_balance_get_fwd_bucket
194             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
195         }
196       else
197         {
198           dpo1 = load_balance_get_bucket_i (lb1, 0);
199         }
200
201       next[0] = dpo0->dpoi_next_node;
202       next[1] = dpo1->dpoi_next_node;
203
204       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
205       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
206
207       vlib_increment_combined_counter
208         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
209       vlib_increment_combined_counter
210         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
211
212       b += 2;
213       next += 2;
214       n_left -= 2;
215     }
216
217   while (n_left > 0)
218     {
219       const load_balance_t *lb0;
220       const ip4_header_t *ip0;
221       const dpo_id_t *dpo0;
222       u32 lbi0, hc0;
223
224       ip0 = vlib_buffer_get_current (b[0]);
225       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
226
227       lb0 = load_balance_get (lbi0);
228
229       hc0 = 0;
230       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
231         {
232           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
233             {
234               hc0 = vnet_buffer (b[0])->ip.flow_hash =
235                 vnet_buffer (b[0])->ip.flow_hash >> 1;
236             }
237           else
238             {
239               hc0 = vnet_buffer (b[0])->ip.flow_hash =
240                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
241             }
242           dpo0 = load_balance_get_fwd_bucket
243             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
244         }
245       else
246         {
247           dpo0 = load_balance_get_bucket_i (lb0, 0);
248         }
249
250       next[0] = dpo0->dpoi_next_node;
251       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
252
253       vlib_increment_combined_counter
254         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
255
256       b += 1;
257       next += 1;
258       n_left -= 1;
259     }
260
261   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
262   if (node->flags & VLIB_NODE_FLAG_TRACE)
263     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
264
265   return frame->n_vectors;
266 }
267
268 /* *INDENT-OFF* */
269 VLIB_REGISTER_NODE (ip4_load_balance_node) =
270 {
271   .name = "ip4-load-balance",
272   .vector_size = sizeof (u32),
273   .sibling_of = "ip4-lookup",
274   .format_trace = format_ip4_lookup_trace,
275 };
276 /* *INDENT-ON* */
277
278 #ifndef CLIB_MARCH_VARIANT
279 /* get first interface address */
280 ip4_address_t *
281 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
282                              ip_interface_address_t ** result_ia)
283 {
284   ip_lookup_main_t *lm = &im->lookup_main;
285   ip_interface_address_t *ia = 0;
286   ip4_address_t *result = 0;
287
288   /* *INDENT-OFF* */
289   foreach_ip_interface_address
290     (lm, ia, sw_if_index,
291      1 /* honor unnumbered */ ,
292      ({
293        ip4_address_t * a =
294          ip_interface_address_get_address (lm, ia);
295        result = a;
296        break;
297      }));
298   /* *INDENT-OFF* */
299   if (result_ia)
300     *result_ia = result ? ia : 0;
301   return result;
302 }
303 #endif
304
305 static void
306 ip4_add_subnet_bcast_route (u32 fib_index,
307                             fib_prefix_t *pfx,
308                             u32 sw_if_index)
309 {
310   vnet_sw_interface_flags_t iflags;
311
312   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
313
314   fib_table_entry_special_remove(fib_index,
315                                  pfx,
316                                  FIB_SOURCE_INTERFACE);
317
318   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
319     {
320       fib_table_entry_update_one_path (fib_index, pfx,
321                                        FIB_SOURCE_INTERFACE,
322                                        FIB_ENTRY_FLAG_NONE,
323                                        DPO_PROTO_IP4,
324                                        /* No next-hop address */
325                                        &ADJ_BCAST_ADDR,
326                                        sw_if_index,
327                                        // invalid FIB index
328                                        ~0,
329                                        1,
330                                        // no out-label stack
331                                        NULL,
332                                        FIB_ROUTE_PATH_FLAG_NONE);
333     }
334   else
335     {
336         fib_table_entry_special_add(fib_index,
337                                     pfx,
338                                     FIB_SOURCE_INTERFACE,
339                                     (FIB_ENTRY_FLAG_DROP |
340                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
341     }
342 }
343
344 static void
345 ip4_add_interface_prefix_routes (ip4_main_t *im,
346                                  u32 sw_if_index,
347                                  u32 fib_index,
348                                  ip_interface_address_t * a)
349 {
350   ip_lookup_main_t *lm = &im->lookup_main;
351   ip_interface_prefix_t *if_prefix;
352   ip4_address_t *address = ip_interface_address_get_address (lm, a);
353
354   ip_interface_prefix_key_t key = {
355     .prefix = {
356       .fp_len = a->address_length,
357       .fp_proto = FIB_PROTOCOL_IP4,
358       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
359     },
360     .sw_if_index = sw_if_index,
361   };
362
363   fib_prefix_t pfx_special = {
364     .fp_proto = FIB_PROTOCOL_IP4,
365   };
366
367   /* If prefix already set on interface, just increment ref count & return */
368   if_prefix = ip_get_interface_prefix (lm, &key);
369   if (if_prefix)
370     {
371       if_prefix->ref_count += 1;
372       return;
373     }
374
375   /* New prefix - allocate a pool entry, initialize it, add to the hash */
376   pool_get (lm->if_prefix_pool, if_prefix);
377   if_prefix->ref_count = 1;
378   if_prefix->src_ia_index = a - lm->if_address_pool;
379   clib_memcpy (&if_prefix->key, &key, sizeof (key));
380   mhash_set (&lm->prefix_to_if_prefix_index, &key,
381              if_prefix - lm->if_prefix_pool, 0 /* old value */);
382
383   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
384   if (a->address_length <= 30)
385     {
386       pfx_special.fp_len = a->address_length;
387       pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
388
389       /* set the glean route for the prefix */
390       fib_table_entry_update_one_path (fib_index, &pfx_special,
391                                        FIB_SOURCE_INTERFACE,
392                                        (FIB_ENTRY_FLAG_CONNECTED |
393                                         FIB_ENTRY_FLAG_ATTACHED),
394                                        DPO_PROTO_IP4,
395                                        /* No next-hop address */
396                                        NULL,
397                                        sw_if_index,
398                                        /* invalid FIB index */
399                                        ~0,
400                                        1,
401                                        /* no out-label stack */
402                                        NULL,
403                                        FIB_ROUTE_PATH_FLAG_NONE);
404
405       /* set a drop route for the base address of the prefix */
406       pfx_special.fp_len = 32;
407       pfx_special.fp_addr.ip4.as_u32 =
408         address->as_u32 & im->fib_masks[a->address_length];
409
410       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
411         fib_table_entry_special_add (fib_index, &pfx_special,
412                                      FIB_SOURCE_INTERFACE,
413                                      (FIB_ENTRY_FLAG_DROP |
414                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
415
416       /* set a route for the broadcast address of the prefix */
417       pfx_special.fp_len = 32;
418       pfx_special.fp_addr.ip4.as_u32 =
419         address->as_u32 | ~im->fib_masks[a->address_length];
420       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
421         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
422
423
424     }
425   /* length == 31 - add an attached route for the other address */
426   else if (a->address_length == 31)
427     {
428       pfx_special.fp_len = 32;
429       pfx_special.fp_addr.ip4.as_u32 =
430         address->as_u32 ^ clib_host_to_net_u32(1);
431
432       fib_table_entry_update_one_path (fib_index, &pfx_special,
433                                        FIB_SOURCE_INTERFACE,
434                                        (FIB_ENTRY_FLAG_ATTACHED),
435                                        DPO_PROTO_IP4,
436                                        &pfx_special.fp_addr,
437                                        sw_if_index,
438                                        /* invalid FIB index */
439                                        ~0,
440                                        1,
441                                        NULL,
442                                        FIB_ROUTE_PATH_FLAG_NONE);
443     }
444 }
445
446 static void
447 ip4_add_interface_routes (u32 sw_if_index,
448                           ip4_main_t * im, u32 fib_index,
449                           ip_interface_address_t * a)
450 {
451   ip_lookup_main_t *lm = &im->lookup_main;
452   ip4_address_t *address = ip_interface_address_get_address (lm, a);
453   fib_prefix_t pfx = {
454     .fp_len = 32,
455     .fp_proto = FIB_PROTOCOL_IP4,
456     .fp_addr.ip4 = *address,
457   };
458
459   /* set special routes for the prefix if needed */
460   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
461
462   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
463     {
464       u32 classify_table_index =
465         lm->classify_table_index_by_sw_if_index[sw_if_index];
466       if (classify_table_index != (u32) ~ 0)
467         {
468           dpo_id_t dpo = DPO_INVALID;
469
470           dpo_set (&dpo,
471                    DPO_CLASSIFY,
472                    DPO_PROTO_IP4,
473                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
474
475           fib_table_entry_special_dpo_add (fib_index,
476                                            &pfx,
477                                            FIB_SOURCE_CLASSIFY,
478                                            FIB_ENTRY_FLAG_NONE, &dpo);
479           dpo_reset (&dpo);
480         }
481     }
482
483   fib_table_entry_update_one_path (fib_index, &pfx,
484                                    FIB_SOURCE_INTERFACE,
485                                    (FIB_ENTRY_FLAG_CONNECTED |
486                                     FIB_ENTRY_FLAG_LOCAL),
487                                    DPO_PROTO_IP4,
488                                    &pfx.fp_addr,
489                                    sw_if_index,
490                                    // invalid FIB index
491                                    ~0,
492                                    1, NULL,
493                                    FIB_ROUTE_PATH_FLAG_NONE);
494 }
495
496 static void
497 ip4_del_interface_prefix_routes (ip4_main_t * im,
498                                  u32 sw_if_index,
499                                  u32 fib_index,
500                                  ip4_address_t * address,
501                                  u32 address_length)
502 {
503   ip_lookup_main_t *lm = &im->lookup_main;
504   ip_interface_prefix_t *if_prefix;
505
506   ip_interface_prefix_key_t key = {
507     .prefix = {
508       .fp_len = address_length,
509       .fp_proto = FIB_PROTOCOL_IP4,
510       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
511     },
512     .sw_if_index = sw_if_index,
513   };
514
515   fib_prefix_t pfx_special = {
516     .fp_len = 32,
517     .fp_proto = FIB_PROTOCOL_IP4,
518   };
519
520   if_prefix = ip_get_interface_prefix (lm, &key);
521   if (!if_prefix)
522     {
523       clib_warning ("Prefix not found while deleting %U",
524                     format_ip4_address_and_length, address, address_length);
525       return;
526     }
527
528   if_prefix->ref_count -= 1;
529
530   /*
531    * Routes need to be adjusted if:
532    * - deleting last intf addr in prefix
533    * - deleting intf addr used as default source address in glean adjacency
534    *
535    * We're done now otherwise
536    */
537   if ((if_prefix->ref_count > 0) &&
538       !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
539     return;
540
541   /* length <= 30, delete glean route, first address, last address */
542   if (address_length <= 30)
543     {
544
545       /* remove glean route for prefix */
546       pfx_special.fp_addr.ip4 = *address;
547       pfx_special.fp_len = address_length;
548       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
549
550       /* if no more intf addresses in prefix, remove other special routes */
551       if (!if_prefix->ref_count)
552         {
553           /* first address in prefix */
554           pfx_special.fp_addr.ip4.as_u32 =
555             address->as_u32 & im->fib_masks[address_length];
556           pfx_special.fp_len = 32;
557
558           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
559           fib_table_entry_special_remove (fib_index,
560                                           &pfx_special,
561                                           FIB_SOURCE_INTERFACE);
562
563           /* prefix broadcast address */
564           pfx_special.fp_addr.ip4.as_u32 =
565             address->as_u32 | ~im->fib_masks[address_length];
566           pfx_special.fp_len = 32;
567
568           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
569           fib_table_entry_special_remove (fib_index,
570                                           &pfx_special,
571                                           FIB_SOURCE_INTERFACE);
572         }
573       else
574         /* default source addr just got deleted, find another */
575         {
576           ip_interface_address_t *new_src_ia = NULL;
577           ip4_address_t *new_src_addr = NULL;
578
579           new_src_addr =
580             ip4_interface_address_matching_destination
581               (im, address, sw_if_index, &new_src_ia);
582
583           if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
584
585           pfx_special.fp_len = address_length;
586           pfx_special.fp_addr.ip4 = *new_src_addr;
587
588           /* set new glean route for the prefix */
589           fib_table_entry_update_one_path (fib_index, &pfx_special,
590                                            FIB_SOURCE_INTERFACE,
591                                            (FIB_ENTRY_FLAG_CONNECTED |
592                                             FIB_ENTRY_FLAG_ATTACHED),
593                                            DPO_PROTO_IP4,
594                                            /* No next-hop address */
595                                            NULL,
596                                            sw_if_index,
597                                            /* invalid FIB index */
598                                            ~0,
599                                            1,
600                                            /* no out-label stack */
601                                            NULL,
602                                            FIB_ROUTE_PATH_FLAG_NONE);
603           return;
604         }
605     }
606   /* length == 31, delete attached route for the other address */
607   else if (address_length == 31)
608     {
609       pfx_special.fp_addr.ip4.as_u32 =
610         address->as_u32 ^ clib_host_to_net_u32(1);
611
612       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
613     }
614
615   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
616   pool_put (lm->if_prefix_pool, if_prefix);
617 }
618
619 static void
620 ip4_del_interface_routes (u32 sw_if_index,
621                           ip4_main_t * im,
622                           u32 fib_index,
623                           ip4_address_t * address, u32 address_length)
624 {
625   fib_prefix_t pfx = {
626     .fp_len = address_length,
627     .fp_proto = FIB_PROTOCOL_IP4,
628     .fp_addr.ip4 = *address,
629   };
630
631   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
632                                    address, address_length);
633
634   pfx.fp_len = 32;
635   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
636 }
637
638 #ifndef CLIB_MARCH_VARIANT
639 void
640 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
641 {
642   ip4_main_t *im = &ip4_main;
643
644   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
645
646   /*
647    * enable/disable only on the 1<->0 transition
648    */
649   if (is_enable)
650     {
651       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
652         return;
653     }
654   else
655     {
656       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
657       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
658         return;
659     }
660   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
661                                !is_enable, 0, 0);
662
663
664   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
665                                sw_if_index, !is_enable, 0, 0);
666
667   {
668     ip4_enable_disable_interface_callback_t *cb;
669     vec_foreach (cb, im->enable_disable_interface_callbacks)
670       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
671   }
672 }
673
674 static clib_error_t *
675 ip4_add_del_interface_address_internal (vlib_main_t * vm,
676                                         u32 sw_if_index,
677                                         ip4_address_t * address,
678                                         u32 address_length, u32 is_del)
679 {
680   vnet_main_t *vnm = vnet_get_main ();
681   ip4_main_t *im = &ip4_main;
682   ip_lookup_main_t *lm = &im->lookup_main;
683   clib_error_t *error = 0;
684   u32 if_address_index;
685   ip4_address_fib_t ip4_af, *addr_fib = 0;
686
687   /* local0 interface doesn't support IP addressing  */
688   if (sw_if_index == 0)
689     {
690       return
691        clib_error_create ("local0 interface doesn't support IP addressing");
692     }
693
694   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
695   ip4_addr_fib_init (&ip4_af, address,
696                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
697   vec_add1 (addr_fib, ip4_af);
698
699   /*
700    * there is no support for adj-fib handling in the presence of overlapping
701    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
702    * most routers do.
703    */
704   /* *INDENT-OFF* */
705   if (!is_del)
706     {
707       /* When adding an address check that it does not conflict
708          with an existing address on any interface in this table. */
709       ip_interface_address_t *ia;
710       vnet_sw_interface_t *sif;
711
712       pool_foreach(sif, vnm->interface_main.sw_interfaces,
713       ({
714           if (im->fib_index_by_sw_if_index[sw_if_index] ==
715               im->fib_index_by_sw_if_index[sif->sw_if_index])
716             {
717               foreach_ip_interface_address
718                 (&im->lookup_main, ia, sif->sw_if_index,
719                  0 /* honor unnumbered */ ,
720                  ({
721                    ip4_address_t * x =
722                      ip_interface_address_get_address
723                      (&im->lookup_main, ia);
724
725                    if (ip4_destination_matches_route
726                        (im, address, x, ia->address_length) ||
727                        ip4_destination_matches_route (im,
728                                                       x,
729                                                       address,
730                                                       address_length))
731                      {
732                        /* an intf may have >1 addr from the same prefix */
733                        if ((sw_if_index == sif->sw_if_index) &&
734                            (ia->address_length == address_length) &&
735                            (x->as_u32 != address->as_u32))
736                          continue;
737
738                        if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
739                          /* if the address we're comparing against is stale
740                           * then the CP has not added this one back yet, maybe
741                           * it never will, so we have to assume it won't and
742                           * ignore it. if it does add it back, then it will fail
743                           * because this one is now present */
744                          continue;
745
746                        /* error if the length or intf was different */
747                        vnm->api_errno = VNET_API_ERROR_ADDRESS_IN_USE;
748
749                        error = clib_error_create
750                          ("failed to add %U on %U which conflicts with %U for interface %U",
751                           format_ip4_address_and_length, address,
752                           address_length,
753                           format_vnet_sw_if_index_name, vnm,
754                           sw_if_index,
755                           format_ip4_address_and_length, x,
756                           ia->address_length,
757                           format_vnet_sw_if_index_name, vnm,
758                           sif->sw_if_index);
759                        goto done;
760                      }
761                  }));
762             }
763       }));
764     }
765   /* *INDENT-ON* */
766
767   if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
768
769   if (is_del)
770     {
771       if (~0 == if_address_index)
772         {
773           vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
774           error = clib_error_create ("%U not found for interface %U",
775                                      lm->format_address_and_length,
776                                      addr_fib, address_length,
777                                      format_vnet_sw_if_index_name, vnm,
778                                      sw_if_index);
779           goto done;
780         }
781
782       error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib,
783                                         address_length, sw_if_index);
784       if (error)
785         goto done;
786     }
787   else
788     {
789       if (~0 != if_address_index)
790         {
791           ip_interface_address_t *ia;
792
793           ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
794
795           if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
796             {
797               if (ia->sw_if_index == sw_if_index)
798                 {
799                   /* re-adding an address during the replace action.
800                    * consdier this the update. clear the flag and
801                    * we're done */
802                   ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
803                   goto done;
804                 }
805               else
806                 {
807                   /* The prefix is moving from one interface to another.
808                    * delete the stale and add the new */
809                   ip4_add_del_interface_address_internal (vm,
810                                                           ia->sw_if_index,
811                                                           address,
812                                                           address_length, 1);
813                   ia = NULL;
814                   error = ip_interface_address_add (lm, sw_if_index,
815                                                     addr_fib, address_length,
816                                                     &if_address_index);
817                 }
818             }
819           else
820             {
821               vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
822               error = clib_error_create
823                 ("Prefix %U already found on interface %U",
824                  lm->format_address_and_length, addr_fib, address_length,
825                  format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
826             }
827         }
828       else
829         error = ip_interface_address_add (lm, sw_if_index,
830                                           addr_fib, address_length,
831                                           &if_address_index);
832     }
833
834   if (error)
835     goto done;
836
837   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
838   ip4_mfib_interface_enable_disable (sw_if_index, !is_del);
839
840   /* intf addr routes are added/deleted on admin up/down */
841   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
842     {
843       if (is_del)
844         ip4_del_interface_routes (sw_if_index,
845                                   im, ip4_af.fib_index, address,
846                                   address_length);
847       else
848         ip4_add_interface_routes (sw_if_index,
849                                   im, ip4_af.fib_index,
850                                   pool_elt_at_index
851                                   (lm->if_address_pool, if_address_index));
852     }
853
854   ip4_add_del_interface_address_callback_t *cb;
855   vec_foreach (cb, im->add_del_interface_address_callbacks)
856     cb->function (im, cb->function_opaque, sw_if_index,
857                   address, address_length, if_address_index, is_del);
858
859 done:
860   vec_free (addr_fib);
861   return error;
862 }
863
864 clib_error_t *
865 ip4_add_del_interface_address (vlib_main_t * vm,
866                                u32 sw_if_index,
867                                ip4_address_t * address,
868                                u32 address_length, u32 is_del)
869 {
870   return ip4_add_del_interface_address_internal
871     (vm, sw_if_index, address, address_length, is_del);
872 }
873
874 void
875 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
876 {
877   ip_interface_address_t *ia;
878   ip4_main_t *im;
879
880   im = &ip4_main;
881
882   /*
883    * when directed broadcast is enabled, the subnet braodcast route will forward
884    * packets using an adjacency with a broadcast MAC. otherwise it drops
885    */
886   /* *INDENT-OFF* */
887   foreach_ip_interface_address(&im->lookup_main, ia,
888                                sw_if_index, 0,
889      ({
890        if (ia->address_length <= 30)
891          {
892            ip4_address_t *ipa;
893
894            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
895
896            fib_prefix_t pfx = {
897              .fp_len = 32,
898              .fp_proto = FIB_PROTOCOL_IP4,
899              .fp_addr = {
900                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
901              },
902            };
903
904            ip4_add_subnet_bcast_route
905              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
906                                                   sw_if_index),
907               &pfx, sw_if_index);
908          }
909      }));
910   /* *INDENT-ON* */
911 }
912 #endif
913
914 static clib_error_t *
915 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
916 {
917   ip4_main_t *im = &ip4_main;
918   ip_interface_address_t *ia;
919   ip4_address_t *a;
920   u32 is_admin_up, fib_index;
921
922   /* Fill in lookup tables with default table (0). */
923   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
924
925   vec_validate_init_empty (im->
926                            lookup_main.if_address_pool_index_by_sw_if_index,
927                            sw_if_index, ~0);
928
929   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
930
931   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
932
933   /* *INDENT-OFF* */
934   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
935                                 0 /* honor unnumbered */,
936   ({
937     a = ip_interface_address_get_address (&im->lookup_main, ia);
938     if (is_admin_up)
939       ip4_add_interface_routes (sw_if_index,
940                                 im, fib_index,
941                                 ia);
942     else
943       ip4_del_interface_routes (sw_if_index,
944                                 im, fib_index,
945                                 a, ia->address_length);
946   }));
947   /* *INDENT-ON* */
948
949   return 0;
950 }
951
952 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
953
954 /* Built-in ip4 unicast rx feature path definition */
955 /* *INDENT-OFF* */
956 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
957 {
958   .arc_name = "ip4-unicast",
959   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
960   .last_in_arc = "ip4-lookup",
961   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
962 };
963
964 VNET_FEATURE_INIT (ip4_flow_classify, static) =
965 {
966   .arc_name = "ip4-unicast",
967   .node_name = "ip4-flow-classify",
968   .runs_before = VNET_FEATURES ("ip4-inacl"),
969 };
970
971 VNET_FEATURE_INIT (ip4_inacl, static) =
972 {
973   .arc_name = "ip4-unicast",
974   .node_name = "ip4-inacl",
975   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
976 };
977
978 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
979 {
980   .arc_name = "ip4-unicast",
981   .node_name = "ip4-source-and-port-range-check-rx",
982   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
983 };
984
985 VNET_FEATURE_INIT (ip4_policer_classify, static) =
986 {
987   .arc_name = "ip4-unicast",
988   .node_name = "ip4-policer-classify",
989   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
990 };
991
992 VNET_FEATURE_INIT (ip4_ipsec, static) =
993 {
994   .arc_name = "ip4-unicast",
995   .node_name = "ipsec4-input-feature",
996   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
997 };
998
999 VNET_FEATURE_INIT (ip4_vpath, static) =
1000 {
1001   .arc_name = "ip4-unicast",
1002   .node_name = "vpath-input-ip4",
1003   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
1004 };
1005
1006 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
1007 {
1008   .arc_name = "ip4-unicast",
1009   .node_name = "ip4-vxlan-bypass",
1010   .runs_before = VNET_FEATURES ("ip4-lookup"),
1011 };
1012
1013 VNET_FEATURE_INIT (ip4_not_enabled, static) =
1014 {
1015   .arc_name = "ip4-unicast",
1016   .node_name = "ip4-not-enabled",
1017   .runs_before = VNET_FEATURES ("ip4-lookup"),
1018 };
1019
1020 VNET_FEATURE_INIT (ip4_lookup, static) =
1021 {
1022   .arc_name = "ip4-unicast",
1023   .node_name = "ip4-lookup",
1024   .runs_before = 0,     /* not before any other features */
1025 };
1026
1027 /* Built-in ip4 multicast rx feature path definition */
1028 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1029 {
1030   .arc_name = "ip4-multicast",
1031   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1032   .last_in_arc = "ip4-mfib-forward-lookup",
1033   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1034 };
1035
1036 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1037 {
1038   .arc_name = "ip4-multicast",
1039   .node_name = "vpath-input-ip4",
1040   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1041 };
1042
1043 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
1044 {
1045   .arc_name = "ip4-multicast",
1046   .node_name = "ip4-not-enabled",
1047   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1048 };
1049
1050 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1051 {
1052   .arc_name = "ip4-multicast",
1053   .node_name = "ip4-mfib-forward-lookup",
1054   .runs_before = 0,     /* last feature */
1055 };
1056
1057 /* Source and port-range check ip4 tx feature path definition */
1058 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1059 {
1060   .arc_name = "ip4-output",
1061   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1062   .last_in_arc = "interface-output",
1063   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1064 };
1065
1066 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1067 {
1068   .arc_name = "ip4-output",
1069   .node_name = "ip4-source-and-port-range-check-tx",
1070   .runs_before = VNET_FEATURES ("ip4-outacl"),
1071 };
1072
1073 VNET_FEATURE_INIT (ip4_outacl, static) =
1074 {
1075   .arc_name = "ip4-output",
1076   .node_name = "ip4-outacl",
1077   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1078 };
1079
1080 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1081 {
1082   .arc_name = "ip4-output",
1083   .node_name = "ipsec4-output-feature",
1084   .runs_before = VNET_FEATURES ("interface-output"),
1085 };
1086
1087 /* Built-in ip4 tx feature path definition */
1088 VNET_FEATURE_INIT (ip4_interface_output, static) =
1089 {
1090   .arc_name = "ip4-output",
1091   .node_name = "interface-output",
1092   .runs_before = 0,     /* not before any other features */
1093 };
1094 /* *INDENT-ON* */
1095
1096 static clib_error_t *
1097 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1098 {
1099   ip4_main_t *im = &ip4_main;
1100
1101   /* Fill in lookup tables with default table (0). */
1102   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1103   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1104
1105   if (!is_add)
1106     {
1107       ip4_main_t *im4 = &ip4_main;
1108       ip_lookup_main_t *lm4 = &im4->lookup_main;
1109       ip_interface_address_t *ia = 0;
1110       ip4_address_t *address;
1111       vlib_main_t *vm = vlib_get_main ();
1112
1113       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1114       /* *INDENT-OFF* */
1115       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1116       ({
1117         address = ip_interface_address_get_address (lm4, ia);
1118         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1119       }));
1120       /* *INDENT-ON* */
1121       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1122     }
1123
1124   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1125                                is_add, 0, 0);
1126
1127   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1128                                sw_if_index, is_add, 0, 0);
1129
1130   return /* no error */ 0;
1131 }
1132
1133 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1134
1135 /* Global IP4 main. */
1136 #ifndef CLIB_MARCH_VARIANT
1137 ip4_main_t ip4_main;
1138 #endif /* CLIB_MARCH_VARIANT */
1139
1140 static clib_error_t *
1141 ip4_lookup_init (vlib_main_t * vm)
1142 {
1143   ip4_main_t *im = &ip4_main;
1144   clib_error_t *error;
1145   uword i;
1146
1147   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1148     return error;
1149   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1150     return (error);
1151   if ((error = vlib_call_init_function (vm, fib_module_init)))
1152     return error;
1153   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1154     return error;
1155
1156   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1157     {
1158       u32 m;
1159
1160       if (i < 32)
1161         m = pow2_mask (i) << (32 - i);
1162       else
1163         m = ~0;
1164       im->fib_masks[i] = clib_host_to_net_u32 (m);
1165     }
1166
1167   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1168
1169   /* Create FIB with index 0 and table id of 0. */
1170   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1171                                      FIB_SOURCE_DEFAULT_ROUTE);
1172   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1173                                       MFIB_SOURCE_DEFAULT_ROUTE);
1174
1175   {
1176     pg_node_t *pn;
1177     pn = pg_get_node (ip4_lookup_node.index);
1178     pn->unformat_edit = unformat_pg_ip4_header;
1179   }
1180
1181   {
1182     ethernet_arp_header_t h;
1183
1184     clib_memset (&h, 0, sizeof (h));
1185
1186 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1187 #define _8(f,v) h.f = v;
1188     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1189     _16 (l3_type, ETHERNET_TYPE_IP4);
1190     _8 (n_l2_address_bytes, 6);
1191     _8 (n_l3_address_bytes, 4);
1192     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1193 #undef _16
1194 #undef _8
1195
1196     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1197                                /* data */ &h,
1198                                sizeof (h),
1199                                /* alloc chunk size */ 8,
1200                                "ip4 arp");
1201   }
1202
1203   return error;
1204 }
1205
1206 VLIB_INIT_FUNCTION (ip4_lookup_init);
1207
1208 typedef struct
1209 {
1210   /* Adjacency taken. */
1211   u32 dpo_index;
1212   u32 flow_hash;
1213   u32 fib_index;
1214
1215   /* Packet data, possibly *after* rewrite. */
1216   u8 packet_data[64 - 1 * sizeof (u32)];
1217 }
1218 ip4_forward_next_trace_t;
1219
1220 #ifndef CLIB_MARCH_VARIANT
1221 u8 *
1222 format_ip4_forward_next_trace (u8 * s, va_list * args)
1223 {
1224   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1225   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1226   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1227   u32 indent = format_get_indent (s);
1228   s = format (s, "%U%U",
1229               format_white_space, indent,
1230               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1231   return s;
1232 }
1233 #endif
1234
1235 static u8 *
1236 format_ip4_lookup_trace (u8 * s, va_list * args)
1237 {
1238   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1239   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1240   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1241   u32 indent = format_get_indent (s);
1242
1243   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1244               t->fib_index, t->dpo_index, t->flow_hash);
1245   s = format (s, "\n%U%U",
1246               format_white_space, indent,
1247               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1248   return s;
1249 }
1250
1251 static u8 *
1252 format_ip4_rewrite_trace (u8 * s, va_list * args)
1253 {
1254   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1255   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1256   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1257   u32 indent = format_get_indent (s);
1258
1259   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1260               t->fib_index, t->dpo_index, format_ip_adjacency,
1261               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1262   s = format (s, "\n%U%U",
1263               format_white_space, indent,
1264               format_ip_adjacency_packet_data,
1265               t->packet_data, sizeof (t->packet_data));
1266   return s;
1267 }
1268
1269 #ifndef CLIB_MARCH_VARIANT
1270 /* Common trace function for all ip4-forward next nodes. */
1271 void
1272 ip4_forward_next_trace (vlib_main_t * vm,
1273                         vlib_node_runtime_t * node,
1274                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1275 {
1276   u32 *from, n_left;
1277   ip4_main_t *im = &ip4_main;
1278
1279   n_left = frame->n_vectors;
1280   from = vlib_frame_vector_args (frame);
1281
1282   while (n_left >= 4)
1283     {
1284       u32 bi0, bi1;
1285       vlib_buffer_t *b0, *b1;
1286       ip4_forward_next_trace_t *t0, *t1;
1287
1288       /* Prefetch next iteration. */
1289       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1290       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1291
1292       bi0 = from[0];
1293       bi1 = from[1];
1294
1295       b0 = vlib_get_buffer (vm, bi0);
1296       b1 = vlib_get_buffer (vm, bi1);
1297
1298       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1299         {
1300           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1301           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1302           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1303           t0->fib_index =
1304             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1305              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1306             vec_elt (im->fib_index_by_sw_if_index,
1307                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1308
1309           clib_memcpy_fast (t0->packet_data,
1310                             vlib_buffer_get_current (b0),
1311                             sizeof (t0->packet_data));
1312         }
1313       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1314         {
1315           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1316           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1317           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1318           t1->fib_index =
1319             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1320              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1321             vec_elt (im->fib_index_by_sw_if_index,
1322                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1323           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1324                             sizeof (t1->packet_data));
1325         }
1326       from += 2;
1327       n_left -= 2;
1328     }
1329
1330   while (n_left >= 1)
1331     {
1332       u32 bi0;
1333       vlib_buffer_t *b0;
1334       ip4_forward_next_trace_t *t0;
1335
1336       bi0 = from[0];
1337
1338       b0 = vlib_get_buffer (vm, bi0);
1339
1340       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1341         {
1342           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1343           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1344           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1345           t0->fib_index =
1346             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1347              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1348             vec_elt (im->fib_index_by_sw_if_index,
1349                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1350           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1351                             sizeof (t0->packet_data));
1352         }
1353       from += 1;
1354       n_left -= 1;
1355     }
1356 }
1357
1358 /* Compute TCP/UDP/ICMP4 checksum in software. */
1359 u16
1360 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1361                               ip4_header_t * ip0)
1362 {
1363   ip_csum_t sum0;
1364   u32 ip_header_length, payload_length_host_byte_order;
1365
1366   /* Initialize checksum with ip header. */
1367   ip_header_length = ip4_header_bytes (ip0);
1368   payload_length_host_byte_order =
1369     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1370   sum0 =
1371     clib_host_to_net_u32 (payload_length_host_byte_order +
1372                           (ip0->protocol << 16));
1373
1374   if (BITS (uword) == 32)
1375     {
1376       sum0 =
1377         ip_csum_with_carry (sum0,
1378                             clib_mem_unaligned (&ip0->src_address, u32));
1379       sum0 =
1380         ip_csum_with_carry (sum0,
1381                             clib_mem_unaligned (&ip0->dst_address, u32));
1382     }
1383   else
1384     sum0 =
1385       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1386
1387   return ip_calculate_l4_checksum (vm, p0, sum0,
1388                                    payload_length_host_byte_order, (u8 *) ip0,
1389                                    ip_header_length, NULL);
1390 }
1391
1392 u32
1393 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1394 {
1395   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1396   udp_header_t *udp0;
1397   u16 sum16;
1398
1399   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1400           || ip0->protocol == IP_PROTOCOL_UDP);
1401
1402   udp0 = (void *) (ip0 + 1);
1403   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1404     {
1405       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1406                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1407       return p0->flags;
1408     }
1409
1410   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1411
1412   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1413                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1414
1415   return p0->flags;
1416 }
1417 #endif
1418
1419 /* *INDENT-OFF* */
1420 VNET_FEATURE_ARC_INIT (ip4_local) =
1421 {
1422   .arc_name  = "ip4-local",
1423   .start_nodes = VNET_FEATURES ("ip4-local"),
1424   .last_in_arc = "ip4-local-end-of-arc",
1425 };
1426 /* *INDENT-ON* */
1427
1428 static inline void
1429 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1430                             ip4_header_t * ip, u8 is_udp, u8 * error,
1431                             u8 * good_tcp_udp)
1432 {
1433   u32 flags0;
1434   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1435   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1436   if (is_udp)
1437     {
1438       udp_header_t *udp;
1439       u32 ip_len, udp_len;
1440       i32 len_diff;
1441       udp = ip4_next_header (ip);
1442       /* Verify UDP length. */
1443       ip_len = clib_net_to_host_u16 (ip->length);
1444       udp_len = clib_net_to_host_u16 (udp->length);
1445
1446       len_diff = ip_len - udp_len;
1447       *good_tcp_udp &= len_diff >= 0;
1448       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1449     }
1450 }
1451
/*
 * Checksum-state predicates on a buffer pointer (_b).
 *
 * Every argument and every expansion is fully parenthesised so the
 * macros can be negated, combined with other operators, or given a
 * non-trivial pointer expression without changing their meaning.
 */

/* Non-zero when TCP or UDP checksum offload is flagged on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
        || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/* Non-zero when the packet is TCP/UDP and its checksum has neither
 * been computed already nor been flagged as offloaded. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)            \
          || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
      || ip4_local_csum_is_offloaded (_b)) != 0)
1463
1464 static inline void
1465 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1466                          ip4_header_t * ih, u8 * error)
1467 {
1468   u8 is_udp, is_tcp_udp, good_tcp_udp;
1469
1470   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1471   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1472
1473   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1474     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1475   else
1476     good_tcp_udp = ip4_local_csum_is_valid (b);
1477
1478   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1479   *error = (is_tcp_udp && !good_tcp_udp
1480             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1481 }
1482
1483 static inline void
1484 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1485                             ip4_header_t ** ih, u8 * error)
1486 {
1487   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1488
1489   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1490   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1491
1492   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1493   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1494
1495   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1496   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1497
1498   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1499                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1500     {
1501       if (is_tcp_udp[0])
1502         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1503                                     &good_tcp_udp[0]);
1504       if (is_tcp_udp[1])
1505         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1506                                     &good_tcp_udp[1]);
1507     }
1508
1509   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1510               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1511   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1512               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1513 }
1514
1515 static inline void
1516 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1517                               vlib_buffer_t * b, u16 * next, u8 error,
1518                               u8 head_of_feature_arc)
1519 {
1520   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1521   u32 next_index;
1522
1523   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1524   b->error = error ? error_node->errors[error] : 0;
1525   if (head_of_feature_arc)
1526     {
1527       next_index = *next;
1528       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1529         {
1530           vnet_feature_arc_start (arc_index,
1531                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1532                                   &next_index, b);
1533           *next = next_index;
1534         }
1535     }
1536 }
1537
1538 typedef struct
1539 {
1540   ip4_address_t src;
1541   u32 lbi;
1542   u8 error;
1543   u8 first;
1544 } ip4_local_last_check_t;
1545
1546 static inline void
1547 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1548                      ip4_local_last_check_t * last_check, u8 * error0)
1549 {
1550   ip4_fib_mtrie_leaf_t leaf0;
1551   ip4_fib_mtrie_t *mtrie0;
1552   const dpo_id_t *dpo0;
1553   load_balance_t *lb0;
1554   u32 lbi0;
1555
1556   vnet_buffer (b)->ip.fib_index =
1557     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1558     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1559
1560   /*
1561    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1562    *  adjacency for the destination address (the local interface address).
1563    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1564    *  adjacency for the source address (the remote sender's address)
1565    */
1566   if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
1567       last_check->first)
1568     {
1569       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1570       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1571       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1572       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1573       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1574
1575       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1576         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1577       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1578
1579       lb0 = load_balance_get (lbi0);
1580       dpo0 = load_balance_get_bucket_i (lb0, 0);
1581
1582       /*
1583        * Must have a route to source otherwise we drop the packet.
1584        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1585        *
1586        * The checks are:
1587        *  - the source is a recieve => it's from us => bogus, do this
1588        *    first since it sets a different error code.
1589        *  - uRPF check for any route to source - accept if passes.
1590        *  - allow packets destined to the broadcast address from unknown sources
1591        */
1592
1593       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1594                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1595                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1596       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1597                   && !fib_urpf_check_size (lb0->lb_urpf)
1598                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1599                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1600
1601       last_check->src.as_u32 = ip0->src_address.as_u32;
1602       last_check->lbi = lbi0;
1603       last_check->error = *error0;
1604       last_check->first = 0;
1605     }
1606   else
1607     {
1608       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1609         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1610       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1611       *error0 = last_check->error;
1612     }
1613 }
1614
1615 static inline void
1616 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1617                         ip4_local_last_check_t * last_check, u8 * error)
1618 {
1619   ip4_fib_mtrie_leaf_t leaf[2];
1620   ip4_fib_mtrie_t *mtrie[2];
1621   const dpo_id_t *dpo[2];
1622   load_balance_t *lb[2];
1623   u32 not_last_hit;
1624   u32 lbi[2];
1625
1626   not_last_hit = last_check->first;
1627   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1628   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1629
1630   vnet_buffer (b[0])->ip.fib_index =
1631     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1632     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1633     vnet_buffer (b[0])->ip.fib_index;
1634
1635   vnet_buffer (b[1])->ip.fib_index =
1636     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1637     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1638     vnet_buffer (b[1])->ip.fib_index;
1639
1640   /*
1641    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1642    *  adjacency for the destination address (the local interface address).
1643    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1644    *  adjacency for the source address (the remote sender's address)
1645    */
1646   if (PREDICT_TRUE (not_last_hit))
1647     {
1648       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1649       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1650
1651       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1652       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1653
1654       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1655                                            &ip[0]->src_address, 2);
1656       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1657                                            &ip[1]->src_address, 2);
1658
1659       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1660                                            &ip[0]->src_address, 3);
1661       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1662                                            &ip[1]->src_address, 3);
1663
1664       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1665       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1666
1667       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1668         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1669       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1670
1671       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1672         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1673       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1674
1675       lb[0] = load_balance_get (lbi[0]);
1676       lb[1] = load_balance_get (lbi[1]);
1677
1678       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1679       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1680
1681       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1682                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1683                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1684       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1685                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1686                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1687                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1688
1689       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1690                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1691                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1692       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1693                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1694                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1695                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1696
1697       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1698       last_check->lbi = lbi[1];
1699       last_check->error = error[1];
1700       last_check->first = 0;
1701     }
1702   else
1703     {
1704       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1705         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1706       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1707
1708       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1709         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1710       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1711
1712       error[0] = last_check->error;
1713       error[1] = last_check->error;
1714     }
1715 }
1716
/**
 * Classification assigned to each packet by ip4_local_classify().
 *
 * IP_LOCAL_PACKET_TYPE_L4 must remain zero: ip4_local_inline() treats a
 * non-zero type as "skip the L4 checksum and source checks" and XORs the
 * types of two packets to detect a mixed pair.
 */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4 = 0,
  IP_LOCAL_PACKET_TYPE_NAT = 1,
  IP_LOCAL_PACKET_TYPE_FRAG = 2,
};
1723
1724 /**
1725  * Determine packet type and next node.
1726  *
1727  * The expectation is that all packets that are not L4 will skip
1728  * checksums and source checks.
1729  */
1730 always_inline u8
1731 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1732 {
1733   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1734
1735   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1736     {
1737       *next = IP_LOCAL_NEXT_REASSEMBLY;
1738       return IP_LOCAL_PACKET_TYPE_FRAG;
1739     }
1740   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1741     {
1742       *next = lm->local_next_by_ip_protocol[ip->protocol];
1743       return IP_LOCAL_PACKET_TYPE_NAT;
1744     }
1745
1746   *next = lm->local_next_by_ip_protocol[ip->protocol];
1747   return IP_LOCAL_PACKET_TYPE_L4;
1748 }
1749
1750 static inline uword
1751 ip4_local_inline (vlib_main_t * vm,
1752                   vlib_node_runtime_t * node,
1753                   vlib_frame_t * frame, int head_of_feature_arc)
1754 {
1755   u32 *from, n_left_from;
1756   vlib_node_runtime_t *error_node =
1757     vlib_node_get_runtime (vm, ip4_local_node.index);
1758   u16 nexts[VLIB_FRAME_SIZE], *next;
1759   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1760   ip4_header_t *ip[2];
1761   u8 error[2], pt[2];
1762
1763   ip4_local_last_check_t last_check = {
1764     /*
1765      * 0.0.0.0 can appear as the source address of an IP packet,
1766      * as can any other address, hence the need to use the 'first'
1767      * member to make sure the .lbi is initialised for the first
1768      * packet.
1769      */
1770     .src = {.as_u32 = 0},
1771     .lbi = ~0,
1772     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1773     .first = 1,
1774   };
1775
1776   from = vlib_frame_vector_args (frame);
1777   n_left_from = frame->n_vectors;
1778
1779   if (node->flags & VLIB_NODE_FLAG_TRACE)
1780     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1781
1782   vlib_get_buffers (vm, from, bufs, n_left_from);
1783   b = bufs;
1784   next = nexts;
1785
1786   while (n_left_from >= 6)
1787     {
1788       u8 not_batch = 0;
1789
1790       /* Prefetch next iteration. */
1791       {
1792         vlib_prefetch_buffer_header (b[4], LOAD);
1793         vlib_prefetch_buffer_header (b[5], LOAD);
1794
1795         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1796         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1797       }
1798
1799       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1800
1801       ip[0] = vlib_buffer_get_current (b[0]);
1802       ip[1] = vlib_buffer_get_current (b[1]);
1803
1804       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1805       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1806
1807       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1808       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1809
1810       not_batch = pt[0] ^ pt[1];
1811
1812       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1813         goto skip_checks;
1814
1815       if (PREDICT_TRUE (not_batch == 0))
1816         {
1817           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1818           ip4_local_check_src_x2 (b, ip, &last_check, error);
1819         }
1820       else
1821         {
1822           if (!pt[0])
1823             {
1824               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1825               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1826             }
1827           if (!pt[1])
1828             {
1829               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1830               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1831             }
1832         }
1833
1834     skip_checks:
1835
1836       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1837                                     head_of_feature_arc);
1838       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1839                                     head_of_feature_arc);
1840
1841       b += 2;
1842       next += 2;
1843       n_left_from -= 2;
1844     }
1845
1846   while (n_left_from > 0)
1847     {
1848       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1849
1850       ip[0] = vlib_buffer_get_current (b[0]);
1851       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1852       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1853
1854       if (head_of_feature_arc == 0 || pt[0])
1855         goto skip_check;
1856
1857       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1858       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1859
1860     skip_check:
1861
1862       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1863                                     head_of_feature_arc);
1864
1865       b += 1;
1866       next += 1;
1867       n_left_from -= 1;
1868     }
1869
1870   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1871   return frame->n_vectors;
1872 }
1873
1874 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1875                                vlib_frame_t * frame)
1876 {
1877   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1878 }
1879
1880 /* *INDENT-OFF* */
1881 VLIB_REGISTER_NODE (ip4_local_node) =
1882 {
1883   .name = "ip4-local",
1884   .vector_size = sizeof (u32),
1885   .format_trace = format_ip4_forward_next_trace,
1886   .n_errors = IP4_N_ERROR,
1887   .error_strings = ip4_error_strings,
1888   .n_next_nodes = IP_LOCAL_N_NEXT,
1889   .next_nodes =
1890   {
1891     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1892     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1893     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1894     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1895     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1896   },
1897 };
1898 /* *INDENT-ON* */
1899
1900
1901 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1902                                           vlib_node_runtime_t * node,
1903                                           vlib_frame_t * frame)
1904 {
1905   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1906 }
1907
1908 /* *INDENT-OFF* */
1909 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1910   .name = "ip4-local-end-of-arc",
1911   .vector_size = sizeof (u32),
1912
1913   .format_trace = format_ip4_forward_next_trace,
1914   .sibling_of = "ip4-local",
1915 };
1916
1917 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1918   .arc_name = "ip4-local",
1919   .node_name = "ip4-local-end-of-arc",
1920   .runs_before = 0, /* not before any other features */
1921 };
1922 /* *INDENT-ON* */
1923
1924 #ifndef CLIB_MARCH_VARIANT
1925 void
1926 ip4_register_protocol (u32 protocol, u32 node_index)
1927 {
1928   vlib_main_t *vm = vlib_get_main ();
1929   ip4_main_t *im = &ip4_main;
1930   ip_lookup_main_t *lm = &im->lookup_main;
1931
1932   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1933   lm->local_next_by_ip_protocol[protocol] =
1934     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1935 }
1936
1937 void
1938 ip4_unregister_protocol (u32 protocol)
1939 {
1940   ip4_main_t *im = &ip4_main;
1941   ip_lookup_main_t *lm = &im->lookup_main;
1942
1943   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1944   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1945 }
1946 #endif
1947
1948 static clib_error_t *
1949 show_ip_local_command_fn (vlib_main_t * vm,
1950                           unformat_input_t * input, vlib_cli_command_t * cmd)
1951 {
1952   ip4_main_t *im = &ip4_main;
1953   ip_lookup_main_t *lm = &im->lookup_main;
1954   int i;
1955
1956   vlib_cli_output (vm, "Protocols handled by ip4_local");
1957   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1958     {
1959       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1960         {
1961           u32 node_index = vlib_get_node (vm,
1962                                           ip4_local_node.index)->
1963             next_nodes[lm->local_next_by_ip_protocol[i]];
1964           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1965                            format_vlib_node_name, vm, node_index);
1966         }
1967     }
1968   return 0;
1969 }
1970
1971
1972
1973 /*?
1974  * Display the set of protocols handled by the local IPv4 stack.
1975  *
1976  * @cliexpar
1977  * Example of how to display local protocol table:
1978  * @cliexstart{show ip local}
1979  * Protocols handled by ip4_local
1980  * 1
1981  * 17
1982  * 47
1983  * @cliexend
1984 ?*/
1985 /* *INDENT-OFF* */
1986 VLIB_CLI_COMMAND (show_ip_local, static) =
1987 {
1988   .path = "show ip local",
1989   .function = show_ip_local_command_fn,
1990   .short_help = "show ip local",
1991 };
1992 /* *INDENT-ON* */
1993
/** Next-node indices used by the IPv4 rewrite path. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  /* Number of next nodes; must stay last. */
  IP4_REWRITE_N_NEXT
} ip4_rewrite_next_t;
2001
/**
 * The bits of an IPv4 address that are kept (masked in) when
 * constructing a multicast MAC address.  The constant differs by host
 * byte order; each variant has 23 bits set.
 */
#if !(CLIB_ARCH_IS_BIG_ENDIAN)
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
2011
2012 always_inline void
2013 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2014                u16 adj_packet_bytes, bool df, u16 * next,
2015                u8 is_midchain, u32 * error)
2016 {
2017   if (packet_len > adj_packet_bytes)
2018     {
2019       *error = IP4_ERROR_MTU_EXCEEDED;
2020       if (df)
2021         {
2022           icmp4_error_set_vnet_buffer
2023             (b, ICMP4_destination_unreachable,
2024              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2025              adj_packet_bytes);
2026           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2027         }
2028       else
2029         {
2030           /* IP fragmentation */
2031           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2032                                    (is_midchain ?
2033                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
2034                                     IP_FRAG_NEXT_IP_REWRITE), 0);
2035           *next = IP4_REWRITE_NEXT_FRAGMENT;
2036         }
2037     }
2038 }
2039
2040 /* increment TTL & update checksum.
2041    Works either endian, so no need for byte swap. */
2042 static_always_inline void
2043 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
2044 {
2045   i32 ttl;
2046   u32 checksum;
2047   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2048     return;
2049
2050   ttl = ip->ttl;
2051
2052   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
2053   checksum += checksum >= 0xffff;
2054
2055   ip->checksum = checksum;
2056   ttl += 1;
2057   ip->ttl = ttl;
2058
2059   ASSERT (ip->checksum == ip4_header_checksum (ip));
2060 }
2061
2062 /* Decrement TTL & update checksum.
2063    Works either endian, so no need for byte swap. */
2064 static_always_inline void
2065 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2066                             u32 * error)
2067 {
2068   i32 ttl;
2069   u32 checksum;
2070   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2071     return;
2072
2073   ttl = ip->ttl;
2074
2075   /* Input node should have reject packets with ttl 0. */
2076   ASSERT (ip->ttl > 0);
2077
2078   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2079   checksum += checksum >= 0xffff;
2080
2081   ip->checksum = checksum;
2082   ttl -= 1;
2083   ip->ttl = ttl;
2084
2085   /*
2086    * If the ttl drops below 1 when forwarding, generate
2087    * an ICMP response.
2088    */
2089   if (PREDICT_FALSE (ttl <= 0))
2090     {
2091       *error = IP4_ERROR_TIME_EXPIRED;
2092       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2093       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2094                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2095                                    0);
2096       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2097     }
2098
2099   /* Verify checksum. */
2100   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2101           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2102 }
2103
2104
2105 always_inline uword
2106 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2107                              vlib_node_runtime_t * node,
2108                              vlib_frame_t * frame,
2109                              int do_counters, int is_midchain, int is_mcast)
2110 {
2111   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2112   u32 *from = vlib_frame_vector_args (frame);
2113   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2114   u16 nexts[VLIB_FRAME_SIZE], *next;
2115   u32 n_left_from;
2116   vlib_node_runtime_t *error_node =
2117     vlib_node_get_runtime (vm, ip4_input_node.index);
2118
2119   n_left_from = frame->n_vectors;
2120   u32 thread_index = vm->thread_index;
2121
2122   vlib_get_buffers (vm, from, bufs, n_left_from);
2123   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2124
2125 #if (CLIB_N_PREFETCHES >= 8)
2126   if (n_left_from >= 6)
2127     {
2128       int i;
2129       for (i = 2; i < 6; i++)
2130         vlib_prefetch_buffer_header (bufs[i], LOAD);
2131     }
2132
2133   next = nexts;
2134   b = bufs;
2135   while (n_left_from >= 8)
2136     {
2137       const ip_adjacency_t *adj0, *adj1;
2138       ip4_header_t *ip0, *ip1;
2139       u32 rw_len0, error0, adj_index0;
2140       u32 rw_len1, error1, adj_index1;
2141       u32 tx_sw_if_index0, tx_sw_if_index1;
2142       u8 *p;
2143
2144       vlib_prefetch_buffer_header (b[6], LOAD);
2145       vlib_prefetch_buffer_header (b[7], LOAD);
2146
2147       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2148       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2149
2150       /*
2151        * pre-fetch the per-adjacency counters
2152        */
2153       if (do_counters)
2154         {
2155           vlib_prefetch_combined_counter (&adjacency_counters,
2156                                           thread_index, adj_index0);
2157           vlib_prefetch_combined_counter (&adjacency_counters,
2158                                           thread_index, adj_index1);
2159         }
2160
2161       ip0 = vlib_buffer_get_current (b[0]);
2162       ip1 = vlib_buffer_get_current (b[1]);
2163
2164       error0 = error1 = IP4_ERROR_NONE;
2165
2166       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2167       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2168
2169       /* Rewrite packet header and updates lengths. */
2170       adj0 = adj_get (adj_index0);
2171       adj1 = adj_get (adj_index1);
2172
2173       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2174       rw_len0 = adj0[0].rewrite_header.data_bytes;
2175       rw_len1 = adj1[0].rewrite_header.data_bytes;
2176       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2177       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2178
2179       p = vlib_buffer_get_current (b[2]);
2180       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2181       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2182
2183       p = vlib_buffer_get_current (b[3]);
2184       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2185       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2186
2187       /* Check MTU of outgoing interface. */
2188       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2189       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2190
2191       if (b[0]->flags & VNET_BUFFER_F_GSO)
2192         ip0_len = gso_mtu_sz (b[0]);
2193       if (b[1]->flags & VNET_BUFFER_F_GSO)
2194         ip1_len = gso_mtu_sz (b[1]);
2195
2196       ip4_mtu_check (b[0], ip0_len,
2197                      adj0[0].rewrite_header.max_l3_packet_bytes,
2198                      ip0->flags_and_fragment_offset &
2199                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2200                      next + 0, is_midchain, &error0);
2201       ip4_mtu_check (b[1], ip1_len,
2202                      adj1[0].rewrite_header.max_l3_packet_bytes,
2203                      ip1->flags_and_fragment_offset &
2204                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2205                      next + 1, is_midchain, &error1);
2206
2207       if (is_mcast)
2208         {
2209           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2210                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2211                     IP4_ERROR_SAME_INTERFACE : error0);
2212           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2213                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2214                     IP4_ERROR_SAME_INTERFACE : error1);
2215         }
2216
2217       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2218        * to see the IP header */
2219       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2220         {
2221           u32 next_index = adj0[0].rewrite_header.next_index;
2222           vlib_buffer_advance (b[0], -(word) rw_len0);
2223
2224           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2225           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2226
2227           if (PREDICT_FALSE
2228               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2229             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2230                                                 tx_sw_if_index0,
2231                                                 &next_index, b[0],
2232                                                 adj0->ia_cfg_index);
2233
2234           next[0] = next_index;
2235           if (is_midchain)
2236             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2237                                         0 /* is_ip6 */ ,
2238                                         0 /* with gso */ );
2239         }
2240       else
2241         {
2242           b[0]->error = error_node->errors[error0];
2243           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2244             ip4_ttl_inc (b[0], ip0);
2245         }
2246       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2247         {
2248           u32 next_index = adj1[0].rewrite_header.next_index;
2249           vlib_buffer_advance (b[1], -(word) rw_len1);
2250
2251           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2252           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2253
2254           if (PREDICT_FALSE
2255               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2256             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2257                                                 tx_sw_if_index1,
2258                                                 &next_index, b[1],
2259                                                 adj1->ia_cfg_index);
2260           next[1] = next_index;
2261           if (is_midchain)
2262             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2263                                         0 /* is_ip6 */ ,
2264                                         0 /* with gso */ );
2265         }
2266       else
2267         {
2268           b[1]->error = error_node->errors[error1];
2269           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2270             ip4_ttl_inc (b[1], ip1);
2271         }
2272
2273       if (is_midchain)
2274         /* Guess we are only writing on ipv4 header. */
2275         vnet_rewrite_two_headers (adj0[0], adj1[0],
2276                                   ip0, ip1, sizeof (ip4_header_t));
2277       else
2278         /* Guess we are only writing on simple Ethernet header. */
2279         vnet_rewrite_two_headers (adj0[0], adj1[0],
2280                                   ip0, ip1, sizeof (ethernet_header_t));
2281
2282       if (do_counters)
2283         {
2284           if (error0 == IP4_ERROR_NONE)
2285             vlib_increment_combined_counter
2286               (&adjacency_counters,
2287                thread_index,
2288                adj_index0, 1,
2289                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2290
2291           if (error1 == IP4_ERROR_NONE)
2292             vlib_increment_combined_counter
2293               (&adjacency_counters,
2294                thread_index,
2295                adj_index1, 1,
2296                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2297         }
2298
2299       if (is_midchain)
2300         {
2301           if (error0 == IP4_ERROR_NONE)
2302             adj_midchain_fixup (vm, adj0, b[0]);
2303           if (error1 == IP4_ERROR_NONE)
2304             adj_midchain_fixup (vm, adj1, b[1]);
2305         }
2306
2307       if (is_mcast)
2308         {
2309           /* copy bytes from the IP address into the MAC rewrite */
2310           if (error0 == IP4_ERROR_NONE)
2311             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2312                                         adj0->rewrite_header.dst_mcast_offset,
2313                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2314           if (error1 == IP4_ERROR_NONE)
2315             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2316                                         adj1->rewrite_header.dst_mcast_offset,
2317                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2318         }
2319
2320       next += 2;
2321       b += 2;
2322       n_left_from -= 2;
2323     }
2324 #elif (CLIB_N_PREFETCHES >= 4)
2325   next = nexts;
2326   b = bufs;
2327   while (n_left_from >= 1)
2328     {
2329       ip_adjacency_t *adj0;
2330       ip4_header_t *ip0;
2331       u32 rw_len0, error0, adj_index0;
2332       u32 tx_sw_if_index0;
2333       u8 *p;
2334
2335       /* Prefetch next iteration */
2336       if (PREDICT_TRUE (n_left_from >= 4))
2337         {
2338           ip_adjacency_t *adj2;
2339           u32 adj_index2;
2340
2341           vlib_prefetch_buffer_header (b[3], LOAD);
2342           vlib_prefetch_buffer_data (b[2], LOAD);
2343
2344           /* Prefetch adj->rewrite_header */
2345           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2346           adj2 = adj_get (adj_index2);
2347           p = (u8 *) adj2;
2348           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2349                          LOAD);
2350         }
2351
2352       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2353
2354       /*
2355        * Prefetch the per-adjacency counters
2356        */
2357       if (do_counters)
2358         {
2359           vlib_prefetch_combined_counter (&adjacency_counters,
2360                                           thread_index, adj_index0);
2361         }
2362
2363       ip0 = vlib_buffer_get_current (b[0]);
2364
2365       error0 = IP4_ERROR_NONE;
2366
2367       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2368
2369       /* Rewrite packet header and updates lengths. */
2370       adj0 = adj_get (adj_index0);
2371
2372       /* Rewrite header was prefetched. */
2373       rw_len0 = adj0[0].rewrite_header.data_bytes;
2374       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2375
2376       /* Check MTU of outgoing interface. */
2377       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2378
2379       if (b[0]->flags & VNET_BUFFER_F_GSO)
2380         ip0_len = gso_mtu_sz (b[0]);
2381
2382       ip4_mtu_check (b[0], ip0_len,
2383                      adj0[0].rewrite_header.max_l3_packet_bytes,
2384                      ip0->flags_and_fragment_offset &
2385                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2386                      next + 0, is_midchain, &error0);
2387
2388       if (is_mcast)
2389         {
2390           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2391                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2392                     IP4_ERROR_SAME_INTERFACE : error0);
2393         }
2394
2395       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2396        * to see the IP header */
2397       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2398         {
2399           u32 next_index = adj0[0].rewrite_header.next_index;
2400           vlib_buffer_advance (b[0], -(word) rw_len0);
2401           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2402           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2403
2404           if (PREDICT_FALSE
2405               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2406             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2407                                                 tx_sw_if_index0,
2408                                                 &next_index, b[0],
2409                                                 adj0->ia_cfg_index);
2410           next[0] = next_index;
2411
2412           if (is_midchain)
2413             {
2414               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2415                                           0 /* is_ip6 */ ,
2416                                           0 /* with gso */ );
2417
2418               /* Guess we are only writing on ipv4 header. */
2419               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2420             }
2421           else
2422             /* Guess we are only writing on simple Ethernet header. */
2423             vnet_rewrite_one_header (adj0[0], ip0,
2424                                      sizeof (ethernet_header_t));
2425
2426           /*
2427            * Bump the per-adjacency counters
2428            */
2429           if (do_counters)
2430             vlib_increment_combined_counter
2431               (&adjacency_counters,
2432                thread_index,
2433                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2434                                                            b[0]) + rw_len0);
2435
2436           if (is_midchain)
2437             adj_midchain_fixup (vm, adj0, b[0]);
2438
2439           if (is_mcast)
2440             /* copy bytes from the IP address into the MAC rewrite */
2441             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2442                                         adj0->rewrite_header.dst_mcast_offset,
2443                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2444         }
2445       else
2446         {
2447           b[0]->error = error_node->errors[error0];
2448           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2449             ip4_ttl_inc (b[0], ip0);
2450         }
2451
2452       next += 1;
2453       b += 1;
2454       n_left_from -= 1;
2455     }
2456 #endif
2457
2458   while (n_left_from > 0)
2459     {
2460       ip_adjacency_t *adj0;
2461       ip4_header_t *ip0;
2462       u32 rw_len0, adj_index0, error0;
2463       u32 tx_sw_if_index0;
2464
2465       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2466
2467       adj0 = adj_get (adj_index0);
2468
2469       if (do_counters)
2470         vlib_prefetch_combined_counter (&adjacency_counters,
2471                                         thread_index, adj_index0);
2472
2473       ip0 = vlib_buffer_get_current (b[0]);
2474
2475       error0 = IP4_ERROR_NONE;
2476
2477       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2478
2479
2480       /* Update packet buffer attributes/set output interface. */
2481       rw_len0 = adj0[0].rewrite_header.data_bytes;
2482       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2483
2484       /* Check MTU of outgoing interface. */
2485       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2486       if (b[0]->flags & VNET_BUFFER_F_GSO)
2487         ip0_len = gso_mtu_sz (b[0]);
2488
2489       ip4_mtu_check (b[0], ip0_len,
2490                      adj0[0].rewrite_header.max_l3_packet_bytes,
2491                      ip0->flags_and_fragment_offset &
2492                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2493                      next + 0, is_midchain, &error0);
2494
2495       if (is_mcast)
2496         {
2497           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2498                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2499                     IP4_ERROR_SAME_INTERFACE : error0);
2500         }
2501
2502       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2503        * to see the IP header */
2504       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2505         {
2506           u32 next_index = adj0[0].rewrite_header.next_index;
2507           vlib_buffer_advance (b[0], -(word) rw_len0);
2508           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2509           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2510
2511           if (PREDICT_FALSE
2512               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2513             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2514                                                 tx_sw_if_index0,
2515                                                 &next_index, b[0],
2516                                                 adj0->ia_cfg_index);
2517           next[0] = next_index;
2518
2519           if (is_midchain)
2520             {
2521               /* this acts on the packet that is about to be encapped */
2522               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2523                                           0 /* is_ip6 */ ,
2524                                           0 /* with gso */ );
2525
2526               /* Guess we are only writing on ipv4 header. */
2527               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2528             }
2529           else
2530             /* Guess we are only writing on simple Ethernet header. */
2531             vnet_rewrite_one_header (adj0[0], ip0,
2532                                      sizeof (ethernet_header_t));
2533
2534           if (do_counters)
2535             vlib_increment_combined_counter
2536               (&adjacency_counters,
2537                thread_index, adj_index0, 1,
2538                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2539
2540           if (is_midchain && adj0->sub_type.midchain.fixup_func)
2541             adj0->sub_type.midchain.fixup_func
2542               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2543
2544           if (is_mcast)
2545             /* copy bytes from the IP address into the MAC rewrite */
2546             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2547                                         adj0->rewrite_header.dst_mcast_offset,
2548                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2549         }
2550       else
2551         {
2552           b[0]->error = error_node->errors[error0];
2553           /* undo the TTL decrement - we'll be back to do it again */
2554           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2555             ip4_ttl_inc (b[0], ip0);
2556         }
2557
2558       next += 1;
2559       b += 1;
2560       n_left_from -= 1;
2561     }
2562
2563
2564   /* Need to do trace after rewrites to pick up new packet data. */
2565   if (node->flags & VLIB_NODE_FLAG_TRACE)
2566     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2567
2568   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2569   return frame->n_vectors;
2570 }
2571
2572 always_inline uword
2573 ip4_rewrite_inline (vlib_main_t * vm,
2574                     vlib_node_runtime_t * node,
2575                     vlib_frame_t * frame,
2576                     int do_counters, int is_midchain, int is_mcast)
2577 {
2578   return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2579                                       is_midchain, is_mcast);
2580 }
2581
2582
2583 /** @brief IPv4 rewrite node.
2584     @node ip4-rewrite
2585
2586     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2587     header checksum, fetch the ip adjacency, check the outbound mtu,
    apply the adjacency rewrite, and send pkts to the adjacency
    rewrite header's next_index.
2590
2591     @param vm vlib_main_t corresponding to the current thread
2592     @param node vlib_node_runtime_t
2593     @param frame vlib_frame_t whose contents should be dispatched
2594
2595     @par Graph mechanics: buffer metadata, next index usage
2596
2597     @em Uses:
2598     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2599         - the rewrite adjacency index
2600     - <code>adj->lookup_next_index</code>
2601         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2602           the packet will be dropped.
2603     - <code>adj->rewrite_header</code>
2604         - Rewrite string length, rewrite string, next_index
2605
2606     @em Sets:
2607     - <code>b->current_data, b->current_length</code>
2608         - Updated net of applying the rewrite string
2609
2610     <em>Next Indices:</em>
2611     - <code> adj->rewrite_header.next_index </code>
2612       or @c ip4-drop
2613 */
2614
2615 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2616                                  vlib_frame_t * frame)
2617 {
2618   if (adj_are_counters_enabled ())
2619     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2620   else
2621     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2622 }
2623
2624 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2625                                        vlib_node_runtime_t * node,
2626                                        vlib_frame_t * frame)
2627 {
2628   if (adj_are_counters_enabled ())
2629     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2630   else
2631     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2632 }
2633
2634 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2635                                   vlib_node_runtime_t * node,
2636                                   vlib_frame_t * frame)
2637 {
2638   if (adj_are_counters_enabled ())
2639     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2640   else
2641     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2642 }
2643
2644 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2645                                        vlib_node_runtime_t * node,
2646                                        vlib_frame_t * frame)
2647 {
2648   if (adj_are_counters_enabled ())
2649     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2650   else
2651     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2652 }
2653
2654 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2655                                         vlib_node_runtime_t * node,
2656                                         vlib_frame_t * frame)
2657 {
2658   if (adj_are_counters_enabled ())
2659     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2660   else
2661     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2662 }
2663
2664 /* *INDENT-OFF* */
2665 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2666   .name = "ip4-rewrite",
2667   .vector_size = sizeof (u32),
2668
2669   .format_trace = format_ip4_rewrite_trace,
2670
2671   .n_next_nodes = IP4_REWRITE_N_NEXT,
2672   .next_nodes = {
2673     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2674     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2675     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2676   },
2677 };
2678
2679 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2680   .name = "ip4-rewrite-bcast",
2681   .vector_size = sizeof (u32),
2682
2683   .format_trace = format_ip4_rewrite_trace,
2684   .sibling_of = "ip4-rewrite",
2685 };
2686
2687 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2688   .name = "ip4-rewrite-mcast",
2689   .vector_size = sizeof (u32),
2690
2691   .format_trace = format_ip4_rewrite_trace,
2692   .sibling_of = "ip4-rewrite",
2693 };
2694
2695 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2696   .name = "ip4-mcast-midchain",
2697   .vector_size = sizeof (u32),
2698
2699   .format_trace = format_ip4_rewrite_trace,
2700   .sibling_of = "ip4-rewrite",
2701 };
2702
2703 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2704   .name = "ip4-midchain",
2705   .vector_size = sizeof (u32),
2706   .format_trace = format_ip4_rewrite_trace,
2707   .sibling_of = "ip4-rewrite",
2708 };
2709 /* *INDENT-ON */
2710
2711 static int
2712 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2713 {
2714   ip4_fib_mtrie_t *mtrie0;
2715   ip4_fib_mtrie_leaf_t leaf0;
2716   u32 lbi0;
2717
2718   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2719
2720   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2721   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2722   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2723
2724   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2725
2726   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2727 }
2728
2729 static clib_error_t *
2730 test_lookup_command_fn (vlib_main_t * vm,
2731                         unformat_input_t * input, vlib_cli_command_t * cmd)
2732 {
2733   ip4_fib_t *fib;
2734   u32 table_id = 0;
2735   f64 count = 1;
2736   u32 n;
2737   int i;
2738   ip4_address_t ip4_base_address;
2739   u64 errors = 0;
2740
2741   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2742     {
2743       if (unformat (input, "table %d", &table_id))
2744         {
2745           /* Make sure the entry exists. */
2746           fib = ip4_fib_get (table_id);
2747           if ((fib) && (fib->index != table_id))
2748             return clib_error_return (0, "<fib-index> %d does not exist",
2749                                       table_id);
2750         }
2751       else if (unformat (input, "count %f", &count))
2752         ;
2753
2754       else if (unformat (input, "%U",
2755                          unformat_ip4_address, &ip4_base_address))
2756         ;
2757       else
2758         return clib_error_return (0, "unknown input `%U'",
2759                                   format_unformat_error, input);
2760     }
2761
2762   n = count;
2763
2764   for (i = 0; i < n; i++)
2765     {
2766       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2767         errors++;
2768
2769       ip4_base_address.as_u32 =
2770         clib_host_to_net_u32 (1 +
2771                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2772     }
2773
2774   if (errors)
2775     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2776   else
2777     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2778
2779   return 0;
2780 }
2781
2782 /*?
2783  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2784  * given FIB table to determine if there is a conflict with the
2785  * adjacency table. The fib-id can be determined by using the
2786  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2787  * of 0 is used.
2788  *
2789  * @todo This command uses fib-id, other commands use table-id (not
2790  * just a name, they are different indexes). Would like to change this
2791  * to table-id for consistency.
2792  *
2793  * @cliexpar
2794  * Example of how to run the test lookup command:
2795  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2796  * No errors in 2 lookups
2797  * @cliexend
2798 ?*/
2799 /* *INDENT-OFF* */
2800 VLIB_CLI_COMMAND (lookup_test_command, static) =
2801 {
2802   .path = "test lookup",
2803   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2804   .function = test_lookup_command_fn,
2805 };
2806 /* *INDENT-ON* */
2807
2808 #ifndef CLIB_MARCH_VARIANT
2809 int
2810 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2811 {
2812   u32 fib_index;
2813
2814   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2815
2816   if (~0 == fib_index)
2817     return VNET_API_ERROR_NO_SUCH_FIB;
2818
2819   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2820                                   flow_hash_config);
2821
2822   return 0;
2823 }
2824 #endif
2825
2826 static clib_error_t *
2827 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2828                              unformat_input_t * input,
2829                              vlib_cli_command_t * cmd)
2830 {
2831   int matched = 0;
2832   u32 table_id = 0;
2833   u32 flow_hash_config = 0;
2834   int rv;
2835
2836   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2837     {
2838       if (unformat (input, "table %d", &table_id))
2839         matched = 1;
2840 #define _(a,v) \
2841     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2842       foreach_flow_hash_bit
2843 #undef _
2844         else
2845         break;
2846     }
2847
2848   if (matched == 0)
2849     return clib_error_return (0, "unknown input `%U'",
2850                               format_unformat_error, input);
2851
2852   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2853   switch (rv)
2854     {
2855     case 0:
2856       break;
2857
2858     case VNET_API_ERROR_NO_SUCH_FIB:
2859       return clib_error_return (0, "no such FIB table %d", table_id);
2860
2861     default:
2862       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2863       break;
2864     }
2865
2866   return 0;
2867 }
2868
2869 /*?
2870  * Configure the set of IPv4 fields used by the flow hash.
2871  *
2872  * @cliexpar
2873  * Example of how to set the flow hash on a given table:
2874  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2876  * @cliexstart{show ip fib}
2877  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2878  * 0.0.0.0/0
2879  *   unicast-ip4-chain
2880  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2881  *     [0] [@0]: dpo-drop ip6
2882  * 0.0.0.0/32
2883  *   unicast-ip4-chain
2884  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2885  *     [0] [@0]: dpo-drop ip6
2886  * 224.0.0.0/8
2887  *   unicast-ip4-chain
2888  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2889  *     [0] [@0]: dpo-drop ip6
2890  * 6.0.1.2/32
2891  *   unicast-ip4-chain
2892  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2893  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2894  * 7.0.0.1/32
2895  *   unicast-ip4-chain
2896  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2897  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2898  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2899  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2900  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2901  * 240.0.0.0/8
2902  *   unicast-ip4-chain
2903  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2904  *     [0] [@0]: dpo-drop ip6
2905  * 255.255.255.255/32
2906  *   unicast-ip4-chain
2907  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2908  *     [0] [@0]: dpo-drop ip6
2909  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2910  * 0.0.0.0/0
2911  *   unicast-ip4-chain
2912  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2913  *     [0] [@0]: dpo-drop ip6
2914  * 0.0.0.0/32
2915  *   unicast-ip4-chain
2916  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2917  *     [0] [@0]: dpo-drop ip6
2918  * 172.16.1.0/24
2919  *   unicast-ip4-chain
2920  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2921  *     [0] [@4]: ipv4-glean: af_packet0
2922  * 172.16.1.1/32
2923  *   unicast-ip4-chain
2924  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2925  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2926  * 172.16.1.2/32
2927  *   unicast-ip4-chain
2928  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2929  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2930  * 172.16.2.0/24
2931  *   unicast-ip4-chain
2932  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2933  *     [0] [@4]: ipv4-glean: af_packet1
2934  * 172.16.2.1/32
2935  *   unicast-ip4-chain
2936  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2937  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2938  * 224.0.0.0/8
2939  *   unicast-ip4-chain
2940  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2941  *     [0] [@0]: dpo-drop ip6
2942  * 240.0.0.0/8
2943  *   unicast-ip4-chain
2944  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2945  *     [0] [@0]: dpo-drop ip6
2946  * 255.255.255.255/32
2947  *   unicast-ip4-chain
2948  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2949  *     [0] [@0]: dpo-drop ip6
2950  * @cliexend
2951 ?*/
2952 /* *INDENT-OFF* */
2953 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2954 {
2955   .path = "set ip flow-hash",
2956   .short_help =
2957   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2958   .function = set_ip_flow_hash_command_fn,
2959 };
2960 /* *INDENT-ON* */
2961
2962 #ifndef CLIB_MARCH_VARIANT
2963 int
2964 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2965                              u32 table_index)
2966 {
2967   vnet_main_t *vnm = vnet_get_main ();
2968   vnet_interface_main_t *im = &vnm->interface_main;
2969   ip4_main_t *ipm = &ip4_main;
2970   ip_lookup_main_t *lm = &ipm->lookup_main;
2971   vnet_classify_main_t *cm = &vnet_classify_main;
2972   ip4_address_t *if_addr;
2973
2974   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2975     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2976
2977   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2978     return VNET_API_ERROR_NO_SUCH_ENTRY;
2979
2980   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2981   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2982
2983   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2984
2985   if (NULL != if_addr)
2986     {
2987       fib_prefix_t pfx = {
2988         .fp_len = 32,
2989         .fp_proto = FIB_PROTOCOL_IP4,
2990         .fp_addr.ip4 = *if_addr,
2991       };
2992       u32 fib_index;
2993
2994       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2995                                                        sw_if_index);
2996
2997
2998       if (table_index != (u32) ~ 0)
2999         {
3000           dpo_id_t dpo = DPO_INVALID;
3001
3002           dpo_set (&dpo,
3003                    DPO_CLASSIFY,
3004                    DPO_PROTO_IP4,
3005                    classify_dpo_create (DPO_PROTO_IP4, table_index));
3006
3007           fib_table_entry_special_dpo_add (fib_index,
3008                                            &pfx,
3009                                            FIB_SOURCE_CLASSIFY,
3010                                            FIB_ENTRY_FLAG_NONE, &dpo);
3011           dpo_reset (&dpo);
3012         }
3013       else
3014         {
3015           fib_table_entry_special_remove (fib_index,
3016                                           &pfx, FIB_SOURCE_CLASSIFY);
3017         }
3018     }
3019
3020   return 0;
3021 }
3022 #endif
3023
3024 static clib_error_t *
3025 set_ip_classify_command_fn (vlib_main_t * vm,
3026                             unformat_input_t * input,
3027                             vlib_cli_command_t * cmd)
3028 {
3029   u32 table_index = ~0;
3030   int table_index_set = 0;
3031   u32 sw_if_index = ~0;
3032   int rv;
3033
3034   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3035     {
3036       if (unformat (input, "table-index %d", &table_index))
3037         table_index_set = 1;
3038       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3039                          vnet_get_main (), &sw_if_index))
3040         ;
3041       else
3042         break;
3043     }
3044
3045   if (table_index_set == 0)
3046     return clib_error_return (0, "classify table-index must be specified");
3047
3048   if (sw_if_index == ~0)
3049     return clib_error_return (0, "interface / subif must be specified");
3050
3051   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3052
3053   switch (rv)
3054     {
3055     case 0:
3056       break;
3057
3058     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3059       return clib_error_return (0, "No such interface");
3060
3061     case VNET_API_ERROR_NO_SUCH_ENTRY:
3062       return clib_error_return (0, "No such classifier table");
3063     }
3064   return 0;
3065 }
3066
3067 /*?
3068  * Assign a classification table to an interface. The classification
3069  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
3071  * on an interface.
3072  *
3073  * @cliexpar
3074  * Example of how to assign a classification table to an interface:
3075  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3076 ?*/
3077 /* *INDENT-OFF* */
3078 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3079 {
3080     .path = "set ip classify",
3081     .short_help =
3082     "set ip classify intfc <interface> table-index <classify-idx>",
3083     .function = set_ip_classify_command_fn,
3084 };
3085 /* *INDENT-ON* */
3086
3087 static clib_error_t *
3088 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3089 {
3090   ip4_main_t *im = &ip4_main;
3091   uword heapsize = 0;
3092
3093   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3094     {
3095       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3096         ;
3097       else
3098         return clib_error_return (0,
3099                                   "invalid heap-size parameter `%U'",
3100                                   format_unformat_error, input);
3101     }
3102
3103   im->mtrie_heap_size = heapsize;
3104
3105   return 0;
3106 }
3107
3108 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3109
3110 /*
3111  * fd.io coding-style-patch-verification: ON
3112  *
3113  * Local Variables:
3114  * eval: (c-set-style "gnu")
3115  * End:
3116  */