acff66d994b59a79e6a2ae03939004ac39e0b13e
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/classify_dpo.h>
56 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
57
58 #include <vnet/ip/ip4_forward.h>
59 #include <vnet/interface_output.h>
60 #include <vnet/classify/vnet_classify.h>
61
62 /** @brief IPv4 lookup node.
63     @node ip4-lookup
64
65     This is the main IPv4 lookup dispatch node.
66
67     @param vm vlib_main_t corresponding to the current thread
68     @param node vlib_node_runtime_t
69     @param frame vlib_frame_t whose contents should be dispatched
70
71     @par Graph mechanics: buffer metadata, next index usage
72
73     @em Uses:
74     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
75         - Indicates the @c sw_if_index value of the interface that the
76           packet was received on.
77     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
78         - When the value is @c ~0 then the node performs a longest prefix
79           match (LPM) for the packet destination address in the FIB attached
80           to the receive interface.
81         - Otherwise perform LPM for the packet destination address in the
82           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
83           value (0, 1, ...) and not a VRF id.
84
85     @em Sets:
86     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
87         - The lookup result adjacency index.
88
89     <em>Next Index:</em>
90     - Dispatches the packet to the node index found in
91       ip_adjacency_t @c adj->lookup_next_index
92       (where @c adj is the lookup result adjacency).
93 */
94 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
95                                 vlib_frame_t * frame)
96 {
97   return ip4_lookup_inline (vm, node, frame);
98 }
99
100 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
101
102 /* *INDENT-OFF* */
103 VLIB_REGISTER_NODE (ip4_lookup_node) =
104 {
105   .name = "ip4-lookup",
106   .vector_size = sizeof (u32),
107   .format_trace = format_ip4_lookup_trace,
108   .n_next_nodes = IP_LOOKUP_N_NEXT,
109   .next_nodes = IP4_LOOKUP_NEXT_NODES,
110 };
111 /* *INDENT-ON* */
112
113 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
114                                       vlib_node_runtime_t * node,
115                                       vlib_frame_t * frame)
116 {
117   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
118   u32 n_left, *from;
119   u32 thread_index = vm->thread_index;
120   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
121   u16 nexts[VLIB_FRAME_SIZE], *next;
122
123   from = vlib_frame_vector_args (frame);
124   n_left = frame->n_vectors;
125   next = nexts;
126
127   vlib_get_buffers (vm, from, bufs, n_left);
128
129   while (n_left >= 4)
130     {
131       const load_balance_t *lb0, *lb1;
132       const ip4_header_t *ip0, *ip1;
133       u32 lbi0, hc0, lbi1, hc1;
134       const dpo_id_t *dpo0, *dpo1;
135
136       /* Prefetch next iteration. */
137       {
138         vlib_prefetch_buffer_header (b[2], LOAD);
139         vlib_prefetch_buffer_header (b[3], LOAD);
140
141         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
142         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
143       }
144
145       ip0 = vlib_buffer_get_current (b[0]);
146       ip1 = vlib_buffer_get_current (b[1]);
147       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
148       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
149
150       lb0 = load_balance_get (lbi0);
151       lb1 = load_balance_get (lbi1);
152
153       /*
154        * this node is for via FIBs we can re-use the hash value from the
155        * to node if present.
156        * We don't want to use the same hash value at each level in the recursion
157        * graph as that would lead to polarisation
158        */
159       hc0 = hc1 = 0;
160
161       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
162         {
163           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
164             {
165               hc0 = vnet_buffer (b[0])->ip.flow_hash =
166                 vnet_buffer (b[0])->ip.flow_hash >> 1;
167             }
168           else
169             {
170               hc0 = vnet_buffer (b[0])->ip.flow_hash =
171                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
172             }
173           dpo0 = load_balance_get_fwd_bucket
174             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
175         }
176       else
177         {
178           dpo0 = load_balance_get_bucket_i (lb0, 0);
179         }
180       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
181         {
182           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
183             {
184               hc1 = vnet_buffer (b[1])->ip.flow_hash =
185                 vnet_buffer (b[1])->ip.flow_hash >> 1;
186             }
187           else
188             {
189               hc1 = vnet_buffer (b[1])->ip.flow_hash =
190                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
191             }
192           dpo1 = load_balance_get_fwd_bucket
193             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
194         }
195       else
196         {
197           dpo1 = load_balance_get_bucket_i (lb1, 0);
198         }
199
200       next[0] = dpo0->dpoi_next_node;
201       next[1] = dpo1->dpoi_next_node;
202
203       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
204       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
205
206       vlib_increment_combined_counter
207         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
208       vlib_increment_combined_counter
209         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
210
211       b += 2;
212       next += 2;
213       n_left -= 2;
214     }
215
216   while (n_left > 0)
217     {
218       const load_balance_t *lb0;
219       const ip4_header_t *ip0;
220       const dpo_id_t *dpo0;
221       u32 lbi0, hc0;
222
223       ip0 = vlib_buffer_get_current (b[0]);
224       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
225
226       lb0 = load_balance_get (lbi0);
227
228       hc0 = 0;
229       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
230         {
231           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
232             {
233               hc0 = vnet_buffer (b[0])->ip.flow_hash =
234                 vnet_buffer (b[0])->ip.flow_hash >> 1;
235             }
236           else
237             {
238               hc0 = vnet_buffer (b[0])->ip.flow_hash =
239                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
240             }
241           dpo0 = load_balance_get_fwd_bucket
242             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
243         }
244       else
245         {
246           dpo0 = load_balance_get_bucket_i (lb0, 0);
247         }
248
249       next[0] = dpo0->dpoi_next_node;
250       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
251
252       vlib_increment_combined_counter
253         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
254
255       b += 1;
256       next += 1;
257       n_left -= 1;
258     }
259
260   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
261   if (node->flags & VLIB_NODE_FLAG_TRACE)
262     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
263
264   return frame->n_vectors;
265 }
266
267 /* *INDENT-OFF* */
268 VLIB_REGISTER_NODE (ip4_load_balance_node) =
269 {
270   .name = "ip4-load-balance",
271   .vector_size = sizeof (u32),
272   .sibling_of = "ip4-lookup",
273   .format_trace = format_ip4_lookup_trace,
274 };
275 /* *INDENT-ON* */
276
277 #ifndef CLIB_MARCH_VARIANT
278 /* get first interface address */
279 ip4_address_t *
280 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
281                              ip_interface_address_t ** result_ia)
282 {
283   ip_lookup_main_t *lm = &im->lookup_main;
284   ip_interface_address_t *ia = 0;
285   ip4_address_t *result = 0;
286
287   /* *INDENT-OFF* */
288   foreach_ip_interface_address
289     (lm, ia, sw_if_index,
290      1 /* honor unnumbered */ ,
291      ({
292        ip4_address_t * a =
293          ip_interface_address_get_address (lm, ia);
294        result = a;
295        break;
296      }));
297   /* *INDENT-OFF* */
298   if (result_ia)
299     *result_ia = result ? ia : 0;
300   return result;
301 }
302 #endif
303
304 static void
305 ip4_add_subnet_bcast_route (u32 fib_index,
306                             fib_prefix_t *pfx,
307                             u32 sw_if_index)
308 {
309   vnet_sw_interface_flags_t iflags;
310
311   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
312
313   fib_table_entry_special_remove(fib_index,
314                                  pfx,
315                                  FIB_SOURCE_INTERFACE);
316
317   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
318     {
319       fib_table_entry_update_one_path (fib_index, pfx,
320                                        FIB_SOURCE_INTERFACE,
321                                        FIB_ENTRY_FLAG_NONE,
322                                        DPO_PROTO_IP4,
323                                        /* No next-hop address */
324                                        &ADJ_BCAST_ADDR,
325                                        sw_if_index,
326                                        // invalid FIB index
327                                        ~0,
328                                        1,
329                                        // no out-label stack
330                                        NULL,
331                                        FIB_ROUTE_PATH_FLAG_NONE);
332     }
333   else
334     {
335         fib_table_entry_special_add(fib_index,
336                                     pfx,
337                                     FIB_SOURCE_INTERFACE,
338                                     (FIB_ENTRY_FLAG_DROP |
339                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
340     }
341 }
342
343 static void
344 ip4_add_interface_prefix_routes (ip4_main_t *im,
345                                  u32 sw_if_index,
346                                  u32 fib_index,
347                                  ip_interface_address_t * a)
348 {
349   ip_lookup_main_t *lm = &im->lookup_main;
350   ip_interface_prefix_t *if_prefix;
351   ip4_address_t *address = ip_interface_address_get_address (lm, a);
352
353   ip_interface_prefix_key_t key = {
354     .prefix = {
355       .fp_len = a->address_length,
356       .fp_proto = FIB_PROTOCOL_IP4,
357       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
358     },
359     .sw_if_index = sw_if_index,
360   };
361
362   fib_prefix_t pfx_special = {
363     .fp_proto = FIB_PROTOCOL_IP4,
364   };
365
366   /* If prefix already set on interface, just increment ref count & return */
367   if_prefix = ip_get_interface_prefix (lm, &key);
368   if (if_prefix)
369     {
370       if_prefix->ref_count += 1;
371       return;
372     }
373
374   /* New prefix - allocate a pool entry, initialize it, add to the hash */
375   pool_get (lm->if_prefix_pool, if_prefix);
376   if_prefix->ref_count = 1;
377   if_prefix->src_ia_index = a - lm->if_address_pool;
378   clib_memcpy (&if_prefix->key, &key, sizeof (key));
379   mhash_set (&lm->prefix_to_if_prefix_index, &key,
380              if_prefix - lm->if_prefix_pool, 0 /* old value */);
381
382   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
383   if (a->address_length <= 30)
384     {
385       pfx_special.fp_len = a->address_length;
386       pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
387
388       /* set the glean route for the prefix */
389       fib_table_entry_update_one_path (fib_index, &pfx_special,
390                                        FIB_SOURCE_INTERFACE,
391                                        (FIB_ENTRY_FLAG_CONNECTED |
392                                         FIB_ENTRY_FLAG_ATTACHED),
393                                        DPO_PROTO_IP4,
394                                        /* No next-hop address */
395                                        NULL,
396                                        sw_if_index,
397                                        /* invalid FIB index */
398                                        ~0,
399                                        1,
400                                        /* no out-label stack */
401                                        NULL,
402                                        FIB_ROUTE_PATH_FLAG_NONE);
403
404       /* set a drop route for the base address of the prefix */
405       pfx_special.fp_len = 32;
406       pfx_special.fp_addr.ip4.as_u32 =
407         address->as_u32 & im->fib_masks[a->address_length];
408
409       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
410         fib_table_entry_special_add (fib_index, &pfx_special,
411                                      FIB_SOURCE_INTERFACE,
412                                      (FIB_ENTRY_FLAG_DROP |
413                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
414
415       /* set a route for the broadcast address of the prefix */
416       pfx_special.fp_len = 32;
417       pfx_special.fp_addr.ip4.as_u32 =
418         address->as_u32 | ~im->fib_masks[a->address_length];
419       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
420         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
421
422
423     }
424   /* length == 31 - add an attached route for the other address */
425   else if (a->address_length == 31)
426     {
427       pfx_special.fp_len = 32;
428       pfx_special.fp_addr.ip4.as_u32 =
429         address->as_u32 ^ clib_host_to_net_u32(1);
430
431       fib_table_entry_update_one_path (fib_index, &pfx_special,
432                                        FIB_SOURCE_INTERFACE,
433                                        (FIB_ENTRY_FLAG_ATTACHED),
434                                        DPO_PROTO_IP4,
435                                        &pfx_special.fp_addr,
436                                        sw_if_index,
437                                        /* invalid FIB index */
438                                        ~0,
439                                        1,
440                                        NULL,
441                                        FIB_ROUTE_PATH_FLAG_NONE);
442     }
443 }
444
445 static void
446 ip4_add_interface_routes (u32 sw_if_index,
447                           ip4_main_t * im, u32 fib_index,
448                           ip_interface_address_t * a)
449 {
450   ip_lookup_main_t *lm = &im->lookup_main;
451   ip4_address_t *address = ip_interface_address_get_address (lm, a);
452   fib_prefix_t pfx = {
453     .fp_len = 32,
454     .fp_proto = FIB_PROTOCOL_IP4,
455     .fp_addr.ip4 = *address,
456   };
457
458   /* set special routes for the prefix if needed */
459   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
460
461   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
462     {
463       u32 classify_table_index =
464         lm->classify_table_index_by_sw_if_index[sw_if_index];
465       if (classify_table_index != (u32) ~ 0)
466         {
467           dpo_id_t dpo = DPO_INVALID;
468
469           dpo_set (&dpo,
470                    DPO_CLASSIFY,
471                    DPO_PROTO_IP4,
472                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
473
474           fib_table_entry_special_dpo_add (fib_index,
475                                            &pfx,
476                                            FIB_SOURCE_CLASSIFY,
477                                            FIB_ENTRY_FLAG_NONE, &dpo);
478           dpo_reset (&dpo);
479         }
480     }
481
482   fib_table_entry_update_one_path (fib_index, &pfx,
483                                    FIB_SOURCE_INTERFACE,
484                                    (FIB_ENTRY_FLAG_CONNECTED |
485                                     FIB_ENTRY_FLAG_LOCAL),
486                                    DPO_PROTO_IP4,
487                                    &pfx.fp_addr,
488                                    sw_if_index,
489                                    // invalid FIB index
490                                    ~0,
491                                    1, NULL,
492                                    FIB_ROUTE_PATH_FLAG_NONE);
493 }
494
495 static void
496 ip4_del_interface_prefix_routes (ip4_main_t * im,
497                                  u32 sw_if_index,
498                                  u32 fib_index,
499                                  ip4_address_t * address,
500                                  u32 address_length)
501 {
502   ip_lookup_main_t *lm = &im->lookup_main;
503   ip_interface_prefix_t *if_prefix;
504
505   ip_interface_prefix_key_t key = {
506     .prefix = {
507       .fp_len = address_length,
508       .fp_proto = FIB_PROTOCOL_IP4,
509       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
510     },
511     .sw_if_index = sw_if_index,
512   };
513
514   fib_prefix_t pfx_special = {
515     .fp_len = 32,
516     .fp_proto = FIB_PROTOCOL_IP4,
517   };
518
519   if_prefix = ip_get_interface_prefix (lm, &key);
520   if (!if_prefix)
521     {
522       clib_warning ("Prefix not found while deleting %U",
523                     format_ip4_address_and_length, address, address_length);
524       return;
525     }
526
527   if_prefix->ref_count -= 1;
528
529   /*
530    * Routes need to be adjusted if:
531    * - deleting last intf addr in prefix
532    * - deleting intf addr used as default source address in glean adjacency
533    *
534    * We're done now otherwise
535    */
536   if ((if_prefix->ref_count > 0) &&
537       !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
538     return;
539
540   /* length <= 30, delete glean route, first address, last address */
541   if (address_length <= 30)
542     {
543
544       /* remove glean route for prefix */
545       pfx_special.fp_addr.ip4 = *address;
546       pfx_special.fp_len = address_length;
547       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
548
549       /* if no more intf addresses in prefix, remove other special routes */
550       if (!if_prefix->ref_count)
551         {
552           /* first address in prefix */
553           pfx_special.fp_addr.ip4.as_u32 =
554             address->as_u32 & im->fib_masks[address_length];
555           pfx_special.fp_len = 32;
556
557           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
558           fib_table_entry_special_remove (fib_index,
559                                           &pfx_special,
560                                           FIB_SOURCE_INTERFACE);
561
562           /* prefix broadcast address */
563           pfx_special.fp_addr.ip4.as_u32 =
564             address->as_u32 | ~im->fib_masks[address_length];
565           pfx_special.fp_len = 32;
566
567           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
568           fib_table_entry_special_remove (fib_index,
569                                           &pfx_special,
570                                           FIB_SOURCE_INTERFACE);
571         }
572       else
573         /* default source addr just got deleted, find another */
574         {
575           ip_interface_address_t *new_src_ia = NULL;
576           ip4_address_t *new_src_addr = NULL;
577
578           new_src_addr =
579             ip4_interface_address_matching_destination
580               (im, address, sw_if_index, &new_src_ia);
581
582           if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
583
584           pfx_special.fp_len = address_length;
585           pfx_special.fp_addr.ip4 = *new_src_addr;
586
587           /* set new glean route for the prefix */
588           fib_table_entry_update_one_path (fib_index, &pfx_special,
589                                            FIB_SOURCE_INTERFACE,
590                                            (FIB_ENTRY_FLAG_CONNECTED |
591                                             FIB_ENTRY_FLAG_ATTACHED),
592                                            DPO_PROTO_IP4,
593                                            /* No next-hop address */
594                                            NULL,
595                                            sw_if_index,
596                                            /* invalid FIB index */
597                                            ~0,
598                                            1,
599                                            /* no out-label stack */
600                                            NULL,
601                                            FIB_ROUTE_PATH_FLAG_NONE);
602           return;
603         }
604     }
605   /* length == 31, delete attached route for the other address */
606   else if (address_length == 31)
607     {
608       pfx_special.fp_addr.ip4.as_u32 =
609         address->as_u32 ^ clib_host_to_net_u32(1);
610
611       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
612     }
613
614   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
615   pool_put (lm->if_prefix_pool, if_prefix);
616 }
617
618 static void
619 ip4_del_interface_routes (u32 sw_if_index,
620                           ip4_main_t * im,
621                           u32 fib_index,
622                           ip4_address_t * address, u32 address_length)
623 {
624   fib_prefix_t pfx = {
625     .fp_len = address_length,
626     .fp_proto = FIB_PROTOCOL_IP4,
627     .fp_addr.ip4 = *address,
628   };
629
630   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
631                                    address, address_length);
632
633   pfx.fp_len = 32;
634   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
635 }
636
637 #ifndef CLIB_MARCH_VARIANT
638 void
639 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
640 {
641   ip4_main_t *im = &ip4_main;
642
643   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
644
645   /*
646    * enable/disable only on the 1<->0 transition
647    */
648   if (is_enable)
649     {
650       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
651         return;
652     }
653   else
654     {
655       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
656       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
657         return;
658     }
659   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
660                                !is_enable, 0, 0);
661
662
663   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
664                                sw_if_index, !is_enable, 0, 0);
665
666   {
667     ip4_enable_disable_interface_callback_t *cb;
668     vec_foreach (cb, im->enable_disable_interface_callbacks)
669       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
670   }
671 }
672
673 static clib_error_t *
674 ip4_add_del_interface_address_internal (vlib_main_t * vm,
675                                         u32 sw_if_index,
676                                         ip4_address_t * address,
677                                         u32 address_length, u32 is_del)
678 {
679   vnet_main_t *vnm = vnet_get_main ();
680   ip4_main_t *im = &ip4_main;
681   ip_lookup_main_t *lm = &im->lookup_main;
682   clib_error_t *error = 0;
683   u32 if_address_index;
684   ip4_address_fib_t ip4_af, *addr_fib = 0;
685
686   /* local0 interface doesn't support IP addressing  */
687   if (sw_if_index == 0)
688     {
689       return
690        clib_error_create ("local0 interface doesn't support IP addressing");
691     }
692
693   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
694   ip4_addr_fib_init (&ip4_af, address,
695                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
696   vec_add1 (addr_fib, ip4_af);
697
698   /*
699    * there is no support for adj-fib handling in the presence of overlapping
700    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
701    * most routers do.
702    */
703   /* *INDENT-OFF* */
704   if (!is_del)
705     {
706       /* When adding an address check that it does not conflict
707          with an existing address on any interface in this table. */
708       ip_interface_address_t *ia;
709       vnet_sw_interface_t *sif;
710
711       pool_foreach(sif, vnm->interface_main.sw_interfaces,
712       ({
713           if (im->fib_index_by_sw_if_index[sw_if_index] ==
714               im->fib_index_by_sw_if_index[sif->sw_if_index])
715             {
716               foreach_ip_interface_address
717                 (&im->lookup_main, ia, sif->sw_if_index,
718                  0 /* honor unnumbered */ ,
719                  ({
720                    ip4_address_t * x =
721                      ip_interface_address_get_address
722                      (&im->lookup_main, ia);
723
724                    if (ip4_destination_matches_route
725                        (im, address, x, ia->address_length) ||
726                        ip4_destination_matches_route (im,
727                                                       x,
728                                                       address,
729                                                       address_length))
730                      {
731                        /* an intf may have >1 addr from the same prefix */
732                        if ((sw_if_index == sif->sw_if_index) &&
733                            (ia->address_length == address_length) &&
734                            (x->as_u32 != address->as_u32))
735                          continue;
736
737                        if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
738                          /* if the address we're comparing against is stale
739                           * then the CP has not added this one back yet, maybe
740                           * it never will, so we have to assume it won't and
741                           * ignore it. if it does add it back, then it will fail
742                           * because this one is now present */
743                          continue;
744
745                        /* error if the length or intf was different */
746                        vnm->api_errno = VNET_API_ERROR_ADDRESS_IN_USE;
747
748                        error = clib_error_create
749                          ("failed to add %U on %U which conflicts with %U for interface %U",
750                           format_ip4_address_and_length, address,
751                           address_length,
752                           format_vnet_sw_if_index_name, vnm,
753                           sw_if_index,
754                           format_ip4_address_and_length, x,
755                           ia->address_length,
756                           format_vnet_sw_if_index_name, vnm,
757                           sif->sw_if_index);
758                        goto done;
759                      }
760                  }));
761             }
762       }));
763     }
764   /* *INDENT-ON* */
765
766   if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
767
768   if (is_del)
769     {
770       if (~0 == if_address_index)
771         {
772           vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
773           error = clib_error_create ("%U not found for interface %U",
774                                      lm->format_address_and_length,
775                                      addr_fib, address_length,
776                                      format_vnet_sw_if_index_name, vnm,
777                                      sw_if_index);
778           goto done;
779         }
780
781       ip_interface_address_del (lm, if_address_index, addr_fib);
782     }
783   else
784     {
785       if (~0 != if_address_index)
786         {
787           ip_interface_address_t *ia;
788
789           ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
790
791           if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
792             {
793               if (ia->sw_if_index == sw_if_index)
794                 {
795                   /* re-adding an address during the replace action.
796                    * consdier this the update. clear the flag and
797                    * we're done */
798                   ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
799                   goto done;
800                 }
801               else
802                 {
803                   /* The prefix is moving from one interface to another.
804                    * delete the stale and add the new */
805                   ip4_add_del_interface_address_internal (vm,
806                                                           ia->sw_if_index,
807                                                           address,
808                                                           address_length, 1);
809                   ia = NULL;
810                   error = ip_interface_address_add (lm, sw_if_index,
811                                                     addr_fib, address_length,
812                                                     &if_address_index);
813                 }
814             }
815           else
816             {
817               vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
818               error = clib_error_create
819                 ("Prefix %U already found on interface %U",
820                  lm->format_address_and_length, addr_fib, address_length,
821                  format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
822             }
823         }
824       else
825         error = ip_interface_address_add (lm, sw_if_index,
826                                           addr_fib, address_length,
827                                           &if_address_index);
828     }
829
830   if (error)
831     goto done;
832
833   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
834   ip4_mfib_interface_enable_disable (sw_if_index, !is_del);
835
836   /* intf addr routes are added/deleted on admin up/down */
837   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
838     {
839       if (is_del)
840         ip4_del_interface_routes (sw_if_index,
841                                   im, ip4_af.fib_index, address,
842                                   address_length);
843       else
844         ip4_add_interface_routes (sw_if_index,
845                                   im, ip4_af.fib_index,
846                                   pool_elt_at_index
847                                   (lm->if_address_pool, if_address_index));
848     }
849
850   ip4_add_del_interface_address_callback_t *cb;
851   vec_foreach (cb, im->add_del_interface_address_callbacks)
852     cb->function (im, cb->function_opaque, sw_if_index,
853                   address, address_length, if_address_index, is_del);
854
855 done:
856   vec_free (addr_fib);
857   return error;
858 }
859
860 clib_error_t *
861 ip4_add_del_interface_address (vlib_main_t * vm,
862                                u32 sw_if_index,
863                                ip4_address_t * address,
864                                u32 address_length, u32 is_del)
865 {
866   return ip4_add_del_interface_address_internal
867     (vm, sw_if_index, address, address_length, is_del);
868 }
869
870 void
871 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
872 {
873   ip_interface_address_t *ia;
874   ip4_main_t *im;
875
876   im = &ip4_main;
877
878   /*
879    * when directed broadcast is enabled, the subnet braodcast route will forward
880    * packets using an adjacency with a broadcast MAC. otherwise it drops
881    */
882   /* *INDENT-OFF* */
883   foreach_ip_interface_address(&im->lookup_main, ia,
884                                sw_if_index, 0,
885      ({
886        if (ia->address_length <= 30)
887          {
888            ip4_address_t *ipa;
889
890            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
891
892            fib_prefix_t pfx = {
893              .fp_len = 32,
894              .fp_proto = FIB_PROTOCOL_IP4,
895              .fp_addr = {
896                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
897              },
898            };
899
900            ip4_add_subnet_bcast_route
901              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
902                                                   sw_if_index),
903               &pfx, sw_if_index);
904          }
905      }));
906   /* *INDENT-ON* */
907 }
908 #endif
909
910 static clib_error_t *
911 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
912 {
913   ip4_main_t *im = &ip4_main;
914   ip_interface_address_t *ia;
915   ip4_address_t *a;
916   u32 is_admin_up, fib_index;
917
918   /* Fill in lookup tables with default table (0). */
919   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
920
921   vec_validate_init_empty (im->
922                            lookup_main.if_address_pool_index_by_sw_if_index,
923                            sw_if_index, ~0);
924
925   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
926
927   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
928
929   /* *INDENT-OFF* */
930   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
931                                 0 /* honor unnumbered */,
932   ({
933     a = ip_interface_address_get_address (&im->lookup_main, ia);
934     if (is_admin_up)
935       ip4_add_interface_routes (sw_if_index,
936                                 im, fib_index,
937                                 ia);
938     else
939       ip4_del_interface_routes (sw_if_index,
940                                 im, fib_index,
941                                 a, ia->address_length);
942   }));
943   /* *INDENT-ON* */
944
945   return 0;
946 }
947
948 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
949
950 /* Built-in ip4 unicast rx feature path definition */
951 /* *INDENT-OFF* */
952 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
953 {
954   .arc_name = "ip4-unicast",
955   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
956   .last_in_arc = "ip4-lookup",
957   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
958 };
959
960 VNET_FEATURE_INIT (ip4_flow_classify, static) =
961 {
962   .arc_name = "ip4-unicast",
963   .node_name = "ip4-flow-classify",
964   .runs_before = VNET_FEATURES ("ip4-inacl"),
965 };
966
967 VNET_FEATURE_INIT (ip4_inacl, static) =
968 {
969   .arc_name = "ip4-unicast",
970   .node_name = "ip4-inacl",
971   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
972 };
973
974 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
975 {
976   .arc_name = "ip4-unicast",
977   .node_name = "ip4-source-and-port-range-check-rx",
978   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
979 };
980
981 VNET_FEATURE_INIT (ip4_policer_classify, static) =
982 {
983   .arc_name = "ip4-unicast",
984   .node_name = "ip4-policer-classify",
985   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
986 };
987
988 VNET_FEATURE_INIT (ip4_ipsec, static) =
989 {
990   .arc_name = "ip4-unicast",
991   .node_name = "ipsec4-input-feature",
992   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
993 };
994
995 VNET_FEATURE_INIT (ip4_vpath, static) =
996 {
997   .arc_name = "ip4-unicast",
998   .node_name = "vpath-input-ip4",
999   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
1000 };
1001
1002 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
1003 {
1004   .arc_name = "ip4-unicast",
1005   .node_name = "ip4-vxlan-bypass",
1006   .runs_before = VNET_FEATURES ("ip4-lookup"),
1007 };
1008
1009 VNET_FEATURE_INIT (ip4_not_enabled, static) =
1010 {
1011   .arc_name = "ip4-unicast",
1012   .node_name = "ip4-not-enabled",
1013   .runs_before = VNET_FEATURES ("ip4-lookup"),
1014 };
1015
1016 VNET_FEATURE_INIT (ip4_lookup, static) =
1017 {
1018   .arc_name = "ip4-unicast",
1019   .node_name = "ip4-lookup",
1020   .runs_before = 0,     /* not before any other features */
1021 };
1022
1023 /* Built-in ip4 multicast rx feature path definition */
1024 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1025 {
1026   .arc_name = "ip4-multicast",
1027   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1028   .last_in_arc = "ip4-mfib-forward-lookup",
1029   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1030 };
1031
1032 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1033 {
1034   .arc_name = "ip4-multicast",
1035   .node_name = "vpath-input-ip4",
1036   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1037 };
1038
1039 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
1040 {
1041   .arc_name = "ip4-multicast",
1042   .node_name = "ip4-not-enabled",
1043   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1044 };
1045
1046 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1047 {
1048   .arc_name = "ip4-multicast",
1049   .node_name = "ip4-mfib-forward-lookup",
1050   .runs_before = 0,     /* last feature */
1051 };
1052
1053 /* Source and port-range check ip4 tx feature path definition */
1054 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1055 {
1056   .arc_name = "ip4-output",
1057   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1058   .last_in_arc = "interface-output",
1059   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1060 };
1061
1062 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1063 {
1064   .arc_name = "ip4-output",
1065   .node_name = "ip4-source-and-port-range-check-tx",
1066   .runs_before = VNET_FEATURES ("ip4-outacl"),
1067 };
1068
1069 VNET_FEATURE_INIT (ip4_outacl, static) =
1070 {
1071   .arc_name = "ip4-output",
1072   .node_name = "ip4-outacl",
1073   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1074 };
1075
1076 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1077 {
1078   .arc_name = "ip4-output",
1079   .node_name = "ipsec4-output-feature",
1080   .runs_before = VNET_FEATURES ("interface-output"),
1081 };
1082
1083 /* Built-in ip4 tx feature path definition */
1084 VNET_FEATURE_INIT (ip4_interface_output, static) =
1085 {
1086   .arc_name = "ip4-output",
1087   .node_name = "interface-output",
1088   .runs_before = 0,     /* not before any other features */
1089 };
1090 /* *INDENT-ON* */
1091
1092 static clib_error_t *
1093 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1094 {
1095   ip4_main_t *im = &ip4_main;
1096
1097   /* Fill in lookup tables with default table (0). */
1098   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1099   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1100
1101   if (!is_add)
1102     {
1103       ip4_main_t *im4 = &ip4_main;
1104       ip_lookup_main_t *lm4 = &im4->lookup_main;
1105       ip_interface_address_t *ia = 0;
1106       ip4_address_t *address;
1107       vlib_main_t *vm = vlib_get_main ();
1108
1109       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1110       /* *INDENT-OFF* */
1111       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1112       ({
1113         address = ip_interface_address_get_address (lm4, ia);
1114         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1115       }));
1116       /* *INDENT-ON* */
1117       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1118     }
1119
1120   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1121                                is_add, 0, 0);
1122
1123   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1124                                sw_if_index, is_add, 0, 0);
1125
1126   return /* no error */ 0;
1127 }
1128
1129 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1130
/*
 * Global IP4 main: the single ip4_main_t instance used throughout this
 * file.  It is only defined when CLIB_MARCH_VARIANT is not set, so builds
 * that define that macro reference the object instead of defining it again.
 */
#ifndef CLIB_MARCH_VARIANT
ip4_main_t ip4_main;
#endif /* CLIB_MARCH_VARIANT */
1135
1136 static clib_error_t *
1137 ip4_lookup_init (vlib_main_t * vm)
1138 {
1139   ip4_main_t *im = &ip4_main;
1140   clib_error_t *error;
1141   uword i;
1142
1143   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1144     return error;
1145   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1146     return (error);
1147   if ((error = vlib_call_init_function (vm, fib_module_init)))
1148     return error;
1149   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1150     return error;
1151
1152   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1153     {
1154       u32 m;
1155
1156       if (i < 32)
1157         m = pow2_mask (i) << (32 - i);
1158       else
1159         m = ~0;
1160       im->fib_masks[i] = clib_host_to_net_u32 (m);
1161     }
1162
1163   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1164
1165   /* Create FIB with index 0 and table id of 0. */
1166   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1167                                      FIB_SOURCE_DEFAULT_ROUTE);
1168   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1169                                       MFIB_SOURCE_DEFAULT_ROUTE);
1170
1171   {
1172     pg_node_t *pn;
1173     pn = pg_get_node (ip4_lookup_node.index);
1174     pn->unformat_edit = unformat_pg_ip4_header;
1175   }
1176
1177   {
1178     ethernet_arp_header_t h;
1179
1180     clib_memset (&h, 0, sizeof (h));
1181
1182 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1183 #define _8(f,v) h.f = v;
1184     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1185     _16 (l3_type, ETHERNET_TYPE_IP4);
1186     _8 (n_l2_address_bytes, 6);
1187     _8 (n_l3_address_bytes, 4);
1188     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1189 #undef _16
1190 #undef _8
1191
1192     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1193                                /* data */ &h,
1194                                sizeof (h),
1195                                /* alloc chunk size */ 8,
1196                                "ip4 arp");
1197   }
1198
1199   return error;
1200 }
1201
1202 VLIB_INIT_FUNCTION (ip4_lookup_init);
1203
/*
 * Trace record written by ip4_forward_next_trace() and decoded by the
 * format_ip4_*_trace() functions in this file.
 */
typedef struct
{
  /* Adjacency taken. */
  u32 dpo_index;
  /* Flow hash copied from the buffer's ip.flow_hash. */
  u32 flow_hash;
  /* TX sw_if_index from the buffer when it is set (!= ~0), otherwise the
   * fib index of the RX interface. */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite.
   * NOTE(review): only one u32 is subtracted from 64 although three u32
   * fields precede this array - confirm the intended record size. */
  u8 packet_data[64 - 1 * sizeof (u32)];
}
ip4_forward_next_trace_t;
1215
1216 #ifndef CLIB_MARCH_VARIANT
1217 u8 *
1218 format_ip4_forward_next_trace (u8 * s, va_list * args)
1219 {
1220   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1221   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1222   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1223   u32 indent = format_get_indent (s);
1224   s = format (s, "%U%U",
1225               format_white_space, indent,
1226               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1227   return s;
1228 }
1229 #endif
1230
1231 static u8 *
1232 format_ip4_lookup_trace (u8 * s, va_list * args)
1233 {
1234   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1235   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1236   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1237   u32 indent = format_get_indent (s);
1238
1239   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1240               t->fib_index, t->dpo_index, t->flow_hash);
1241   s = format (s, "\n%U%U",
1242               format_white_space, indent,
1243               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1244   return s;
1245 }
1246
1247 static u8 *
1248 format_ip4_rewrite_trace (u8 * s, va_list * args)
1249 {
1250   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1251   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1252   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1253   u32 indent = format_get_indent (s);
1254
1255   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1256               t->fib_index, t->dpo_index, format_ip_adjacency,
1257               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1258   s = format (s, "\n%U%U",
1259               format_white_space, indent,
1260               format_ip_adjacency_packet_data,
1261               t->packet_data, sizeof (t->packet_data));
1262   return s;
1263 }
1264
1265 #ifndef CLIB_MARCH_VARIANT
1266 /* Common trace function for all ip4-forward next nodes. */
1267 void
1268 ip4_forward_next_trace (vlib_main_t * vm,
1269                         vlib_node_runtime_t * node,
1270                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1271 {
1272   u32 *from, n_left;
1273   ip4_main_t *im = &ip4_main;
1274
1275   n_left = frame->n_vectors;
1276   from = vlib_frame_vector_args (frame);
1277
1278   while (n_left >= 4)
1279     {
1280       u32 bi0, bi1;
1281       vlib_buffer_t *b0, *b1;
1282       ip4_forward_next_trace_t *t0, *t1;
1283
1284       /* Prefetch next iteration. */
1285       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1286       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1287
1288       bi0 = from[0];
1289       bi1 = from[1];
1290
1291       b0 = vlib_get_buffer (vm, bi0);
1292       b1 = vlib_get_buffer (vm, bi1);
1293
1294       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1295         {
1296           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1297           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1298           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1299           t0->fib_index =
1300             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1301              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1302             vec_elt (im->fib_index_by_sw_if_index,
1303                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1304
1305           clib_memcpy_fast (t0->packet_data,
1306                             vlib_buffer_get_current (b0),
1307                             sizeof (t0->packet_data));
1308         }
1309       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1310         {
1311           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1312           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1313           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1314           t1->fib_index =
1315             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1316              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1317             vec_elt (im->fib_index_by_sw_if_index,
1318                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1319           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1320                             sizeof (t1->packet_data));
1321         }
1322       from += 2;
1323       n_left -= 2;
1324     }
1325
1326   while (n_left >= 1)
1327     {
1328       u32 bi0;
1329       vlib_buffer_t *b0;
1330       ip4_forward_next_trace_t *t0;
1331
1332       bi0 = from[0];
1333
1334       b0 = vlib_get_buffer (vm, bi0);
1335
1336       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1337         {
1338           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1339           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1340           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1341           t0->fib_index =
1342             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1343              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1344             vec_elt (im->fib_index_by_sw_if_index,
1345                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1346           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1347                             sizeof (t0->packet_data));
1348         }
1349       from += 1;
1350       n_left -= 1;
1351     }
1352 }
1353
1354 /* Compute TCP/UDP/ICMP4 checksum in software. */
1355 u16
1356 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1357                               ip4_header_t * ip0)
1358 {
1359   ip_csum_t sum0;
1360   u32 ip_header_length, payload_length_host_byte_order;
1361
1362   /* Initialize checksum with ip header. */
1363   ip_header_length = ip4_header_bytes (ip0);
1364   payload_length_host_byte_order =
1365     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1366   sum0 =
1367     clib_host_to_net_u32 (payload_length_host_byte_order +
1368                           (ip0->protocol << 16));
1369
1370   if (BITS (uword) == 32)
1371     {
1372       sum0 =
1373         ip_csum_with_carry (sum0,
1374                             clib_mem_unaligned (&ip0->src_address, u32));
1375       sum0 =
1376         ip_csum_with_carry (sum0,
1377                             clib_mem_unaligned (&ip0->dst_address, u32));
1378     }
1379   else
1380     sum0 =
1381       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1382
1383   return ip_calculate_l4_checksum (vm, p0, sum0,
1384                                    payload_length_host_byte_order, (u8 *) ip0,
1385                                    ip_header_length, NULL);
1386 }
1387
1388 u32
1389 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1390 {
1391   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1392   udp_header_t *udp0;
1393   u16 sum16;
1394
1395   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1396           || ip0->protocol == IP_PROTOCOL_UDP);
1397
1398   udp0 = (void *) (ip0 + 1);
1399   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1400     {
1401       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1402                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1403       return p0->flags;
1404     }
1405
1406   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1407
1408   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1409                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1410
1411   return p0->flags;
1412 }
1413 #endif
1414
1415 /* *INDENT-OFF* */
1416 VNET_FEATURE_ARC_INIT (ip4_local) =
1417 {
1418   .arc_name  = "ip4-local",
1419   .start_nodes = VNET_FEATURES ("ip4-local"),
1420   .last_in_arc = "ip4-local-end-of-arc",
1421 };
1422 /* *INDENT-ON* */
1423
1424 static inline void
1425 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1426                             ip4_header_t * ip, u8 is_udp, u8 * error,
1427                             u8 * good_tcp_udp)
1428 {
1429   u32 flags0;
1430   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1431   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1432   if (is_udp)
1433     {
1434       udp_header_t *udp;
1435       u32 ip_len, udp_len;
1436       i32 len_diff;
1437       udp = ip4_next_header (ip);
1438       /* Verify UDP length. */
1439       ip_len = clib_net_to_host_u16 (ip->length);
1440       udp_len = clib_net_to_host_u16 (udp->length);
1441
1442       len_diff = ip_len - udp_len;
1443       *good_tcp_udp &= len_diff >= 0;
1444       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1445     }
1446 }
1447
/*
 * Checksum-state predicates on a buffer pointer _b.  Each expansion, and
 * each macro argument inside it, is fully parenthesised so the macros can
 * be negated, combined with other operators, or given expression
 * arguments without precedence surprises.
 */

/* Non-zero when TCP or UDP checksum offload is flagged on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
        || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/* Non-zero when the packet is TCP/UDP and its checksum has neither been
 * computed already nor been flagged for offload. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
        && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)         \
            || ip4_local_csum_is_offloaded (_b)))

/* 1 when the buffer is flagged checksum-correct or flagged for checksum
 * offload, 0 otherwise. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
        || ip4_local_csum_is_offloaded (_b)) != 0)
1459
1460 static inline void
1461 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1462                          ip4_header_t * ih, u8 * error)
1463 {
1464   u8 is_udp, is_tcp_udp, good_tcp_udp;
1465
1466   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1467   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1468
1469   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1470     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1471   else
1472     good_tcp_udp = ip4_local_csum_is_valid (b);
1473
1474   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1475   *error = (is_tcp_udp && !good_tcp_udp
1476             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1477 }
1478
1479 static inline void
1480 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1481                             ip4_header_t ** ih, u8 * error)
1482 {
1483   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1484
1485   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1486   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1487
1488   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1489   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1490
1491   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1492   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1493
1494   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1495                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1496     {
1497       if (is_tcp_udp[0])
1498         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1499                                     &good_tcp_udp[0]);
1500       if (is_tcp_udp[1])
1501         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1502                                     &good_tcp_udp[1]);
1503     }
1504
1505   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1506               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1507   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1508               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1509 }
1510
1511 static inline void
1512 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1513                               vlib_buffer_t * b, u16 * next, u8 error,
1514                               u8 head_of_feature_arc)
1515 {
1516   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1517   u32 next_index;
1518
1519   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1520   b->error = error ? error_node->errors[error] : 0;
1521   if (head_of_feature_arc)
1522     {
1523       next_index = *next;
1524       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1525         {
1526           vnet_feature_arc_start (arc_index,
1527                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1528                                   &next_index, b);
1529           *next = next_index;
1530         }
1531     }
1532 }
1533
/*
 * One-entry cache of the most recent source-address check, consulted by
 * ip4_local_check_src() and ip4_local_check_src_x2().
 */
typedef struct
{
  /* Source address of the last packet that went through the lookup. */
  ip4_address_t src;
  /* Load-balance index found for that source. */
  u32 lbi;
  /* Error code the check produced for that source. */
  u8 error;
  /* Non-zero forces a lookup; the check functions clear it after one. */
  u8 first;
} ip4_local_last_check_t;
1541
1542 static inline void
1543 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1544                      ip4_local_last_check_t * last_check, u8 * error0)
1545 {
1546   ip4_fib_mtrie_leaf_t leaf0;
1547   ip4_fib_mtrie_t *mtrie0;
1548   const dpo_id_t *dpo0;
1549   load_balance_t *lb0;
1550   u32 lbi0;
1551
1552   vnet_buffer (b)->ip.fib_index =
1553     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1554     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1555
1556   /*
1557    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1558    *  adjacency for the destination address (the local interface address).
1559    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1560    *  adjacency for the source address (the remote sender's address)
1561    */
1562   if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
1563       last_check->first)
1564     {
1565       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1566       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1567       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1568       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1569       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1570
1571       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1572         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1573       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1574
1575       lb0 = load_balance_get (lbi0);
1576       dpo0 = load_balance_get_bucket_i (lb0, 0);
1577
1578       /*
1579        * Must have a route to source otherwise we drop the packet.
1580        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1581        *
1582        * The checks are:
1583        *  - the source is a recieve => it's from us => bogus, do this
1584        *    first since it sets a different error code.
1585        *  - uRPF check for any route to source - accept if passes.
1586        *  - allow packets destined to the broadcast address from unknown sources
1587        */
1588
1589       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1590                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1591                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1592       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1593                   && !fib_urpf_check_size (lb0->lb_urpf)
1594                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1595                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1596
1597       last_check->src.as_u32 = ip0->src_address.as_u32;
1598       last_check->lbi = lbi0;
1599       last_check->error = *error0;
1600       last_check->first = 0;
1601     }
1602   else
1603     {
1604       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1605         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1606       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1607       *error0 = last_check->error;
1608     }
1609 }
1610
/*
 * Source check for a pair of packets arriving at ip4-local; the
 * two-buffer counterpart of ip4_local_check_src().
 *
 * b, ip and error are two-element arrays describing the same two packets.
 * Both lookups are done unless both source addresses equal the cached one
 * and the cache has already been primed.  After a lookup the cache is
 * refreshed from the second packet only.
 *
 * NOTE(review): the cache is keyed on the source address alone; the fib
 * index is not part of the comparison - confirm this is intended when
 * packets of one frame use different fibs.
 */
static inline void
ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
                        ip4_local_last_check_t * last_check, u8 * error)
{
  ip4_fib_mtrie_leaf_t leaf[2];
  ip4_fib_mtrie_t *mtrie[2];
  const dpo_id_t *dpo[2];
  load_balance_t *lb[2];
  u32 not_last_hit;
  u32 lbi[2];

  /* Non-zero when the cache is unprimed or either source differs from
   * the cached one. */
  not_last_hit = last_check->first;
  not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
  not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;

  /* A TX sw_if_index that is set (!= ~0) replaces the fib index. */
  vnet_buffer (b[0])->ip.fib_index =
    vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
    vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
    vnet_buffer (b[0])->ip.fib_index;

  vnet_buffer (b[1])->ip.fib_index =
    vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
    vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
    vnet_buffer (b[1])->ip.fib_index;

  /*
   * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
   *  adjacency for the destination address (the local interface address).
   * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
   *  adjacency for the source address (the remote sender's address)
   */
  if (PREDICT_TRUE (not_last_hit))
    {
      mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
      mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;

      /* Three-step mtrie walk on each source address; the two walks are
       * written step by step, alternating between the packets. */
      leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
      leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);

      leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
                                           &ip[0]->src_address, 2);
      leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
                                           &ip[1]->src_address, 2);

      leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
                                           &ip[0]->src_address, 3);
      leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
                                           &ip[1]->src_address, 3);

      lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
      lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);

      /* Save the destination adjacency in RX before TX is overwritten
       * with the source lookup result. */
      vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
        vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];

      vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
        vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];

      lb[0] = load_balance_get (lbi[0]);
      lb[1] = load_balance_get (lbi[1]);

      dpo[0] = load_balance_get_bucket_i (lb[0], 0);
      dpo[1] = load_balance_get_bucket_i (lb[1], 0);

      /* Packet 0: a source that resolves to a receive DPO is spoofed;
       * otherwise a failed uRPF check is a source lookup miss unless the
       * destination is the limited broadcast address.  Both tests only
       * fire while the error is still IP4_ERROR_UNKNOWN_PROTOCOL. */
      error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   dpo[0]->dpoi_type == DPO_RECEIVE) ?
                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
      error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   !fib_urpf_check_size (lb[0]->lb_urpf) &&
                   ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
                  ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);

      /* Packet 1: same two tests. */
      error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   dpo[1]->dpoi_type == DPO_RECEIVE) ?
                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
      error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   !fib_urpf_check_size (lb[1]->lb_urpf) &&
                   ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
                  ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);

      /* Only the second packet's result is cached. */
      last_check->src.as_u32 = ip[1]->src_address.as_u32;
      last_check->lbi = lbi[1];
      last_check->error = error[1];
      last_check->first = 0;
    }
  else
    {
      /* Both sources match the cache: reuse its index and error for both
       * packets (the incoming error values are overwritten). */
      vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
        vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;

      vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
        vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;

      error[0] = last_check->error;
      error[1] = last_check->error;
    }
}
1712
/*
 * How ip4-local classified a packet.
 *
 * L4 is pinned to zero: the ip4-local dispatch code tests the
 * classification for truth to decide whether a packet skips the
 * L4 checksum and source checks.
 */
enum ip_local_packet_type_e
{
  /* ordinary L4 payload - subject to checksum and source checks */
  IP_LOCAL_PACKET_TYPE_L4 = 0,
  /* buffer carries VNET_BUFFER_F_IS_NATED */
  IP_LOCAL_PACKET_TYPE_NAT = 1,
  /* IPv4 fragment - goes to reassembly */
  IP_LOCAL_PACKET_TYPE_FRAG = 2,
};
1719
1720 /**
1721  * Determine packet type and next node.
1722  *
1723  * The expectation is that all packets that are not L4 will skip
1724  * checksums and source checks.
1725  */
1726 always_inline u8
1727 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1728 {
1729   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1730
1731   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1732     {
1733       *next = IP_LOCAL_NEXT_REASSEMBLY;
1734       return IP_LOCAL_PACKET_TYPE_FRAG;
1735     }
1736   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1737     {
1738       *next = lm->local_next_by_ip_protocol[ip->protocol];
1739       return IP_LOCAL_PACKET_TYPE_NAT;
1740     }
1741
1742   *next = lm->local_next_by_ip_protocol[ip->protocol];
1743   return IP_LOCAL_PACKET_TYPE_L4;
1744 }
1745
1746 static inline uword
1747 ip4_local_inline (vlib_main_t * vm,
1748                   vlib_node_runtime_t * node,
1749                   vlib_frame_t * frame, int head_of_feature_arc)
1750 {
1751   u32 *from, n_left_from;
1752   vlib_node_runtime_t *error_node =
1753     vlib_node_get_runtime (vm, ip4_local_node.index);
1754   u16 nexts[VLIB_FRAME_SIZE], *next;
1755   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1756   ip4_header_t *ip[2];
1757   u8 error[2], pt[2];
1758
1759   ip4_local_last_check_t last_check = {
1760     /*
1761      * 0.0.0.0 can appear as the source address of an IP packet,
1762      * as can any other address, hence the need to use the 'first'
1763      * member to make sure the .lbi is initialised for the first
1764      * packet.
1765      */
1766     .src = {.as_u32 = 0},
1767     .lbi = ~0,
1768     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1769     .first = 1,
1770   };
1771
1772   from = vlib_frame_vector_args (frame);
1773   n_left_from = frame->n_vectors;
1774
1775   if (node->flags & VLIB_NODE_FLAG_TRACE)
1776     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1777
1778   vlib_get_buffers (vm, from, bufs, n_left_from);
1779   b = bufs;
1780   next = nexts;
1781
1782   while (n_left_from >= 6)
1783     {
1784       u8 not_batch = 0;
1785
1786       /* Prefetch next iteration. */
1787       {
1788         vlib_prefetch_buffer_header (b[4], LOAD);
1789         vlib_prefetch_buffer_header (b[5], LOAD);
1790
1791         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1792         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1793       }
1794
1795       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1796
1797       ip[0] = vlib_buffer_get_current (b[0]);
1798       ip[1] = vlib_buffer_get_current (b[1]);
1799
1800       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1801       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1802
1803       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1804       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1805
1806       not_batch = pt[0] ^ pt[1];
1807
1808       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1809         goto skip_checks;
1810
1811       if (PREDICT_TRUE (not_batch == 0))
1812         {
1813           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1814           ip4_local_check_src_x2 (b, ip, &last_check, error);
1815         }
1816       else
1817         {
1818           if (!pt[0])
1819             {
1820               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1821               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1822             }
1823           if (!pt[1])
1824             {
1825               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1826               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1827             }
1828         }
1829
1830     skip_checks:
1831
1832       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1833                                     head_of_feature_arc);
1834       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1835                                     head_of_feature_arc);
1836
1837       b += 2;
1838       next += 2;
1839       n_left_from -= 2;
1840     }
1841
1842   while (n_left_from > 0)
1843     {
1844       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1845
1846       ip[0] = vlib_buffer_get_current (b[0]);
1847       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1848       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1849
1850       if (head_of_feature_arc == 0 || pt[0])
1851         goto skip_check;
1852
1853       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1854       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1855
1856     skip_check:
1857
1858       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1859                                     head_of_feature_arc);
1860
1861       b += 1;
1862       next += 1;
1863       n_left_from -= 1;
1864     }
1865
1866   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1867   return frame->n_vectors;
1868 }
1869
1870 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1871                                vlib_frame_t * frame)
1872 {
1873   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1874 }
1875
1876 /* *INDENT-OFF* */
1877 VLIB_REGISTER_NODE (ip4_local_node) =
1878 {
1879   .name = "ip4-local",
1880   .vector_size = sizeof (u32),
1881   .format_trace = format_ip4_forward_next_trace,
1882   .n_errors = IP4_N_ERROR,
1883   .error_strings = ip4_error_strings,
1884   .n_next_nodes = IP_LOCAL_N_NEXT,
1885   .next_nodes =
1886   {
1887     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1888     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1889     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1890     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1891     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1892   },
1893 };
1894 /* *INDENT-ON* */
1895
1896
1897 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1898                                           vlib_node_runtime_t * node,
1899                                           vlib_frame_t * frame)
1900 {
1901   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1902 }
1903
1904 /* *INDENT-OFF* */
1905 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1906   .name = "ip4-local-end-of-arc",
1907   .vector_size = sizeof (u32),
1908
1909   .format_trace = format_ip4_forward_next_trace,
1910   .sibling_of = "ip4-local",
1911 };
1912
1913 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1914   .arc_name = "ip4-local",
1915   .node_name = "ip4-local-end-of-arc",
1916   .runs_before = 0, /* not before any other features */
1917 };
1918 /* *INDENT-ON* */
1919
1920 #ifndef CLIB_MARCH_VARIANT
1921 void
1922 ip4_register_protocol (u32 protocol, u32 node_index)
1923 {
1924   vlib_main_t *vm = vlib_get_main ();
1925   ip4_main_t *im = &ip4_main;
1926   ip_lookup_main_t *lm = &im->lookup_main;
1927
1928   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1929   lm->local_next_by_ip_protocol[protocol] =
1930     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1931 }
1932
1933 void
1934 ip4_unregister_protocol (u32 protocol)
1935 {
1936   ip4_main_t *im = &ip4_main;
1937   ip_lookup_main_t *lm = &im->lookup_main;
1938
1939   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1940   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1941 }
1942 #endif
1943
1944 static clib_error_t *
1945 show_ip_local_command_fn (vlib_main_t * vm,
1946                           unformat_input_t * input, vlib_cli_command_t * cmd)
1947 {
1948   ip4_main_t *im = &ip4_main;
1949   ip_lookup_main_t *lm = &im->lookup_main;
1950   int i;
1951
1952   vlib_cli_output (vm, "Protocols handled by ip4_local");
1953   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1954     {
1955       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1956         {
1957           u32 node_index = vlib_get_node (vm,
1958                                           ip4_local_node.index)->
1959             next_nodes[lm->local_next_by_ip_protocol[i]];
1960           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1961                            format_vlib_node_name, vm, node_index);
1962         }
1963     }
1964   return 0;
1965 }
1966
1967
1968
1969 /*?
1970  * Display the set of protocols handled by the local IPv4 stack.
1971  *
1972  * @cliexpar
1973  * Example of how to display local protocol table:
1974  * @cliexstart{show ip local}
1975  * Protocols handled by ip4_local
1976  * 1
1977  * 17
1978  * 47
1979  * @cliexend
1980 ?*/
1981 /* *INDENT-OFF* */
1982 VLIB_CLI_COMMAND (show_ip_local, static) =
1983 {
1984   .path = "show ip local",
1985   .function = show_ip_local_command_fn,
1986   .short_help = "show ip local",
1987 };
1988 /* *INDENT-ON* */
1989
/* Next-node slots that the ip4 rewrite code selects directly. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  /* number of slots above - must stay last */
  IP4_REWRITE_N_NEXT = 3,
} ip4_rewrite_next_t;
1997
/**
 * The bits of an IPv4 address that are kept (by masking) when
 * constructing a multicast MAC address
 */
2002 #if CLIB_ARCH_IS_BIG_ENDIAN
2003 #define IP4_MCAST_ADDR_MASK 0x007fffff
2004 #else
2005 #define IP4_MCAST_ADDR_MASK 0xffff7f00
2006 #endif
2007
2008 always_inline void
2009 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2010                u16 adj_packet_bytes, bool df, u16 * next,
2011                u8 is_midchain, u32 * error)
2012 {
2013   if (packet_len > adj_packet_bytes)
2014     {
2015       *error = IP4_ERROR_MTU_EXCEEDED;
2016       if (df)
2017         {
2018           icmp4_error_set_vnet_buffer
2019             (b, ICMP4_destination_unreachable,
2020              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2021              adj_packet_bytes);
2022           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2023         }
2024       else
2025         {
2026           /* IP fragmentation */
2027           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2028                                    (is_midchain ?
2029                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
2030                                     IP_FRAG_NEXT_IP_REWRITE), 0);
2031           *next = IP4_REWRITE_NEXT_FRAGMENT;
2032         }
2033     }
2034 }
2035
2036 /* increment TTL & update checksum.
2037    Works either endian, so no need for byte swap. */
2038 static_always_inline void
2039 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
2040 {
2041   i32 ttl;
2042   u32 checksum;
2043   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2044     return;
2045
2046   ttl = ip->ttl;
2047
2048   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
2049   checksum += checksum >= 0xffff;
2050
2051   ip->checksum = checksum;
2052   ttl += 1;
2053   ip->ttl = ttl;
2054
2055   ASSERT (ip->checksum == ip4_header_checksum (ip));
2056 }
2057
2058 /* Decrement TTL & update checksum.
2059    Works either endian, so no need for byte swap. */
2060 static_always_inline void
2061 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2062                             u32 * error)
2063 {
2064   i32 ttl;
2065   u32 checksum;
2066   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2067     return;
2068
2069   ttl = ip->ttl;
2070
2071   /* Input node should have reject packets with ttl 0. */
2072   ASSERT (ip->ttl > 0);
2073
2074   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2075   checksum += checksum >= 0xffff;
2076
2077   ip->checksum = checksum;
2078   ttl -= 1;
2079   ip->ttl = ttl;
2080
2081   /*
2082    * If the ttl drops below 1 when forwarding, generate
2083    * an ICMP response.
2084    */
2085   if (PREDICT_FALSE (ttl <= 0))
2086     {
2087       *error = IP4_ERROR_TIME_EXPIRED;
2088       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2089       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2090                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2091                                    0);
2092       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2093     }
2094
2095   /* Verify checksum. */
2096   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2097           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2098 }
2099
2100
2101 always_inline uword
2102 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2103                              vlib_node_runtime_t * node,
2104                              vlib_frame_t * frame,
2105                              int do_counters, int is_midchain, int is_mcast)
2106 {
2107   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2108   u32 *from = vlib_frame_vector_args (frame);
2109   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2110   u16 nexts[VLIB_FRAME_SIZE], *next;
2111   u32 n_left_from;
2112   vlib_node_runtime_t *error_node =
2113     vlib_node_get_runtime (vm, ip4_input_node.index);
2114
2115   n_left_from = frame->n_vectors;
2116   u32 thread_index = vm->thread_index;
2117
2118   vlib_get_buffers (vm, from, bufs, n_left_from);
2119   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2120
2121 #if (CLIB_N_PREFETCHES >= 8)
2122   if (n_left_from >= 6)
2123     {
2124       int i;
2125       for (i = 2; i < 6; i++)
2126         vlib_prefetch_buffer_header (bufs[i], LOAD);
2127     }
2128
2129   next = nexts;
2130   b = bufs;
2131   while (n_left_from >= 8)
2132     {
2133       const ip_adjacency_t *adj0, *adj1;
2134       ip4_header_t *ip0, *ip1;
2135       u32 rw_len0, error0, adj_index0;
2136       u32 rw_len1, error1, adj_index1;
2137       u32 tx_sw_if_index0, tx_sw_if_index1;
2138       u8 *p;
2139
2140       vlib_prefetch_buffer_header (b[6], LOAD);
2141       vlib_prefetch_buffer_header (b[7], LOAD);
2142
2143       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2144       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2145
2146       /*
2147        * pre-fetch the per-adjacency counters
2148        */
2149       if (do_counters)
2150         {
2151           vlib_prefetch_combined_counter (&adjacency_counters,
2152                                           thread_index, adj_index0);
2153           vlib_prefetch_combined_counter (&adjacency_counters,
2154                                           thread_index, adj_index1);
2155         }
2156
2157       ip0 = vlib_buffer_get_current (b[0]);
2158       ip1 = vlib_buffer_get_current (b[1]);
2159
2160       error0 = error1 = IP4_ERROR_NONE;
2161
2162       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2163       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2164
2165       /* Rewrite packet header and updates lengths. */
2166       adj0 = adj_get (adj_index0);
2167       adj1 = adj_get (adj_index1);
2168
2169       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2170       rw_len0 = adj0[0].rewrite_header.data_bytes;
2171       rw_len1 = adj1[0].rewrite_header.data_bytes;
2172       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2173       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2174
2175       p = vlib_buffer_get_current (b[2]);
2176       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2177       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2178
2179       p = vlib_buffer_get_current (b[3]);
2180       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2181       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2182
2183       /* Check MTU of outgoing interface. */
2184       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2185       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2186
2187       if (b[0]->flags & VNET_BUFFER_F_GSO)
2188         ip0_len = gso_mtu_sz (b[0]);
2189       if (b[1]->flags & VNET_BUFFER_F_GSO)
2190         ip1_len = gso_mtu_sz (b[1]);
2191
2192       ip4_mtu_check (b[0], ip0_len,
2193                      adj0[0].rewrite_header.max_l3_packet_bytes,
2194                      ip0->flags_and_fragment_offset &
2195                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2196                      next + 0, is_midchain, &error0);
2197       ip4_mtu_check (b[1], ip1_len,
2198                      adj1[0].rewrite_header.max_l3_packet_bytes,
2199                      ip1->flags_and_fragment_offset &
2200                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2201                      next + 1, is_midchain, &error1);
2202
2203       if (is_mcast)
2204         {
2205           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2206                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2207                     IP4_ERROR_SAME_INTERFACE : error0);
2208           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2209                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2210                     IP4_ERROR_SAME_INTERFACE : error1);
2211         }
2212
2213       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2214        * to see the IP header */
2215       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2216         {
2217           u32 next_index = adj0[0].rewrite_header.next_index;
2218           vlib_buffer_advance (b[0], -(word) rw_len0);
2219
2220           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2221           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2222
2223           if (PREDICT_FALSE
2224               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2225             vnet_feature_arc_start (lm->output_feature_arc_index,
2226                                     tx_sw_if_index0, &next_index, b[0]);
2227           next[0] = next_index;
2228           if (is_midchain)
2229             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2230                                         0 /* is_ip6 */ ,
2231                                         0 /* with gso */ );
2232         }
2233       else
2234         {
2235           b[0]->error = error_node->errors[error0];
2236           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2237             ip4_ttl_inc (b[0], ip0);
2238         }
2239       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2240         {
2241           u32 next_index = adj1[0].rewrite_header.next_index;
2242           vlib_buffer_advance (b[1], -(word) rw_len1);
2243
2244           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2245           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2246
2247           if (PREDICT_FALSE
2248               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2249             vnet_feature_arc_start (lm->output_feature_arc_index,
2250                                     tx_sw_if_index1, &next_index, b[1]);
2251           next[1] = next_index;
2252           if (is_midchain)
2253             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2254                                         0 /* is_ip6 */ ,
2255                                         0 /* with gso */ );
2256         }
2257       else
2258         {
2259           b[1]->error = error_node->errors[error1];
2260           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2261             ip4_ttl_inc (b[1], ip1);
2262         }
2263
2264       /* Guess we are only writing on simple Ethernet header. */
2265       vnet_rewrite_two_headers (adj0[0], adj1[0],
2266                                 ip0, ip1, sizeof (ethernet_header_t));
2267
2268       if (do_counters)
2269         {
2270           if (error0 == IP4_ERROR_NONE)
2271             vlib_increment_combined_counter
2272               (&adjacency_counters,
2273                thread_index,
2274                adj_index0, 1,
2275                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2276
2277           if (error1 == IP4_ERROR_NONE)
2278             vlib_increment_combined_counter
2279               (&adjacency_counters,
2280                thread_index,
2281                adj_index1, 1,
2282                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2283         }
2284
2285       if (is_midchain)
2286         {
2287           if (error0 == IP4_ERROR_NONE && adj0->sub_type.midchain.fixup_func)
2288             adj0->sub_type.midchain.fixup_func
2289               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2290           if (error1 == IP4_ERROR_NONE && adj1->sub_type.midchain.fixup_func)
2291             adj1->sub_type.midchain.fixup_func
2292               (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2293         }
2294
2295       if (is_mcast)
2296         {
2297           /* copy bytes from the IP address into the MAC rewrite */
2298           if (error0 == IP4_ERROR_NONE)
2299             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2300                                         adj0->rewrite_header.dst_mcast_offset,
2301                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2302           if (error1 == IP4_ERROR_NONE)
2303             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2304                                         adj1->rewrite_header.dst_mcast_offset,
2305                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2306         }
2307
2308       next += 2;
2309       b += 2;
2310       n_left_from -= 2;
2311     }
2312 #elif (CLIB_N_PREFETCHES >= 4)
2313   next = nexts;
2314   b = bufs;
2315   while (n_left_from >= 1)
2316     {
2317       ip_adjacency_t *adj0;
2318       ip4_header_t *ip0;
2319       u32 rw_len0, error0, adj_index0;
2320       u32 tx_sw_if_index0;
2321       u8 *p;
2322
2323       /* Prefetch next iteration */
2324       if (PREDICT_TRUE (n_left_from >= 4))
2325         {
2326           ip_adjacency_t *adj2;
2327           u32 adj_index2;
2328
2329           vlib_prefetch_buffer_header (b[3], LOAD);
2330           vlib_prefetch_buffer_data (b[2], LOAD);
2331
2332           /* Prefetch adj->rewrite_header */
2333           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2334           adj2 = adj_get (adj_index2);
2335           p = (u8 *) adj2;
2336           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2337                          LOAD);
2338         }
2339
2340       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2341
2342       /*
2343        * Prefetch the per-adjacency counters
2344        */
2345       if (do_counters)
2346         {
2347           vlib_prefetch_combined_counter (&adjacency_counters,
2348                                           thread_index, adj_index0);
2349         }
2350
2351       ip0 = vlib_buffer_get_current (b[0]);
2352
2353       error0 = IP4_ERROR_NONE;
2354
2355       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2356
2357       /* Rewrite packet header and updates lengths. */
2358       adj0 = adj_get (adj_index0);
2359
2360       /* Rewrite header was prefetched. */
2361       rw_len0 = adj0[0].rewrite_header.data_bytes;
2362       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2363
2364       /* Check MTU of outgoing interface. */
2365       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2366
2367       if (b[0]->flags & VNET_BUFFER_F_GSO)
2368         ip0_len = gso_mtu_sz (b[0]);
2369
2370       ip4_mtu_check (b[0], ip0_len,
2371                      adj0[0].rewrite_header.max_l3_packet_bytes,
2372                      ip0->flags_and_fragment_offset &
2373                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2374                      next + 0, is_midchain, &error0);
2375
2376       if (is_mcast)
2377         {
2378           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2379                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2380                     IP4_ERROR_SAME_INTERFACE : error0);
2381         }
2382
2383       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2384        * to see the IP header */
2385       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2386         {
2387           u32 next_index = adj0[0].rewrite_header.next_index;
2388           vlib_buffer_advance (b[0], -(word) rw_len0);
2389           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2390           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2391
2392           if (PREDICT_FALSE
2393               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2394             vnet_feature_arc_start (lm->output_feature_arc_index,
2395                                     tx_sw_if_index0, &next_index, b[0]);
2396           next[0] = next_index;
2397
2398           if (is_midchain)
2399             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2400                                         0 /* is_ip6 */ ,
2401                                         0 /* with gso */ );
2402
2403           /* Guess we are only writing on simple Ethernet header. */
2404           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2405
2406           /*
2407            * Bump the per-adjacency counters
2408            */
2409           if (do_counters)
2410             vlib_increment_combined_counter
2411               (&adjacency_counters,
2412                thread_index,
2413                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2414                                                            b[0]) + rw_len0);
2415
2416           if (is_midchain && adj0->sub_type.midchain.fixup_func)
2417             adj0->sub_type.midchain.fixup_func
2418               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2419
2420           if (is_mcast)
2421             /* copy bytes from the IP address into the MAC rewrite */
2422             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2423                                         adj0->rewrite_header.dst_mcast_offset,
2424                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2425         }
2426       else
2427         {
2428           b[0]->error = error_node->errors[error0];
2429           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2430             ip4_ttl_inc (b[0], ip0);
2431         }
2432
2433       next += 1;
2434       b += 1;
2435       n_left_from -= 1;
2436     }
2437 #endif
2438
2439   while (n_left_from > 0)
2440     {
2441       ip_adjacency_t *adj0;
2442       ip4_header_t *ip0;
2443       u32 rw_len0, adj_index0, error0;
2444       u32 tx_sw_if_index0;
2445
2446       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2447
2448       adj0 = adj_get (adj_index0);
2449
2450       if (do_counters)
2451         vlib_prefetch_combined_counter (&adjacency_counters,
2452                                         thread_index, adj_index0);
2453
2454       ip0 = vlib_buffer_get_current (b[0]);
2455
2456       error0 = IP4_ERROR_NONE;
2457
2458       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2459
2460
2461       /* Update packet buffer attributes/set output interface. */
2462       rw_len0 = adj0[0].rewrite_header.data_bytes;
2463       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2464
2465       /* Check MTU of outgoing interface. */
2466       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2467       if (b[0]->flags & VNET_BUFFER_F_GSO)
2468         ip0_len = gso_mtu_sz (b[0]);
2469
2470       ip4_mtu_check (b[0], ip0_len,
2471                      adj0[0].rewrite_header.max_l3_packet_bytes,
2472                      ip0->flags_and_fragment_offset &
2473                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2474                      next + 0, is_midchain, &error0);
2475
2476       if (is_mcast)
2477         {
2478           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2479                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2480                     IP4_ERROR_SAME_INTERFACE : error0);
2481         }
2482
2483       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2484        * to see the IP header */
2485       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2486         {
2487           u32 next_index = adj0[0].rewrite_header.next_index;
2488           vlib_buffer_advance (b[0], -(word) rw_len0);
2489           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2490           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2491
2492           if (PREDICT_FALSE
2493               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2494             vnet_feature_arc_start (lm->output_feature_arc_index,
2495                                     tx_sw_if_index0, &next_index, b[0]);
2496           next[0] = next_index;
2497
2498           if (is_midchain)
2499             /* this acts on the packet that is about to be encapped */
2500             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2501                                         0 /* is_ip6 */ ,
2502                                         0 /* with gso */ );
2503
2504           /* Guess we are only writing on simple Ethernet header. */
2505           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2506
2507           if (do_counters)
2508             vlib_increment_combined_counter
2509               (&adjacency_counters,
2510                thread_index, adj_index0, 1,
2511                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2512
2513           if (is_midchain && adj0->sub_type.midchain.fixup_func)
2514             adj0->sub_type.midchain.fixup_func
2515               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2516
2517           if (is_mcast)
2518             /* copy bytes from the IP address into the MAC rewrite */
2519             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2520                                         adj0->rewrite_header.dst_mcast_offset,
2521                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2522         }
2523       else
2524         {
2525           b[0]->error = error_node->errors[error0];
2526           /* undo the TTL decrement - we'll be back to do it again */
2527           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2528             ip4_ttl_inc (b[0], ip0);
2529         }
2530
2531       next += 1;
2532       b += 1;
2533       n_left_from -= 1;
2534     }
2535
2536
2537   /* Need to do trace after rewrites to pick up new packet data. */
2538   if (node->flags & VLIB_NODE_FLAG_TRACE)
2539     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2540
2541   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2542   return frame->n_vectors;
2543 }
2544
2545 always_inline uword
2546 ip4_rewrite_inline (vlib_main_t * vm,
2547                     vlib_node_runtime_t * node,
2548                     vlib_frame_t * frame,
2549                     int do_counters, int is_midchain, int is_mcast)
2550 {
2551   return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2552                                       is_midchain, is_mcast);
2553 }
2554
2555
2556 /** @brief IPv4 rewrite node.
2557     @node ip4-rewrite
2558
2559     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2560     header checksum, fetch the ip adjacency, check the outbound mtu,
2561     apply the adjacency rewrite, and send pkts to the adjacency
2562     rewrite header's rewrite_next_index.
2563
2564     @param vm vlib_main_t corresponding to the current thread
2565     @param node vlib_node_runtime_t
2566     @param frame vlib_frame_t whose contents should be dispatched
2567
2568     @par Graph mechanics: buffer metadata, next index usage
2569
2570     @em Uses:
2571     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2572         - the rewrite adjacency index
2573     - <code>adj->lookup_next_index</code>
2574         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2575           the packet will be dropped.
2576     - <code>adj->rewrite_header</code>
2577         - Rewrite string length, rewrite string, next_index
2578
2579     @em Sets:
2580     - <code>b->current_data, b->current_length</code>
2581         - Updated net of applying the rewrite string
2582
2583     <em>Next Indices:</em>
2584     - <code> adj->rewrite_header.next_index </code>
2585       or @c ip4-drop
2586 */
2587
2588 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2589                                  vlib_frame_t * frame)
2590 {
2591   if (adj_are_counters_enabled ())
2592     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2593   else
2594     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2595 }
2596
2597 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2598                                        vlib_node_runtime_t * node,
2599                                        vlib_frame_t * frame)
2600 {
2601   if (adj_are_counters_enabled ())
2602     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2603   else
2604     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2605 }
2606
2607 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2608                                   vlib_node_runtime_t * node,
2609                                   vlib_frame_t * frame)
2610 {
2611   if (adj_are_counters_enabled ())
2612     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2613   else
2614     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2615 }
2616
2617 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2618                                        vlib_node_runtime_t * node,
2619                                        vlib_frame_t * frame)
2620 {
2621   if (adj_are_counters_enabled ())
2622     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2623   else
2624     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2625 }
2626
2627 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2628                                         vlib_node_runtime_t * node,
2629                                         vlib_frame_t * frame)
2630 {
2631   if (adj_are_counters_enabled ())
2632     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2633   else
2634     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2635 }
2636
2637 /* *INDENT-OFF* */
2638 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2639   .name = "ip4-rewrite",
2640   .vector_size = sizeof (u32),
2641
2642   .format_trace = format_ip4_rewrite_trace,
2643
2644   .n_next_nodes = IP4_REWRITE_N_NEXT,
2645   .next_nodes = {
2646     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2647     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2648     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2649   },
2650 };
2651
2652 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2653   .name = "ip4-rewrite-bcast",
2654   .vector_size = sizeof (u32),
2655
2656   .format_trace = format_ip4_rewrite_trace,
2657   .sibling_of = "ip4-rewrite",
2658 };
2659
2660 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2661   .name = "ip4-rewrite-mcast",
2662   .vector_size = sizeof (u32),
2663
2664   .format_trace = format_ip4_rewrite_trace,
2665   .sibling_of = "ip4-rewrite",
2666 };
2667
2668 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2669   .name = "ip4-mcast-midchain",
2670   .vector_size = sizeof (u32),
2671
2672   .format_trace = format_ip4_rewrite_trace,
2673   .sibling_of = "ip4-rewrite",
2674 };
2675
2676 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2677   .name = "ip4-midchain",
2678   .vector_size = sizeof (u32),
2679   .format_trace = format_ip4_rewrite_trace,
2680   .sibling_of = "ip4-rewrite",
2681 };
/* *INDENT-ON* */
2683
2684 static int
2685 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2686 {
2687   ip4_fib_mtrie_t *mtrie0;
2688   ip4_fib_mtrie_leaf_t leaf0;
2689   u32 lbi0;
2690
2691   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2692
2693   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2694   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2695   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2696
2697   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2698
2699   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2700 }
2701
2702 static clib_error_t *
2703 test_lookup_command_fn (vlib_main_t * vm,
2704                         unformat_input_t * input, vlib_cli_command_t * cmd)
2705 {
2706   ip4_fib_t *fib;
2707   u32 table_id = 0;
2708   f64 count = 1;
2709   u32 n;
2710   int i;
2711   ip4_address_t ip4_base_address;
2712   u64 errors = 0;
2713
2714   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2715     {
2716       if (unformat (input, "table %d", &table_id))
2717         {
2718           /* Make sure the entry exists. */
2719           fib = ip4_fib_get (table_id);
2720           if ((fib) && (fib->index != table_id))
2721             return clib_error_return (0, "<fib-index> %d does not exist",
2722                                       table_id);
2723         }
2724       else if (unformat (input, "count %f", &count))
2725         ;
2726
2727       else if (unformat (input, "%U",
2728                          unformat_ip4_address, &ip4_base_address))
2729         ;
2730       else
2731         return clib_error_return (0, "unknown input `%U'",
2732                                   format_unformat_error, input);
2733     }
2734
2735   n = count;
2736
2737   for (i = 0; i < n; i++)
2738     {
2739       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2740         errors++;
2741
2742       ip4_base_address.as_u32 =
2743         clib_host_to_net_u32 (1 +
2744                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2745     }
2746
2747   if (errors)
2748     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2749   else
2750     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2751
2752   return 0;
2753 }
2754
2755 /*?
2756  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2757  * given FIB table to determine if there is a conflict with the
2758  * adjacency table. The fib-id can be determined by using the
2759  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2760  * of 0 is used.
2761  *
2762  * @todo This command uses fib-id, other commands use table-id (not
2763  * just a name, they are different indexes). Would like to change this
2764  * to table-id for consistency.
2765  *
2766  * @cliexpar
2767  * Example of how to run the test lookup command:
2768  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2769  * No errors in 2 lookups
2770  * @cliexend
2771 ?*/
2772 /* *INDENT-OFF* */
2773 VLIB_CLI_COMMAND (lookup_test_command, static) =
2774 {
2775   .path = "test lookup",
2776   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2777   .function = test_lookup_command_fn,
2778 };
2779 /* *INDENT-ON* */
2780
2781 #ifndef CLIB_MARCH_VARIANT
2782 int
2783 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2784 {
2785   u32 fib_index;
2786
2787   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2788
2789   if (~0 == fib_index)
2790     return VNET_API_ERROR_NO_SUCH_FIB;
2791
2792   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2793                                   flow_hash_config);
2794
2795   return 0;
2796 }
2797 #endif
2798
2799 static clib_error_t *
2800 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2801                              unformat_input_t * input,
2802                              vlib_cli_command_t * cmd)
2803 {
2804   int matched = 0;
2805   u32 table_id = 0;
2806   u32 flow_hash_config = 0;
2807   int rv;
2808
2809   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2810     {
2811       if (unformat (input, "table %d", &table_id))
2812         matched = 1;
2813 #define _(a,v) \
2814     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2815       foreach_flow_hash_bit
2816 #undef _
2817         else
2818         break;
2819     }
2820
2821   if (matched == 0)
2822     return clib_error_return (0, "unknown input `%U'",
2823                               format_unformat_error, input);
2824
2825   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2826   switch (rv)
2827     {
2828     case 0:
2829       break;
2830
2831     case VNET_API_ERROR_NO_SUCH_FIB:
2832       return clib_error_return (0, "no such FIB table %d", table_id);
2833
2834     default:
2835       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2836       break;
2837     }
2838
2839   return 0;
2840 }
2841
2842 /*?
2843  * Configure the set of IPv4 fields used by the flow hash.
2844  *
2845  * @cliexpar
2846  * Example of how to set the flow hash on a given table:
2847  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2849  * @cliexstart{show ip fib}
2850  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2851  * 0.0.0.0/0
2852  *   unicast-ip4-chain
2853  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2854  *     [0] [@0]: dpo-drop ip6
2855  * 0.0.0.0/32
2856  *   unicast-ip4-chain
2857  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2858  *     [0] [@0]: dpo-drop ip6
2859  * 224.0.0.0/8
2860  *   unicast-ip4-chain
2861  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2862  *     [0] [@0]: dpo-drop ip6
2863  * 6.0.1.2/32
2864  *   unicast-ip4-chain
2865  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2866  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2867  * 7.0.0.1/32
2868  *   unicast-ip4-chain
2869  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2870  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2871  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2872  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2873  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2874  * 240.0.0.0/8
2875  *   unicast-ip4-chain
2876  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2877  *     [0] [@0]: dpo-drop ip6
2878  * 255.255.255.255/32
2879  *   unicast-ip4-chain
2880  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2881  *     [0] [@0]: dpo-drop ip6
2882  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2883  * 0.0.0.0/0
2884  *   unicast-ip4-chain
2885  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2886  *     [0] [@0]: dpo-drop ip6
2887  * 0.0.0.0/32
2888  *   unicast-ip4-chain
2889  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2890  *     [0] [@0]: dpo-drop ip6
2891  * 172.16.1.0/24
2892  *   unicast-ip4-chain
2893  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2894  *     [0] [@4]: ipv4-glean: af_packet0
2895  * 172.16.1.1/32
2896  *   unicast-ip4-chain
2897  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2898  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2899  * 172.16.1.2/32
2900  *   unicast-ip4-chain
2901  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2902  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2903  * 172.16.2.0/24
2904  *   unicast-ip4-chain
2905  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2906  *     [0] [@4]: ipv4-glean: af_packet1
2907  * 172.16.2.1/32
2908  *   unicast-ip4-chain
2909  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2910  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2911  * 224.0.0.0/8
2912  *   unicast-ip4-chain
2913  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2914  *     [0] [@0]: dpo-drop ip6
2915  * 240.0.0.0/8
2916  *   unicast-ip4-chain
2917  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2918  *     [0] [@0]: dpo-drop ip6
2919  * 255.255.255.255/32
2920  *   unicast-ip4-chain
2921  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2922  *     [0] [@0]: dpo-drop ip6
2923  * @cliexend
2924 ?*/
2925 /* *INDENT-OFF* */
2926 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2927 {
2928   .path = "set ip flow-hash",
2929   .short_help =
2930   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2931   .function = set_ip_flow_hash_command_fn,
2932 };
2933 /* *INDENT-ON* */
2934
2935 #ifndef CLIB_MARCH_VARIANT
2936 int
2937 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2938                              u32 table_index)
2939 {
2940   vnet_main_t *vnm = vnet_get_main ();
2941   vnet_interface_main_t *im = &vnm->interface_main;
2942   ip4_main_t *ipm = &ip4_main;
2943   ip_lookup_main_t *lm = &ipm->lookup_main;
2944   vnet_classify_main_t *cm = &vnet_classify_main;
2945   ip4_address_t *if_addr;
2946
2947   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2948     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2949
2950   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2951     return VNET_API_ERROR_NO_SUCH_ENTRY;
2952
2953   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2954   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2955
2956   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2957
2958   if (NULL != if_addr)
2959     {
2960       fib_prefix_t pfx = {
2961         .fp_len = 32,
2962         .fp_proto = FIB_PROTOCOL_IP4,
2963         .fp_addr.ip4 = *if_addr,
2964       };
2965       u32 fib_index;
2966
2967       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2968                                                        sw_if_index);
2969
2970
2971       if (table_index != (u32) ~ 0)
2972         {
2973           dpo_id_t dpo = DPO_INVALID;
2974
2975           dpo_set (&dpo,
2976                    DPO_CLASSIFY,
2977                    DPO_PROTO_IP4,
2978                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2979
2980           fib_table_entry_special_dpo_add (fib_index,
2981                                            &pfx,
2982                                            FIB_SOURCE_CLASSIFY,
2983                                            FIB_ENTRY_FLAG_NONE, &dpo);
2984           dpo_reset (&dpo);
2985         }
2986       else
2987         {
2988           fib_table_entry_special_remove (fib_index,
2989                                           &pfx, FIB_SOURCE_CLASSIFY);
2990         }
2991     }
2992
2993   return 0;
2994 }
2995 #endif
2996
2997 static clib_error_t *
2998 set_ip_classify_command_fn (vlib_main_t * vm,
2999                             unformat_input_t * input,
3000                             vlib_cli_command_t * cmd)
3001 {
3002   u32 table_index = ~0;
3003   int table_index_set = 0;
3004   u32 sw_if_index = ~0;
3005   int rv;
3006
3007   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3008     {
3009       if (unformat (input, "table-index %d", &table_index))
3010         table_index_set = 1;
3011       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3012                          vnet_get_main (), &sw_if_index))
3013         ;
3014       else
3015         break;
3016     }
3017
3018   if (table_index_set == 0)
3019     return clib_error_return (0, "classify table-index must be specified");
3020
3021   if (sw_if_index == ~0)
3022     return clib_error_return (0, "interface / subif must be specified");
3023
3024   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3025
3026   switch (rv)
3027     {
3028     case 0:
3029       break;
3030
3031     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3032       return clib_error_return (0, "No such interface");
3033
3034     case VNET_API_ERROR_NO_SUCH_ENTRY:
3035       return clib_error_return (0, "No such classifier table");
3036     }
3037   return 0;
3038 }
3039
3040 /*?
3041  * Assign a classification table to an interface. The classification
3042  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
3044  * on an interface.
3045  *
3046  * @cliexpar
3047  * Example of how to assign a classification table to an interface:
3048  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3049 ?*/
3050 /* *INDENT-OFF* */
3051 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3052 {
3053     .path = "set ip classify",
3054     .short_help =
3055     "set ip classify intfc <interface> table-index <classify-idx>",
3056     .function = set_ip_classify_command_fn,
3057 };
3058 /* *INDENT-ON* */
3059
3060 static clib_error_t *
3061 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3062 {
3063   ip4_main_t *im = &ip4_main;
3064   uword heapsize = 0;
3065
3066   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3067     {
3068       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3069         ;
3070       else
3071         return clib_error_return (0,
3072                                   "invalid heap-size parameter `%U'",
3073                                   format_unformat_error, input);
3074     }
3075
3076   im->mtrie_heap_size = heapsize;
3077
3078   return 0;
3079 }
3080
3081 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3082
3083 /*
3084  * fd.io coding-style-patch-verification: ON
3085  *
3086  * Local Variables:
3087  * eval: (c-set-style "gnu")
3088  * End:
3089  */