ip: remove unnecessary inlining in ip4-rewrite node
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/classify_dpo.h>
56 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
57 #include <vnet/adj/adj_dp.h>
58
59 #include <vnet/ip/ip4_forward.h>
60 #include <vnet/interface_output.h>
61 #include <vnet/classify/vnet_classify.h>
62
63 /** @brief IPv4 lookup node.
64     @node ip4-lookup
65
66     This is the main IPv4 lookup dispatch node.
67
68     @param vm vlib_main_t corresponding to the current thread
69     @param node vlib_node_runtime_t
70     @param frame vlib_frame_t whose contents should be dispatched
71
72     @par Graph mechanics: buffer metadata, next index usage
73
74     @em Uses:
75     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
76         - Indicates the @c sw_if_index value of the interface that the
77           packet was received on.
78     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
79         - When the value is @c ~0 then the node performs a longest prefix
80           match (LPM) for the packet destination address in the FIB attached
81           to the receive interface.
82         - Otherwise perform LPM for the packet destination address in the
83           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
84           value (0, 1, ...) and not a VRF id.
85
86     @em Sets:
87     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
88         - The lookup result adjacency index.
89
90     <em>Next Index:</em>
91     - Dispatches the packet to the node index found in
92       ip_adjacency_t @c adj->lookup_next_index
93       (where @c adj is the lookup result adjacency).
94 */
95 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
96                                 vlib_frame_t * frame)
97 {
98   return ip4_lookup_inline (vm, node, frame);
99 }
100
101 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
102
103 /* *INDENT-OFF* */
104 VLIB_REGISTER_NODE (ip4_lookup_node) =
105 {
106   .name = "ip4-lookup",
107   .vector_size = sizeof (u32),
108   .format_trace = format_ip4_lookup_trace,
109   .n_next_nodes = IP_LOOKUP_N_NEXT,
110   .next_nodes = IP4_LOOKUP_NEXT_NODES,
111 };
112 /* *INDENT-ON* */
113
114 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
115                                       vlib_node_runtime_t * node,
116                                       vlib_frame_t * frame)
117 {
118   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
119   u32 n_left, *from;
120   u32 thread_index = vm->thread_index;
121   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
122   u16 nexts[VLIB_FRAME_SIZE], *next;
123
124   from = vlib_frame_vector_args (frame);
125   n_left = frame->n_vectors;
126   next = nexts;
127
128   vlib_get_buffers (vm, from, bufs, n_left);
129
130   while (n_left >= 4)
131     {
132       const load_balance_t *lb0, *lb1;
133       const ip4_header_t *ip0, *ip1;
134       u32 lbi0, hc0, lbi1, hc1;
135       const dpo_id_t *dpo0, *dpo1;
136
137       /* Prefetch next iteration. */
138       {
139         vlib_prefetch_buffer_header (b[2], LOAD);
140         vlib_prefetch_buffer_header (b[3], LOAD);
141
142         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
143         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
144       }
145
146       ip0 = vlib_buffer_get_current (b[0]);
147       ip1 = vlib_buffer_get_current (b[1]);
148       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
149       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
150
151       lb0 = load_balance_get (lbi0);
152       lb1 = load_balance_get (lbi1);
153
154       /*
155        * this node is for via FIBs we can re-use the hash value from the
156        * to node if present.
157        * We don't want to use the same hash value at each level in the recursion
158        * graph as that would lead to polarisation
159        */
160       hc0 = hc1 = 0;
161
162       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
163         {
164           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
165             {
166               hc0 = vnet_buffer (b[0])->ip.flow_hash =
167                 vnet_buffer (b[0])->ip.flow_hash >> 1;
168             }
169           else
170             {
171               hc0 = vnet_buffer (b[0])->ip.flow_hash =
172                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
173             }
174           dpo0 = load_balance_get_fwd_bucket
175             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
176         }
177       else
178         {
179           dpo0 = load_balance_get_bucket_i (lb0, 0);
180         }
181       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
182         {
183           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
184             {
185               hc1 = vnet_buffer (b[1])->ip.flow_hash =
186                 vnet_buffer (b[1])->ip.flow_hash >> 1;
187             }
188           else
189             {
190               hc1 = vnet_buffer (b[1])->ip.flow_hash =
191                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
192             }
193           dpo1 = load_balance_get_fwd_bucket
194             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
195         }
196       else
197         {
198           dpo1 = load_balance_get_bucket_i (lb1, 0);
199         }
200
201       next[0] = dpo0->dpoi_next_node;
202       next[1] = dpo1->dpoi_next_node;
203
204       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
205       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
206
207       vlib_increment_combined_counter
208         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
209       vlib_increment_combined_counter
210         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
211
212       b += 2;
213       next += 2;
214       n_left -= 2;
215     }
216
217   while (n_left > 0)
218     {
219       const load_balance_t *lb0;
220       const ip4_header_t *ip0;
221       const dpo_id_t *dpo0;
222       u32 lbi0, hc0;
223
224       ip0 = vlib_buffer_get_current (b[0]);
225       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
226
227       lb0 = load_balance_get (lbi0);
228
229       hc0 = 0;
230       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
231         {
232           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
233             {
234               hc0 = vnet_buffer (b[0])->ip.flow_hash =
235                 vnet_buffer (b[0])->ip.flow_hash >> 1;
236             }
237           else
238             {
239               hc0 = vnet_buffer (b[0])->ip.flow_hash =
240                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
241             }
242           dpo0 = load_balance_get_fwd_bucket
243             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
244         }
245       else
246         {
247           dpo0 = load_balance_get_bucket_i (lb0, 0);
248         }
249
250       next[0] = dpo0->dpoi_next_node;
251       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
252
253       vlib_increment_combined_counter
254         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
255
256       b += 1;
257       next += 1;
258       n_left -= 1;
259     }
260
261   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
262   if (node->flags & VLIB_NODE_FLAG_TRACE)
263     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
264
265   return frame->n_vectors;
266 }
267
268 /* *INDENT-OFF* */
269 VLIB_REGISTER_NODE (ip4_load_balance_node) =
270 {
271   .name = "ip4-load-balance",
272   .vector_size = sizeof (u32),
273   .sibling_of = "ip4-lookup",
274   .format_trace = format_ip4_lookup_trace,
275 };
276 /* *INDENT-ON* */
277
278 #ifndef CLIB_MARCH_VARIANT
279 /* get first interface address */
280 ip4_address_t *
281 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
282                              ip_interface_address_t ** result_ia)
283 {
284   ip_lookup_main_t *lm = &im->lookup_main;
285   ip_interface_address_t *ia = 0;
286   ip4_address_t *result = 0;
287
288   /* *INDENT-OFF* */
289   foreach_ip_interface_address
290     (lm, ia, sw_if_index,
291      1 /* honor unnumbered */ ,
292      ({
293        ip4_address_t * a =
294          ip_interface_address_get_address (lm, ia);
295        result = a;
296        break;
297      }));
298   /* *INDENT-OFF* */
299   if (result_ia)
300     *result_ia = result ? ia : 0;
301   return result;
302 }
303 #endif
304
305 static void
306 ip4_add_subnet_bcast_route (u32 fib_index,
307                             fib_prefix_t *pfx,
308                             u32 sw_if_index)
309 {
310   vnet_sw_interface_flags_t iflags;
311
312   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
313
314   fib_table_entry_special_remove(fib_index,
315                                  pfx,
316                                  FIB_SOURCE_INTERFACE);
317
318   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
319     {
320       fib_table_entry_update_one_path (fib_index, pfx,
321                                        FIB_SOURCE_INTERFACE,
322                                        FIB_ENTRY_FLAG_NONE,
323                                        DPO_PROTO_IP4,
324                                        /* No next-hop address */
325                                        &ADJ_BCAST_ADDR,
326                                        sw_if_index,
327                                        // invalid FIB index
328                                        ~0,
329                                        1,
330                                        // no out-label stack
331                                        NULL,
332                                        FIB_ROUTE_PATH_FLAG_NONE);
333     }
334   else
335     {
336         fib_table_entry_special_add(fib_index,
337                                     pfx,
338                                     FIB_SOURCE_INTERFACE,
339                                     (FIB_ENTRY_FLAG_DROP |
340                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
341     }
342 }
343
344 static void
345 ip4_add_interface_prefix_routes (ip4_main_t *im,
346                                  u32 sw_if_index,
347                                  u32 fib_index,
348                                  ip_interface_address_t * a)
349 {
350   ip_lookup_main_t *lm = &im->lookup_main;
351   ip_interface_prefix_t *if_prefix;
352   ip4_address_t *address = ip_interface_address_get_address (lm, a);
353
354   ip_interface_prefix_key_t key = {
355     .prefix = {
356       .fp_len = a->address_length,
357       .fp_proto = FIB_PROTOCOL_IP4,
358       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
359     },
360     .sw_if_index = sw_if_index,
361   };
362
363   fib_prefix_t pfx_special = {
364     .fp_proto = FIB_PROTOCOL_IP4,
365   };
366
367   /* If prefix already set on interface, just increment ref count & return */
368   if_prefix = ip_get_interface_prefix (lm, &key);
369   if (if_prefix)
370     {
371       if_prefix->ref_count += 1;
372       return;
373     }
374
375   /* New prefix - allocate a pool entry, initialize it, add to the hash */
376   pool_get (lm->if_prefix_pool, if_prefix);
377   if_prefix->ref_count = 1;
378   if_prefix->src_ia_index = a - lm->if_address_pool;
379   clib_memcpy (&if_prefix->key, &key, sizeof (key));
380   mhash_set (&lm->prefix_to_if_prefix_index, &key,
381              if_prefix - lm->if_prefix_pool, 0 /* old value */);
382
383   pfx_special.fp_len = a->address_length;
384   pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
385
386   /* set the glean route for the prefix */
387   fib_table_entry_update_one_path (fib_index, &pfx_special,
388                                    FIB_SOURCE_INTERFACE,
389                                    (FIB_ENTRY_FLAG_CONNECTED |
390                                     FIB_ENTRY_FLAG_ATTACHED),
391                                    DPO_PROTO_IP4,
392                                    /* No next-hop address */
393                                    NULL,
394                                    sw_if_index,
395                                    /* invalid FIB index */
396                                    ~0,
397                                    1,
398                                    /* no out-label stack */
399                                    NULL,
400                                    FIB_ROUTE_PATH_FLAG_NONE);
401
402   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
403   if (a->address_length <= 30)
404     {
405       /* set a drop route for the base address of the prefix */
406       pfx_special.fp_len = 32;
407       pfx_special.fp_addr.ip4.as_u32 =
408         address->as_u32 & im->fib_masks[a->address_length];
409
410       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
411         fib_table_entry_special_add (fib_index, &pfx_special,
412                                      FIB_SOURCE_INTERFACE,
413                                      (FIB_ENTRY_FLAG_DROP |
414                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
415
416       /* set a route for the broadcast address of the prefix */
417       pfx_special.fp_len = 32;
418       pfx_special.fp_addr.ip4.as_u32 =
419         address->as_u32 | ~im->fib_masks[a->address_length];
420       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
421         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
422
423
424     }
425   /* length == 31 - add an attached route for the other address */
426   else if (a->address_length == 31)
427     {
428       pfx_special.fp_len = 32;
429       pfx_special.fp_addr.ip4.as_u32 =
430         address->as_u32 ^ clib_host_to_net_u32(1);
431
432       fib_table_entry_update_one_path (fib_index, &pfx_special,
433                                        FIB_SOURCE_INTERFACE,
434                                        (FIB_ENTRY_FLAG_ATTACHED),
435                                        DPO_PROTO_IP4,
436                                        &pfx_special.fp_addr,
437                                        sw_if_index,
438                                        /* invalid FIB index */
439                                        ~0,
440                                        1,
441                                        NULL,
442                                        FIB_ROUTE_PATH_FLAG_NONE);
443     }
444 }
445
446 static void
447 ip4_add_interface_routes (u32 sw_if_index,
448                           ip4_main_t * im, u32 fib_index,
449                           ip_interface_address_t * a)
450 {
451   ip_lookup_main_t *lm = &im->lookup_main;
452   ip4_address_t *address = ip_interface_address_get_address (lm, a);
453   fib_prefix_t pfx = {
454     .fp_len = 32,
455     .fp_proto = FIB_PROTOCOL_IP4,
456     .fp_addr.ip4 = *address,
457   };
458
459   /* set special routes for the prefix if needed */
460   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
461
462   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
463     {
464       u32 classify_table_index =
465         lm->classify_table_index_by_sw_if_index[sw_if_index];
466       if (classify_table_index != (u32) ~ 0)
467         {
468           dpo_id_t dpo = DPO_INVALID;
469
470           dpo_set (&dpo,
471                    DPO_CLASSIFY,
472                    DPO_PROTO_IP4,
473                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
474
475           fib_table_entry_special_dpo_add (fib_index,
476                                            &pfx,
477                                            FIB_SOURCE_CLASSIFY,
478                                            FIB_ENTRY_FLAG_NONE, &dpo);
479           dpo_reset (&dpo);
480         }
481     }
482
483   fib_table_entry_update_one_path (fib_index, &pfx,
484                                    FIB_SOURCE_INTERFACE,
485                                    (FIB_ENTRY_FLAG_CONNECTED |
486                                     FIB_ENTRY_FLAG_LOCAL),
487                                    DPO_PROTO_IP4,
488                                    &pfx.fp_addr,
489                                    sw_if_index,
490                                    // invalid FIB index
491                                    ~0,
492                                    1, NULL,
493                                    FIB_ROUTE_PATH_FLAG_NONE);
494 }
495
496 static void
497 ip4_del_interface_prefix_routes (ip4_main_t * im,
498                                  u32 sw_if_index,
499                                  u32 fib_index,
500                                  ip4_address_t * address,
501                                  u32 address_length)
502 {
503   ip_lookup_main_t *lm = &im->lookup_main;
504   ip_interface_prefix_t *if_prefix;
505
506   ip_interface_prefix_key_t key = {
507     .prefix = {
508       .fp_len = address_length,
509       .fp_proto = FIB_PROTOCOL_IP4,
510       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
511     },
512     .sw_if_index = sw_if_index,
513   };
514
515   fib_prefix_t pfx_special = {
516     .fp_len = 32,
517     .fp_proto = FIB_PROTOCOL_IP4,
518   };
519
520   if_prefix = ip_get_interface_prefix (lm, &key);
521   if (!if_prefix)
522     {
523       clib_warning ("Prefix not found while deleting %U",
524                     format_ip4_address_and_length, address, address_length);
525       return;
526     }
527
528   if_prefix->ref_count -= 1;
529
530   /*
531    * Routes need to be adjusted if deleting last intf addr in prefix
532    *
533    * We're done now otherwise
534    */
535   if (if_prefix->ref_count > 0)
536     return;
537
538   /* length <= 30, delete glean route, first address, last address */
539   if (address_length <= 30)
540     {
541       /* Less work to do in FIB if we remove the covered /32s first */
542
543       /* first address in prefix */
544       pfx_special.fp_addr.ip4.as_u32 =
545         address->as_u32 & im->fib_masks[address_length];
546       pfx_special.fp_len = 32;
547
548       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
549         fib_table_entry_special_remove (fib_index,
550                                         &pfx_special,
551                                         FIB_SOURCE_INTERFACE);
552
553       /* prefix broadcast address */
554       pfx_special.fp_addr.ip4.as_u32 =
555         address->as_u32 | ~im->fib_masks[address_length];
556       pfx_special.fp_len = 32;
557
558       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
559         fib_table_entry_special_remove (fib_index,
560                                         &pfx_special,
561                                         FIB_SOURCE_INTERFACE);
562     }
563   else if (address_length == 31)
564     {
565       /* length == 31, delete attached route for the other address */
566       pfx_special.fp_addr.ip4.as_u32 =
567         address->as_u32 ^ clib_host_to_net_u32(1);
568
569       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
570     }
571
572   /* remove glean route for prefix */
573   pfx_special.fp_addr.ip4 = *address;
574   pfx_special.fp_len = address_length;
575   fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
576
577   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
578   pool_put (lm->if_prefix_pool, if_prefix);
579 }
580
581 static void
582 ip4_del_interface_routes (u32 sw_if_index,
583                           ip4_main_t * im,
584                           u32 fib_index,
585                           ip4_address_t * address, u32 address_length)
586 {
587   fib_prefix_t pfx = {
588     .fp_len = 32,
589     .fp_proto = FIB_PROTOCOL_IP4,
590     .fp_addr.ip4 = *address,
591   };
592
593   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
594
595   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
596                                    address, address_length);
597 }
598
599 #ifndef CLIB_MARCH_VARIANT
600 void
601 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
602 {
603   ip4_main_t *im = &ip4_main;
604   vnet_main_t *vnm = vnet_get_main ();
605   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
606
607   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
608
609   /*
610    * enable/disable only on the 1<->0 transition
611    */
612   if (is_enable)
613     {
614       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
615         return;
616     }
617   else
618     {
619       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
620       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
621         return;
622     }
623   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
624                                !is_enable, 0, 0);
625
626
627   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
628                                sw_if_index, !is_enable, 0, 0);
629
630   if (is_enable)
631     hi->l3_if_count++;
632   else if (hi->l3_if_count)
633     hi->l3_if_count--;
634
635   {
636     ip4_enable_disable_interface_callback_t *cb;
637     vec_foreach (cb, im->enable_disable_interface_callbacks)
638       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
639   }
640 }
641
642 static clib_error_t *
643 ip4_add_del_interface_address_internal (vlib_main_t * vm,
644                                         u32 sw_if_index,
645                                         ip4_address_t * address,
646                                         u32 address_length, u32 is_del)
647 {
648   vnet_main_t *vnm = vnet_get_main ();
649   ip4_main_t *im = &ip4_main;
650   ip_lookup_main_t *lm = &im->lookup_main;
651   clib_error_t *error = 0;
652   u32 if_address_index;
653   ip4_address_fib_t ip4_af, *addr_fib = 0;
654
655   /* local0 interface doesn't support IP addressing  */
656   if (sw_if_index == 0)
657     {
658       return
659        clib_error_create ("local0 interface doesn't support IP addressing");
660     }
661
662   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
663   ip4_addr_fib_init (&ip4_af, address,
664                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
665   vec_add1 (addr_fib, ip4_af);
666
667   /*
668    * there is no support for adj-fib handling in the presence of overlapping
669    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
670    * most routers do.
671    */
672   /* *INDENT-OFF* */
673   if (!is_del)
674     {
675       /* When adding an address check that it does not conflict
676          with an existing address on any interface in this table. */
677       ip_interface_address_t *ia;
678       vnet_sw_interface_t *sif;
679
680       pool_foreach (sif, vnm->interface_main.sw_interfaces)
681        {
682           if (im->fib_index_by_sw_if_index[sw_if_index] ==
683               im->fib_index_by_sw_if_index[sif->sw_if_index])
684             {
685               foreach_ip_interface_address
686                 (&im->lookup_main, ia, sif->sw_if_index,
687                  0 /* honor unnumbered */ ,
688                  ({
689                    ip4_address_t * x =
690                      ip_interface_address_get_address
691                      (&im->lookup_main, ia);
692
693                    if (ip4_destination_matches_route
694                        (im, address, x, ia->address_length) ||
695                        ip4_destination_matches_route (im,
696                                                       x,
697                                                       address,
698                                                       address_length))
699                      {
700                        /* an intf may have >1 addr from the same prefix */
701                        if ((sw_if_index == sif->sw_if_index) &&
702                            (ia->address_length == address_length) &&
703                            (x->as_u32 != address->as_u32))
704                          continue;
705
706                        if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
707                          /* if the address we're comparing against is stale
708                           * then the CP has not added this one back yet, maybe
709                           * it never will, so we have to assume it won't and
710                           * ignore it. if it does add it back, then it will fail
711                           * because this one is now present */
712                          continue;
713
714                        /* error if the length or intf was different */
715                        vnm->api_errno = VNET_API_ERROR_ADDRESS_IN_USE;
716
717                        error = clib_error_create
718                          ("failed to add %U on %U which conflicts with %U for interface %U",
719                           format_ip4_address_and_length, address,
720                           address_length,
721                           format_vnet_sw_if_index_name, vnm,
722                           sw_if_index,
723                           format_ip4_address_and_length, x,
724                           ia->address_length,
725                           format_vnet_sw_if_index_name, vnm,
726                           sif->sw_if_index);
727                        goto done;
728                      }
729                  }));
730             }
731       }
732     }
733   /* *INDENT-ON* */
734
735   if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
736
737   if (is_del)
738     {
739       if (~0 == if_address_index)
740         {
741           vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
742           error = clib_error_create ("%U not found for interface %U",
743                                      lm->format_address_and_length,
744                                      addr_fib, address_length,
745                                      format_vnet_sw_if_index_name, vnm,
746                                      sw_if_index);
747           goto done;
748         }
749
750       error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib,
751                                         address_length, sw_if_index);
752       if (error)
753         goto done;
754     }
755   else
756     {
757       if (~0 != if_address_index)
758         {
759           ip_interface_address_t *ia;
760
761           ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
762
763           if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
764             {
765               if (ia->sw_if_index == sw_if_index)
766                 {
767                   /* re-adding an address during the replace action.
768                    * consdier this the update. clear the flag and
769                    * we're done */
770                   ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
771                   goto done;
772                 }
773               else
774                 {
775                   /* The prefix is moving from one interface to another.
776                    * delete the stale and add the new */
777                   ip4_add_del_interface_address_internal (vm,
778                                                           ia->sw_if_index,
779                                                           address,
780                                                           address_length, 1);
781                   ia = NULL;
782                   error = ip_interface_address_add (lm, sw_if_index,
783                                                     addr_fib, address_length,
784                                                     &if_address_index);
785                 }
786             }
787           else
788             {
789               vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
790               error = clib_error_create
791                 ("Prefix %U already found on interface %U",
792                  lm->format_address_and_length, addr_fib, address_length,
793                  format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
794             }
795         }
796       else
797         error = ip_interface_address_add (lm, sw_if_index,
798                                           addr_fib, address_length,
799                                           &if_address_index);
800     }
801
802   if (error)
803     goto done;
804
805   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
806   ip4_mfib_interface_enable_disable (sw_if_index, !is_del);
807
808   /* intf addr routes are added/deleted on admin up/down */
809   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
810     {
811       if (is_del)
812         ip4_del_interface_routes (sw_if_index,
813                                   im, ip4_af.fib_index, address,
814                                   address_length);
815       else
816         ip4_add_interface_routes (sw_if_index,
817                                   im, ip4_af.fib_index,
818                                   pool_elt_at_index
819                                   (lm->if_address_pool, if_address_index));
820     }
821
822   ip4_add_del_interface_address_callback_t *cb;
823   vec_foreach (cb, im->add_del_interface_address_callbacks)
824     cb->function (im, cb->function_opaque, sw_if_index,
825                   address, address_length, if_address_index, is_del);
826
827 done:
828   vec_free (addr_fib);
829   return error;
830 }
831
832 clib_error_t *
833 ip4_add_del_interface_address (vlib_main_t * vm,
834                                u32 sw_if_index,
835                                ip4_address_t * address,
836                                u32 address_length, u32 is_del)
837 {
838   return ip4_add_del_interface_address_internal
839     (vm, sw_if_index, address, address_length, is_del);
840 }
841
842 void
843 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
844 {
845   ip_interface_address_t *ia;
846   ip4_main_t *im;
847
848   im = &ip4_main;
849
850   /*
851    * when directed broadcast is enabled, the subnet braodcast route will forward
852    * packets using an adjacency with a broadcast MAC. otherwise it drops
853    */
854   /* *INDENT-OFF* */
855   foreach_ip_interface_address(&im->lookup_main, ia,
856                                sw_if_index, 0,
857      ({
858        if (ia->address_length <= 30)
859          {
860            ip4_address_t *ipa;
861
862            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
863
864            fib_prefix_t pfx = {
865              .fp_len = 32,
866              .fp_proto = FIB_PROTOCOL_IP4,
867              .fp_addr = {
868                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
869              },
870            };
871
872            ip4_add_subnet_bcast_route
873              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
874                                                   sw_if_index),
875               &pfx, sw_if_index);
876          }
877      }));
878   /* *INDENT-ON* */
879 }
880 #endif
881
882 static clib_error_t *
883 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
884 {
885   ip4_main_t *im = &ip4_main;
886   ip_interface_address_t *ia;
887   ip4_address_t *a;
888   u32 is_admin_up, fib_index;
889
890   /* Fill in lookup tables with default table (0). */
891   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
892
893   vec_validate_init_empty (im->
894                            lookup_main.if_address_pool_index_by_sw_if_index,
895                            sw_if_index, ~0);
896
897   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
898
899   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
900
901   /* *INDENT-OFF* */
902   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
903                                 0 /* honor unnumbered */,
904   ({
905     a = ip_interface_address_get_address (&im->lookup_main, ia);
906     if (is_admin_up)
907       ip4_add_interface_routes (sw_if_index,
908                                 im, fib_index,
909                                 ia);
910     else
911       ip4_del_interface_routes (sw_if_index,
912                                 im, fib_index,
913                                 a, ia->address_length);
914   }));
915   /* *INDENT-ON* */
916
917   return 0;
918 }
919
920 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
921
922 /* Built-in ip4 unicast rx feature path definition */
923 /* *INDENT-OFF* */
924 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
925 {
926   .arc_name = "ip4-unicast",
927   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
928   .last_in_arc = "ip4-lookup",
929   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
930 };
931
932 VNET_FEATURE_INIT (ip4_flow_classify, static) =
933 {
934   .arc_name = "ip4-unicast",
935   .node_name = "ip4-flow-classify",
936   .runs_before = VNET_FEATURES ("ip4-inacl"),
937 };
938
939 VNET_FEATURE_INIT (ip4_inacl, static) =
940 {
941   .arc_name = "ip4-unicast",
942   .node_name = "ip4-inacl",
943   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
944 };
945
946 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
947 {
948   .arc_name = "ip4-unicast",
949   .node_name = "ip4-source-and-port-range-check-rx",
950   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
951 };
952
953 VNET_FEATURE_INIT (ip4_policer_classify, static) =
954 {
955   .arc_name = "ip4-unicast",
956   .node_name = "ip4-policer-classify",
957   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
958 };
959
960 VNET_FEATURE_INIT (ip4_ipsec, static) =
961 {
962   .arc_name = "ip4-unicast",
963   .node_name = "ipsec4-input-feature",
964   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
965 };
966
967 VNET_FEATURE_INIT (ip4_vpath, static) =
968 {
969   .arc_name = "ip4-unicast",
970   .node_name = "vpath-input-ip4",
971   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
972 };
973
974 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
975 {
976   .arc_name = "ip4-unicast",
977   .node_name = "ip4-vxlan-bypass",
978   .runs_before = VNET_FEATURES ("ip4-lookup"),
979 };
980
981 VNET_FEATURE_INIT (ip4_not_enabled, static) =
982 {
983   .arc_name = "ip4-unicast",
984   .node_name = "ip4-not-enabled",
985   .runs_before = VNET_FEATURES ("ip4-lookup"),
986 };
987
988 VNET_FEATURE_INIT (ip4_lookup, static) =
989 {
990   .arc_name = "ip4-unicast",
991   .node_name = "ip4-lookup",
992   .runs_before = 0,     /* not before any other features */
993 };
994
995 /* Built-in ip4 multicast rx feature path definition */
996 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
997 {
998   .arc_name = "ip4-multicast",
999   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1000   .last_in_arc = "ip4-mfib-forward-lookup",
1001   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1002 };
1003
1004 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1005 {
1006   .arc_name = "ip4-multicast",
1007   .node_name = "vpath-input-ip4",
1008   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1009 };
1010
1011 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
1012 {
1013   .arc_name = "ip4-multicast",
1014   .node_name = "ip4-not-enabled",
1015   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1016 };
1017
1018 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1019 {
1020   .arc_name = "ip4-multicast",
1021   .node_name = "ip4-mfib-forward-lookup",
1022   .runs_before = 0,     /* last feature */
1023 };
1024
1025 /* Source and port-range check ip4 tx feature path definition */
1026 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1027 {
1028   .arc_name = "ip4-output",
1029   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1030   .last_in_arc = "interface-output",
1031   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1032 };
1033
1034 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1035 {
1036   .arc_name = "ip4-output",
1037   .node_name = "ip4-source-and-port-range-check-tx",
1038   .runs_before = VNET_FEATURES ("ip4-outacl"),
1039 };
1040
1041 VNET_FEATURE_INIT (ip4_outacl, static) =
1042 {
1043   .arc_name = "ip4-output",
1044   .node_name = "ip4-outacl",
1045   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1046 };
1047
1048 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1049 {
1050   .arc_name = "ip4-output",
1051   .node_name = "ipsec4-output-feature",
1052   .runs_before = VNET_FEATURES ("interface-output"),
1053 };
1054
1055 /* Built-in ip4 tx feature path definition */
1056 VNET_FEATURE_INIT (ip4_interface_output, static) =
1057 {
1058   .arc_name = "ip4-output",
1059   .node_name = "interface-output",
1060   .runs_before = 0,     /* not before any other features */
1061 };
1062 /* *INDENT-ON* */
1063
1064 static clib_error_t *
1065 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1066 {
1067   ip4_main_t *im = &ip4_main;
1068
1069   /* Fill in lookup tables with default table (0). */
1070   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1071   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1072
1073   if (!is_add)
1074     {
1075       ip4_main_t *im4 = &ip4_main;
1076       ip_lookup_main_t *lm4 = &im4->lookup_main;
1077       ip_interface_address_t *ia = 0;
1078       ip4_address_t *address;
1079       vlib_main_t *vm = vlib_get_main ();
1080
1081       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1082       /* *INDENT-OFF* */
1083       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1084       ({
1085         address = ip_interface_address_get_address (lm4, ia);
1086         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1087       }));
1088       /* *INDENT-ON* */
1089       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1090     }
1091
1092   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1093                                is_add, 0, 0);
1094
1095   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1096                                sw_if_index, is_add, 0, 0);
1097
1098   return /* no error */ 0;
1099 }
1100
1101 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1102
1103 /* Global IP4 main. */
1104 #ifndef CLIB_MARCH_VARIANT
1105 ip4_main_t ip4_main;
1106 #endif /* CLIB_MARCH_VARIANT */
1107
1108 static clib_error_t *
1109 ip4_lookup_init (vlib_main_t * vm)
1110 {
1111   ip4_main_t *im = &ip4_main;
1112   clib_error_t *error;
1113   uword i;
1114
1115   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1116     return error;
1117   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1118     return (error);
1119   if ((error = vlib_call_init_function (vm, fib_module_init)))
1120     return error;
1121   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1122     return error;
1123
1124   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1125     {
1126       u32 m;
1127
1128       if (i < 32)
1129         m = pow2_mask (i) << (32 - i);
1130       else
1131         m = ~0;
1132       im->fib_masks[i] = clib_host_to_net_u32 (m);
1133     }
1134
1135   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1136
1137   /* Create FIB with index 0 and table id of 0. */
1138   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1139                                      FIB_SOURCE_DEFAULT_ROUTE);
1140   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1141                                       MFIB_SOURCE_DEFAULT_ROUTE);
1142
1143   {
1144     pg_node_t *pn;
1145     pn = pg_get_node (ip4_lookup_node.index);
1146     pn->unformat_edit = unformat_pg_ip4_header;
1147   }
1148
1149   {
1150     ethernet_arp_header_t h;
1151
1152     clib_memset (&h, 0, sizeof (h));
1153
1154 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1155 #define _8(f,v) h.f = v;
1156     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1157     _16 (l3_type, ETHERNET_TYPE_IP4);
1158     _8 (n_l2_address_bytes, 6);
1159     _8 (n_l3_address_bytes, 4);
1160     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1161 #undef _16
1162 #undef _8
1163
1164     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1165                                /* data */ &h,
1166                                sizeof (h),
1167                                /* alloc chunk size */ 8,
1168                                "ip4 arp");
1169   }
1170
1171   return error;
1172 }
1173
1174 VLIB_INIT_FUNCTION (ip4_lookup_init);
1175
1176 typedef struct
1177 {
1178   /* Adjacency taken. */
1179   u32 dpo_index;
1180   u32 flow_hash;
1181   u32 fib_index;
1182
1183   /* Packet data, possibly *after* rewrite. */
1184   u8 packet_data[64 - 1 * sizeof (u32)];
1185 }
1186 ip4_forward_next_trace_t;
1187
1188 #ifndef CLIB_MARCH_VARIANT
1189 u8 *
1190 format_ip4_forward_next_trace (u8 * s, va_list * args)
1191 {
1192   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1193   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1194   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1195   u32 indent = format_get_indent (s);
1196   s = format (s, "%U%U",
1197               format_white_space, indent,
1198               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1199   return s;
1200 }
1201 #endif
1202
1203 static u8 *
1204 format_ip4_lookup_trace (u8 * s, va_list * args)
1205 {
1206   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1207   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1208   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1209   u32 indent = format_get_indent (s);
1210
1211   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1212               t->fib_index, t->dpo_index, t->flow_hash);
1213   s = format (s, "\n%U%U",
1214               format_white_space, indent,
1215               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1216   return s;
1217 }
1218
1219 static u8 *
1220 format_ip4_rewrite_trace (u8 * s, va_list * args)
1221 {
1222   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1223   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1224   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1225   u32 indent = format_get_indent (s);
1226
1227   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1228               t->fib_index, t->dpo_index, format_ip_adjacency,
1229               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1230   s = format (s, "\n%U%U",
1231               format_white_space, indent,
1232               format_ip_adjacency_packet_data,
1233               t->packet_data, sizeof (t->packet_data));
1234   return s;
1235 }
1236
1237 #ifndef CLIB_MARCH_VARIANT
1238 /* Common trace function for all ip4-forward next nodes. */
1239 void
1240 ip4_forward_next_trace (vlib_main_t * vm,
1241                         vlib_node_runtime_t * node,
1242                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1243 {
1244   u32 *from, n_left;
1245   ip4_main_t *im = &ip4_main;
1246
1247   n_left = frame->n_vectors;
1248   from = vlib_frame_vector_args (frame);
1249
1250   while (n_left >= 4)
1251     {
1252       u32 bi0, bi1;
1253       vlib_buffer_t *b0, *b1;
1254       ip4_forward_next_trace_t *t0, *t1;
1255
1256       /* Prefetch next iteration. */
1257       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1258       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1259
1260       bi0 = from[0];
1261       bi1 = from[1];
1262
1263       b0 = vlib_get_buffer (vm, bi0);
1264       b1 = vlib_get_buffer (vm, bi1);
1265
1266       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1267         {
1268           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1269           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1270           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1271           t0->fib_index =
1272             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1273              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1274             vec_elt (im->fib_index_by_sw_if_index,
1275                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1276
1277           clib_memcpy_fast (t0->packet_data,
1278                             vlib_buffer_get_current (b0),
1279                             sizeof (t0->packet_data));
1280         }
1281       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1282         {
1283           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1284           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1285           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1286           t1->fib_index =
1287             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1288              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1289             vec_elt (im->fib_index_by_sw_if_index,
1290                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1291           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1292                             sizeof (t1->packet_data));
1293         }
1294       from += 2;
1295       n_left -= 2;
1296     }
1297
1298   while (n_left >= 1)
1299     {
1300       u32 bi0;
1301       vlib_buffer_t *b0;
1302       ip4_forward_next_trace_t *t0;
1303
1304       bi0 = from[0];
1305
1306       b0 = vlib_get_buffer (vm, bi0);
1307
1308       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1309         {
1310           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1311           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1312           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1313           t0->fib_index =
1314             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1315              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1316             vec_elt (im->fib_index_by_sw_if_index,
1317                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1318           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1319                             sizeof (t0->packet_data));
1320         }
1321       from += 1;
1322       n_left -= 1;
1323     }
1324 }
1325
1326 /* Compute TCP/UDP/ICMP4 checksum in software. */
1327 u16
1328 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1329                               ip4_header_t * ip0)
1330 {
1331   ip_csum_t sum0;
1332   u32 ip_header_length, payload_length_host_byte_order;
1333
1334   /* Initialize checksum with ip header. */
1335   ip_header_length = ip4_header_bytes (ip0);
1336   payload_length_host_byte_order =
1337     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1338   sum0 =
1339     clib_host_to_net_u32 (payload_length_host_byte_order +
1340                           (ip0->protocol << 16));
1341
1342   if (BITS (uword) == 32)
1343     {
1344       sum0 =
1345         ip_csum_with_carry (sum0,
1346                             clib_mem_unaligned (&ip0->src_address, u32));
1347       sum0 =
1348         ip_csum_with_carry (sum0,
1349                             clib_mem_unaligned (&ip0->dst_address, u32));
1350     }
1351   else
1352     sum0 =
1353       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1354
1355   return ip_calculate_l4_checksum (vm, p0, sum0,
1356                                    payload_length_host_byte_order, (u8 *) ip0,
1357                                    ip_header_length, NULL);
1358 }
1359
1360 u32
1361 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1362 {
1363   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1364   udp_header_t *udp0;
1365   u16 sum16;
1366
1367   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1368           || ip0->protocol == IP_PROTOCOL_UDP);
1369
1370   udp0 = (void *) (ip0 + 1);
1371   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1372     {
1373       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1374                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1375       return p0->flags;
1376     }
1377
1378   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1379
1380   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1381                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1382
1383   return p0->flags;
1384 }
1385 #endif
1386
1387 /* *INDENT-OFF* */
1388 VNET_FEATURE_ARC_INIT (ip4_local) =
1389 {
1390   .arc_name  = "ip4-local",
1391   .start_nodes = VNET_FEATURES ("ip4-local"),
1392   .last_in_arc = "ip4-local-end-of-arc",
1393 };
1394 /* *INDENT-ON* */
1395
1396 static inline void
1397 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1398                             ip4_header_t * ip, u8 is_udp, u8 * error,
1399                             u8 * good_tcp_udp)
1400 {
1401   u32 flags0;
1402   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1403   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1404   if (is_udp)
1405     {
1406       udp_header_t *udp;
1407       u32 ip_len, udp_len;
1408       i32 len_diff;
1409       udp = ip4_next_header (ip);
1410       /* Verify UDP length. */
1411       ip_len = clib_net_to_host_u16 (ip->length);
1412       udp_len = clib_net_to_host_u16 (udp->length);
1413
1414       len_diff = ip_len - udp_len;
1415       *good_tcp_udp &= len_diff >= 0;
1416       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1417     }
1418 }
1419
/*
 * True when the buffer is flagged for offload and a TCP or UDP checksum
 * offload is requested for it.
 */
#define ip4_local_csum_is_offloaded(_b)                                       \
  (((_b)->flags & VNET_BUFFER_F_OFFLOAD) &&                                   \
   (vnet_buffer2 (_b)->oflags &                                               \
    (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)))

/*
 * True when the packet is TCP/UDP and its checksum has neither been
 * computed already nor been offloaded, i.e. it must be validated in
 * software.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                             \
  ((is_tcp_udp) &&                                                            \
   !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) ||                    \
     ip4_local_csum_is_offloaded (_b)))

/*
 * 1 when the buffer's L4 checksum is flagged correct or is offloaded,
 * else 0.  The whole expansion is parenthesised so the macro can be used
 * safely inside a larger expression.
 */
#define ip4_local_csum_is_valid(_b)                                           \
  ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) ||                      \
    ip4_local_csum_is_offloaded (_b)) != 0)
1432
1433 static inline void
1434 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1435                          ip4_header_t * ih, u8 * error)
1436 {
1437   u8 is_udp, is_tcp_udp, good_tcp_udp;
1438
1439   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1440   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1441
1442   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1443     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1444   else
1445     good_tcp_udp = ip4_local_csum_is_valid (b);
1446
1447   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1448   *error = (is_tcp_udp && !good_tcp_udp
1449             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1450 }
1451
1452 static inline void
1453 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1454                             ip4_header_t ** ih, u8 * error)
1455 {
1456   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1457
1458   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1459   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1460
1461   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1462   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1463
1464   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1465   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1466
1467   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1468                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1469     {
1470       if (is_tcp_udp[0])
1471         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1472                                     &good_tcp_udp[0]);
1473       if (is_tcp_udp[1])
1474         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1475                                     &good_tcp_udp[1]);
1476     }
1477
1478   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1479               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1480   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1481               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1482 }
1483
1484 static inline void
1485 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1486                               vlib_buffer_t * b, u16 * next, u8 error,
1487                               u8 head_of_feature_arc)
1488 {
1489   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1490   u32 next_index;
1491
1492   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1493   b->error = error ? error_node->errors[error] : 0;
1494   if (head_of_feature_arc)
1495     {
1496       next_index = *next;
1497       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1498         {
1499           vnet_feature_arc_start (arc_index,
1500                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1501                                   &next_index, b);
1502           *next = next_index;
1503         }
1504     }
1505 }
1506
1507 typedef struct
1508 {
1509   ip4_address_t src;
1510   u32 lbi;
1511   u8 error;
1512   u8 first;
1513 } ip4_local_last_check_t;
1514
1515 static inline void
1516 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1517                      ip4_local_last_check_t * last_check, u8 * error0)
1518 {
1519   ip4_fib_mtrie_leaf_t leaf0;
1520   ip4_fib_mtrie_t *mtrie0;
1521   const dpo_id_t *dpo0;
1522   load_balance_t *lb0;
1523   u32 lbi0;
1524
1525   vnet_buffer (b)->ip.fib_index =
1526     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1527     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1528
1529   /*
1530    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1531    *  adjacency for the destination address (the local interface address).
1532    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1533    *  adjacency for the source address (the remote sender's address)
1534    */
1535   if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
1536       last_check->first)
1537     {
1538       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1539       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1540       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1541       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1542       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1543
1544       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1545         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1546       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1547
1548       lb0 = load_balance_get (lbi0);
1549       dpo0 = load_balance_get_bucket_i (lb0, 0);
1550
1551       /*
1552        * Must have a route to source otherwise we drop the packet.
1553        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1554        *
1555        * The checks are:
1556        *  - the source is a recieve => it's from us => bogus, do this
1557        *    first since it sets a different error code.
1558        *  - uRPF check for any route to source - accept if passes.
1559        *  - allow packets destined to the broadcast address from unknown sources
1560        */
1561
1562       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1563                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1564                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1565       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1566                   && !fib_urpf_check_size (lb0->lb_urpf)
1567                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1568                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1569
1570       last_check->src.as_u32 = ip0->src_address.as_u32;
1571       last_check->lbi = lbi0;
1572       last_check->error = *error0;
1573       last_check->first = 0;
1574     }
1575   else
1576     {
1577       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1578         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1579       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1580       *error0 = last_check->error;
1581     }
1582 }
1583
1584 static inline void
1585 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1586                         ip4_local_last_check_t * last_check, u8 * error)
1587 {
1588   ip4_fib_mtrie_leaf_t leaf[2];
1589   ip4_fib_mtrie_t *mtrie[2];
1590   const dpo_id_t *dpo[2];
1591   load_balance_t *lb[2];
1592   u32 not_last_hit;
1593   u32 lbi[2];
1594
1595   not_last_hit = last_check->first;
1596   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1597   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1598
1599   vnet_buffer (b[0])->ip.fib_index =
1600     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1601     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1602     vnet_buffer (b[0])->ip.fib_index;
1603
1604   vnet_buffer (b[1])->ip.fib_index =
1605     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1606     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1607     vnet_buffer (b[1])->ip.fib_index;
1608
1609   /*
1610    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1611    *  adjacency for the destination address (the local interface address).
1612    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1613    *  adjacency for the source address (the remote sender's address)
1614    */
1615   if (PREDICT_TRUE (not_last_hit))
1616     {
1617       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1618       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1619
1620       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1621       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1622
1623       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1624                                            &ip[0]->src_address, 2);
1625       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1626                                            &ip[1]->src_address, 2);
1627
1628       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1629                                            &ip[0]->src_address, 3);
1630       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1631                                            &ip[1]->src_address, 3);
1632
1633       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1634       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1635
1636       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1637         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1638       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1639
1640       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1641         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1642       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1643
1644       lb[0] = load_balance_get (lbi[0]);
1645       lb[1] = load_balance_get (lbi[1]);
1646
1647       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1648       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1649
1650       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1651                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1652                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1653       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1654                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1655                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1656                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1657
1658       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1659                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1660                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1661       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1662                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1663                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1664                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1665
1666       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1667       last_check->lbi = lbi[1];
1668       last_check->error = error[1];
1669       last_check->first = 0;
1670     }
1671   else
1672     {
1673       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1674         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1675       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1676
1677       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1678         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1679       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1680
1681       error[0] = last_check->error;
1682       error[1] = last_check->error;
1683     }
1684 }
1685
/* Packet classes returned by ip4_local_classify(). */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4 = 0,          /* neither a fragment nor NATed */
  IP_LOCAL_PACKET_TYPE_NAT = 1,         /* buffer flagged as NATed */
  IP_LOCAL_PACKET_TYPE_FRAG = 2,        /* IP fragment */
};
1692
1693 /**
1694  * Determine packet type and next node.
1695  *
1696  * The expectation is that all packets that are not L4 will skip
1697  * checksums and source checks.
1698  */
1699 always_inline u8
1700 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1701 {
1702   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1703
1704   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1705     {
1706       *next = IP_LOCAL_NEXT_REASSEMBLY;
1707       return IP_LOCAL_PACKET_TYPE_FRAG;
1708     }
1709   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1710     {
1711       *next = lm->local_next_by_ip_protocol[ip->protocol];
1712       return IP_LOCAL_PACKET_TYPE_NAT;
1713     }
1714
1715   *next = lm->local_next_by_ip_protocol[ip->protocol];
1716   return IP_LOCAL_PACKET_TYPE_L4;
1717 }
1718
1719 static inline uword
1720 ip4_local_inline (vlib_main_t * vm,
1721                   vlib_node_runtime_t * node,
1722                   vlib_frame_t * frame, int head_of_feature_arc)
1723 {
1724   u32 *from, n_left_from;
1725   vlib_node_runtime_t *error_node =
1726     vlib_node_get_runtime (vm, ip4_local_node.index);
1727   u16 nexts[VLIB_FRAME_SIZE], *next;
1728   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1729   ip4_header_t *ip[2];
1730   u8 error[2], pt[2];
1731
1732   ip4_local_last_check_t last_check = {
1733     /*
1734      * 0.0.0.0 can appear as the source address of an IP packet,
1735      * as can any other address, hence the need to use the 'first'
1736      * member to make sure the .lbi is initialised for the first
1737      * packet.
1738      */
1739     .src = {.as_u32 = 0},
1740     .lbi = ~0,
1741     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1742     .first = 1,
1743   };
1744
1745   from = vlib_frame_vector_args (frame);
1746   n_left_from = frame->n_vectors;
1747
1748   if (node->flags & VLIB_NODE_FLAG_TRACE)
1749     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1750
1751   vlib_get_buffers (vm, from, bufs, n_left_from);
1752   b = bufs;
1753   next = nexts;
1754
1755   while (n_left_from >= 6)
1756     {
1757       u8 not_batch = 0;
1758
1759       /* Prefetch next iteration. */
1760       {
1761         vlib_prefetch_buffer_header (b[4], LOAD);
1762         vlib_prefetch_buffer_header (b[5], LOAD);
1763
1764         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1765         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1766       }
1767
1768       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1769
1770       ip[0] = vlib_buffer_get_current (b[0]);
1771       ip[1] = vlib_buffer_get_current (b[1]);
1772
1773       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1774       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1775
1776       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1777       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1778
1779       not_batch = pt[0] ^ pt[1];
1780
1781       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1782         goto skip_checks;
1783
1784       if (PREDICT_TRUE (not_batch == 0))
1785         {
1786           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1787           ip4_local_check_src_x2 (b, ip, &last_check, error);
1788         }
1789       else
1790         {
1791           if (!pt[0])
1792             {
1793               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1794               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1795             }
1796           if (!pt[1])
1797             {
1798               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1799               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1800             }
1801         }
1802
1803     skip_checks:
1804
1805       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1806                                     head_of_feature_arc);
1807       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1808                                     head_of_feature_arc);
1809
1810       b += 2;
1811       next += 2;
1812       n_left_from -= 2;
1813     }
1814
1815   while (n_left_from > 0)
1816     {
1817       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1818
1819       ip[0] = vlib_buffer_get_current (b[0]);
1820       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1821       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1822
1823       if (head_of_feature_arc == 0 || pt[0])
1824         goto skip_check;
1825
1826       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1827       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1828
1829     skip_check:
1830
1831       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1832                                     head_of_feature_arc);
1833
1834       b += 1;
1835       next += 1;
1836       n_left_from -= 1;
1837     }
1838
1839   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1840   return frame->n_vectors;
1841 }
1842
1843 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1844                                vlib_frame_t * frame)
1845 {
1846   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1847 }
1848
1849 /* *INDENT-OFF* */
1850 VLIB_REGISTER_NODE (ip4_local_node) =
1851 {
1852   .name = "ip4-local",
1853   .vector_size = sizeof (u32),
1854   .format_trace = format_ip4_forward_next_trace,
1855   .n_errors = IP4_N_ERROR,
1856   .error_strings = ip4_error_strings,
1857   .n_next_nodes = IP_LOCAL_N_NEXT,
1858   .next_nodes =
1859   {
1860     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1861     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1862     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1863     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1864     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1865   },
1866 };
1867 /* *INDENT-ON* */
1868
1869
1870 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1871                                           vlib_node_runtime_t * node,
1872                                           vlib_frame_t * frame)
1873 {
1874   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1875 }
1876
1877 /* *INDENT-OFF* */
1878 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1879   .name = "ip4-local-end-of-arc",
1880   .vector_size = sizeof (u32),
1881
1882   .format_trace = format_ip4_forward_next_trace,
1883   .sibling_of = "ip4-local",
1884 };
1885
1886 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1887   .arc_name = "ip4-local",
1888   .node_name = "ip4-local-end-of-arc",
1889   .runs_before = 0, /* not before any other features */
1890 };
1891 /* *INDENT-ON* */
1892
1893 #ifndef CLIB_MARCH_VARIANT
1894 void
1895 ip4_register_protocol (u32 protocol, u32 node_index)
1896 {
1897   vlib_main_t *vm = vlib_get_main ();
1898   ip4_main_t *im = &ip4_main;
1899   ip_lookup_main_t *lm = &im->lookup_main;
1900
1901   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1902   lm->local_next_by_ip_protocol[protocol] =
1903     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1904 }
1905
1906 void
1907 ip4_unregister_protocol (u32 protocol)
1908 {
1909   ip4_main_t *im = &ip4_main;
1910   ip_lookup_main_t *lm = &im->lookup_main;
1911
1912   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1913   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1914 }
1915 #endif
1916
1917 static clib_error_t *
1918 show_ip_local_command_fn (vlib_main_t * vm,
1919                           unformat_input_t * input, vlib_cli_command_t * cmd)
1920 {
1921   ip4_main_t *im = &ip4_main;
1922   ip_lookup_main_t *lm = &im->lookup_main;
1923   int i;
1924
1925   vlib_cli_output (vm, "Protocols handled by ip4_local");
1926   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1927     {
1928       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1929         {
1930           u32 node_index = vlib_get_node (vm,
1931                                           ip4_local_node.index)->
1932             next_nodes[lm->local_next_by_ip_protocol[i]];
1933           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1934                            format_vlib_node_name, vm, node_index);
1935         }
1936     }
1937   return 0;
1938 }
1939
1940
1941
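/*?
 * Display the set of protocols handled by the local IPv4 stack.
 * Each handled protocol is printed on its own line in the form
 * '<protocol>: <next-node-name>'.
 *
 * @cliexpar
 * Example of how to display local protocol table, e.g. with handlers
 * registered for IP protocols 1, 17 and 47:
 * @cliexstart{show ip local}
 * Protocols handled by ip4_local
 * <protocol 1>: <next-node-name>
 * <protocol 17>: <next-node-name>
 * <protocol 47>: <next-node-name>
 * @cliexend
?*/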
1954 /* *INDENT-OFF* */
1955 VLIB_CLI_COMMAND (show_ip_local, static) =
1956 {
1957   .path = "show ip local",
1958   .function = show_ip_local_command_fn,
1959   .short_help = "show ip local",
1960 };
1961 /* *INDENT-ON* */
1962
/**
 * Next-node slots the rewrite path selects by itself; any other next
 * index comes from the adjacency's rewrite header.
 */
typedef enum
{
  /* default for every packet until a better next is chosen */
  IP4_REWRITE_NEXT_DROP = 0,
  /* TTL expired, or MTU exceeded with don't-fragment set */
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  /* MTU exceeded and fragmentation is allowed */
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  IP4_REWRITE_N_NEXT            /* Last */
} ip4_rewrite_next_t;
1970
/**
 * The bits of an IPv4 destination address (as stored in the packet and
 * loaded as a native u32) that are kept when constructing a multicast
 * MAC address.
 */
#if !CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
1980
1981 always_inline void
1982 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
1983                u16 adj_packet_bytes, bool df, u16 * next,
1984                u8 is_midchain, u32 * error)
1985 {
1986   if (packet_len > adj_packet_bytes)
1987     {
1988       *error = IP4_ERROR_MTU_EXCEEDED;
1989       if (df)
1990         {
1991           icmp4_error_set_vnet_buffer
1992             (b, ICMP4_destination_unreachable,
1993              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
1994              adj_packet_bytes);
1995           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
1996         }
1997       else
1998         {
1999           /* IP fragmentation */
2000           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2001                                    (is_midchain ?
2002                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
2003                                     IP_FRAG_NEXT_IP_REWRITE), 0);
2004           *next = IP4_REWRITE_NEXT_FRAGMENT;
2005         }
2006     }
2007 }
2008
2009 /* increment TTL & update checksum.
2010    Works either endian, so no need for byte swap. */
2011 static_always_inline void
2012 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
2013 {
2014   i32 ttl;
2015   u32 checksum;
2016   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2017     return;
2018
2019   ttl = ip->ttl;
2020
2021   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
2022   checksum += checksum >= 0xffff;
2023
2024   ip->checksum = checksum;
2025   ttl += 1;
2026   ip->ttl = ttl;
2027
2028   ASSERT (ip4_header_checksum_is_valid (ip));
2029 }
2030
2031 /* Decrement TTL & update checksum.
2032    Works either endian, so no need for byte swap. */
2033 static_always_inline void
2034 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2035                             u32 * error)
2036 {
2037   i32 ttl;
2038   u32 checksum;
2039   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2040     return;
2041
2042   ttl = ip->ttl;
2043
2044   /* Input node should have reject packets with ttl 0. */
2045   ASSERT (ip->ttl > 0);
2046
2047   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2048   checksum += checksum >= 0xffff;
2049
2050   ip->checksum = checksum;
2051   ttl -= 1;
2052   ip->ttl = ttl;
2053
2054   /*
2055    * If the ttl drops below 1 when forwarding, generate
2056    * an ICMP response.
2057    */
2058   if (PREDICT_FALSE (ttl <= 0))
2059     {
2060       *error = IP4_ERROR_TIME_EXPIRED;
2061       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2062       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2063                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2064                                    0);
2065       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2066     }
2067
2068   /* Verify checksum. */
2069   ASSERT (ip4_header_checksum_is_valid (ip) ||
2070           (vnet_buffer2 (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM));
2071 }
2072
2073 always_inline uword
2074 ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
2075                     vlib_frame_t *frame, int do_counters, int is_midchain,
2076                     int is_mcast)
2077 {
2078   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2079   u32 *from = vlib_frame_vector_args (frame);
2080   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2081   u16 nexts[VLIB_FRAME_SIZE], *next;
2082   u32 n_left_from;
2083   vlib_node_runtime_t *error_node =
2084     vlib_node_get_runtime (vm, ip4_input_node.index);
2085
2086   n_left_from = frame->n_vectors;
2087   u32 thread_index = vm->thread_index;
2088
2089   vlib_get_buffers (vm, from, bufs, n_left_from);
2090   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2091
2092 #if (CLIB_N_PREFETCHES >= 8)
2093   if (n_left_from >= 6)
2094     {
2095       int i;
2096       for (i = 2; i < 6; i++)
2097         vlib_prefetch_buffer_header (bufs[i], LOAD);
2098     }
2099
2100   next = nexts;
2101   b = bufs;
2102   while (n_left_from >= 8)
2103     {
2104       const ip_adjacency_t *adj0, *adj1;
2105       ip4_header_t *ip0, *ip1;
2106       u32 rw_len0, error0, adj_index0;
2107       u32 rw_len1, error1, adj_index1;
2108       u32 tx_sw_if_index0, tx_sw_if_index1;
2109       u8 *p;
2110
2111       if (is_midchain)
2112         {
2113           vlib_prefetch_buffer_header (b[6], LOAD);
2114           vlib_prefetch_buffer_header (b[7], LOAD);
2115         }
2116
2117       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2118       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2119
2120       /*
2121        * pre-fetch the per-adjacency counters
2122        */
2123       if (do_counters)
2124         {
2125           vlib_prefetch_combined_counter (&adjacency_counters,
2126                                           thread_index, adj_index0);
2127           vlib_prefetch_combined_counter (&adjacency_counters,
2128                                           thread_index, adj_index1);
2129         }
2130
2131       ip0 = vlib_buffer_get_current (b[0]);
2132       ip1 = vlib_buffer_get_current (b[1]);
2133
2134       error0 = error1 = IP4_ERROR_NONE;
2135
2136       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2137       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2138
2139       /* Rewrite packet header and updates lengths. */
2140       adj0 = adj_get (adj_index0);
2141       adj1 = adj_get (adj_index1);
2142
2143       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2144       rw_len0 = adj0[0].rewrite_header.data_bytes;
2145       rw_len1 = adj1[0].rewrite_header.data_bytes;
2146       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2147       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2148
2149       p = vlib_buffer_get_current (b[2]);
2150       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2151       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2152
2153       p = vlib_buffer_get_current (b[3]);
2154       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2155       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2156
2157       /* Check MTU of outgoing interface. */
2158       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2159       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2160
2161       if (b[0]->flags & VNET_BUFFER_F_GSO)
2162         ip0_len = gso_mtu_sz (b[0]);
2163       if (b[1]->flags & VNET_BUFFER_F_GSO)
2164         ip1_len = gso_mtu_sz (b[1]);
2165
2166       ip4_mtu_check (b[0], ip0_len,
2167                      adj0[0].rewrite_header.max_l3_packet_bytes,
2168                      ip0->flags_and_fragment_offset &
2169                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2170                      next + 0, is_midchain, &error0);
2171       ip4_mtu_check (b[1], ip1_len,
2172                      adj1[0].rewrite_header.max_l3_packet_bytes,
2173                      ip1->flags_and_fragment_offset &
2174                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2175                      next + 1, is_midchain, &error1);
2176
2177       if (is_mcast)
2178         {
2179           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2180                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2181                     IP4_ERROR_SAME_INTERFACE : error0);
2182           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2183                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2184                     IP4_ERROR_SAME_INTERFACE : error1);
2185         }
2186
2187       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2188        * to see the IP header */
2189       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2190         {
2191           u32 next_index = adj0[0].rewrite_header.next_index;
2192           vlib_buffer_advance (b[0], -(word) rw_len0);
2193
2194           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2195           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2196
2197           if (PREDICT_FALSE
2198               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2199             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2200                                                 tx_sw_if_index0,
2201                                                 &next_index, b[0],
2202                                                 adj0->ia_cfg_index);
2203
2204           next[0] = next_index;
2205           if (is_midchain)
2206             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2207                                         0 /* is_ip6 */ );
2208         }
2209       else
2210         {
2211           b[0]->error = error_node->errors[error0];
2212           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2213             ip4_ttl_inc (b[0], ip0);
2214         }
2215       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2216         {
2217           u32 next_index = adj1[0].rewrite_header.next_index;
2218           vlib_buffer_advance (b[1], -(word) rw_len1);
2219
2220           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2221           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2222
2223           if (PREDICT_FALSE
2224               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2225             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2226                                                 tx_sw_if_index1,
2227                                                 &next_index, b[1],
2228                                                 adj1->ia_cfg_index);
2229           next[1] = next_index;
2230           if (is_midchain)
2231             vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ ,
2232                                         0 /* is_ip6 */ );
2233         }
2234       else
2235         {
2236           b[1]->error = error_node->errors[error1];
2237           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2238             ip4_ttl_inc (b[1], ip1);
2239         }
2240
2241       if (is_midchain)
2242         /* Guess we are only writing on ipv4 header. */
2243         vnet_rewrite_two_headers (adj0[0], adj1[0],
2244                                   ip0, ip1, sizeof (ip4_header_t));
2245       else
2246         /* Guess we are only writing on simple Ethernet header. */
2247         vnet_rewrite_two_headers (adj0[0], adj1[0],
2248                                   ip0, ip1, sizeof (ethernet_header_t));
2249
2250       if (do_counters)
2251         {
2252           if (error0 == IP4_ERROR_NONE)
2253             vlib_increment_combined_counter
2254               (&adjacency_counters,
2255                thread_index,
2256                adj_index0, 1,
2257                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2258
2259           if (error1 == IP4_ERROR_NONE)
2260             vlib_increment_combined_counter
2261               (&adjacency_counters,
2262                thread_index,
2263                adj_index1, 1,
2264                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2265         }
2266
2267       if (is_midchain)
2268         {
2269           if (error0 == IP4_ERROR_NONE)
2270             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2271           if (error1 == IP4_ERROR_NONE)
2272             adj_midchain_fixup (vm, adj1, b[1], VNET_LINK_IP4);
2273         }
2274
2275       if (is_mcast)
2276         {
2277           /* copy bytes from the IP address into the MAC rewrite */
2278           if (error0 == IP4_ERROR_NONE)
2279             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2280                                         adj0->rewrite_header.dst_mcast_offset,
2281                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2282           if (error1 == IP4_ERROR_NONE)
2283             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2284                                         adj1->rewrite_header.dst_mcast_offset,
2285                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2286         }
2287
2288       next += 2;
2289       b += 2;
2290       n_left_from -= 2;
2291     }
2292 #elif (CLIB_N_PREFETCHES >= 4)
2293   next = nexts;
2294   b = bufs;
2295   while (n_left_from >= 1)
2296     {
2297       ip_adjacency_t *adj0;
2298       ip4_header_t *ip0;
2299       u32 rw_len0, error0, adj_index0;
2300       u32 tx_sw_if_index0;
2301       u8 *p;
2302
2303       /* Prefetch next iteration */
2304       if (PREDICT_TRUE (n_left_from >= 4))
2305         {
2306           ip_adjacency_t *adj2;
2307           u32 adj_index2;
2308
2309           vlib_prefetch_buffer_header (b[3], LOAD);
2310           vlib_prefetch_buffer_data (b[2], LOAD);
2311
2312           /* Prefetch adj->rewrite_header */
2313           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2314           adj2 = adj_get (adj_index2);
2315           p = (u8 *) adj2;
2316           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2317                          LOAD);
2318         }
2319
2320       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2321
2322       /*
2323        * Prefetch the per-adjacency counters
2324        */
2325       if (do_counters)
2326         {
2327           vlib_prefetch_combined_counter (&adjacency_counters,
2328                                           thread_index, adj_index0);
2329         }
2330
2331       ip0 = vlib_buffer_get_current (b[0]);
2332
2333       error0 = IP4_ERROR_NONE;
2334
2335       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2336
2337       /* Rewrite packet header and updates lengths. */
2338       adj0 = adj_get (adj_index0);
2339
2340       /* Rewrite header was prefetched. */
2341       rw_len0 = adj0[0].rewrite_header.data_bytes;
2342       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2343
2344       /* Check MTU of outgoing interface. */
2345       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2346
2347       if (b[0]->flags & VNET_BUFFER_F_GSO)
2348         ip0_len = gso_mtu_sz (b[0]);
2349
2350       ip4_mtu_check (b[0], ip0_len,
2351                      adj0[0].rewrite_header.max_l3_packet_bytes,
2352                      ip0->flags_and_fragment_offset &
2353                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2354                      next + 0, is_midchain, &error0);
2355
2356       if (is_mcast)
2357         {
2358           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2359                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2360                     IP4_ERROR_SAME_INTERFACE : error0);
2361         }
2362
2363       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2364        * to see the IP header */
2365       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2366         {
2367           u32 next_index = adj0[0].rewrite_header.next_index;
2368           vlib_buffer_advance (b[0], -(word) rw_len0);
2369           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2370           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2371
2372           if (PREDICT_FALSE
2373               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2374             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2375                                                 tx_sw_if_index0,
2376                                                 &next_index, b[0],
2377                                                 adj0->ia_cfg_index);
2378           next[0] = next_index;
2379
2380           if (is_midchain)
2381             {
2382               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2383                                           0 /* is_ip6 */ );
2384
2385               /* Guess we are only writing on ipv4 header. */
2386               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2387             }
2388           else
2389             /* Guess we are only writing on simple Ethernet header. */
2390             vnet_rewrite_one_header (adj0[0], ip0,
2391                                      sizeof (ethernet_header_t));
2392
2393           /*
2394            * Bump the per-adjacency counters
2395            */
2396           if (do_counters)
2397             vlib_increment_combined_counter
2398               (&adjacency_counters,
2399                thread_index,
2400                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2401                                                            b[0]) + rw_len0);
2402
2403           if (is_midchain)
2404             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2405
2406           if (is_mcast)
2407             /* copy bytes from the IP address into the MAC rewrite */
2408             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2409                                         adj0->rewrite_header.dst_mcast_offset,
2410                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2411         }
2412       else
2413         {
2414           b[0]->error = error_node->errors[error0];
2415           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2416             ip4_ttl_inc (b[0], ip0);
2417         }
2418
2419       next += 1;
2420       b += 1;
2421       n_left_from -= 1;
2422     }
2423 #endif
2424
2425   while (n_left_from > 0)
2426     {
2427       ip_adjacency_t *adj0;
2428       ip4_header_t *ip0;
2429       u32 rw_len0, adj_index0, error0;
2430       u32 tx_sw_if_index0;
2431
2432       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2433
2434       adj0 = adj_get (adj_index0);
2435
2436       if (do_counters)
2437         vlib_prefetch_combined_counter (&adjacency_counters,
2438                                         thread_index, adj_index0);
2439
2440       ip0 = vlib_buffer_get_current (b[0]);
2441
2442       error0 = IP4_ERROR_NONE;
2443
2444       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2445
2446
2447       /* Update packet buffer attributes/set output interface. */
2448       rw_len0 = adj0[0].rewrite_header.data_bytes;
2449       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2450
2451       /* Check MTU of outgoing interface. */
2452       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2453       if (b[0]->flags & VNET_BUFFER_F_GSO)
2454         ip0_len = gso_mtu_sz (b[0]);
2455
2456       ip4_mtu_check (b[0], ip0_len,
2457                      adj0[0].rewrite_header.max_l3_packet_bytes,
2458                      ip0->flags_and_fragment_offset &
2459                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2460                      next + 0, is_midchain, &error0);
2461
2462       if (is_mcast)
2463         {
2464           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2465                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2466                     IP4_ERROR_SAME_INTERFACE : error0);
2467         }
2468
2469       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2470        * to see the IP header */
2471       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2472         {
2473           u32 next_index = adj0[0].rewrite_header.next_index;
2474           vlib_buffer_advance (b[0], -(word) rw_len0);
2475           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2476           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2477
2478           if (PREDICT_FALSE
2479               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2480             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2481                                                 tx_sw_if_index0,
2482                                                 &next_index, b[0],
2483                                                 adj0->ia_cfg_index);
2484           next[0] = next_index;
2485
2486           if (is_midchain)
2487             {
2488               /* this acts on the packet that is about to be encapped */
2489               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2490                                           0 /* is_ip6 */ );
2491
2492               /* Guess we are only writing on ipv4 header. */
2493               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2494             }
2495           else
2496             /* Guess we are only writing on simple Ethernet header. */
2497             vnet_rewrite_one_header (adj0[0], ip0,
2498                                      sizeof (ethernet_header_t));
2499
2500           if (do_counters)
2501             vlib_increment_combined_counter
2502               (&adjacency_counters,
2503                thread_index, adj_index0, 1,
2504                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2505
2506           if (is_midchain)
2507             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2508
2509           if (is_mcast)
2510             /* copy bytes from the IP address into the MAC rewrite */
2511             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2512                                         adj0->rewrite_header.dst_mcast_offset,
2513                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2514         }
2515       else
2516         {
2517           b[0]->error = error_node->errors[error0];
2518           /* undo the TTL decrement - we'll be back to do it again */
2519           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2520             ip4_ttl_inc (b[0], ip0);
2521         }
2522
2523       next += 1;
2524       b += 1;
2525       n_left_from -= 1;
2526     }
2527
2528
2529   /* Need to do trace after rewrites to pick up new packet data. */
2530   if (node->flags & VLIB_NODE_FLAG_TRACE)
2531     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2532
2533   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2534   return frame->n_vectors;
2535 }
2536
2537 /** @brief IPv4 rewrite node.
2538     @node ip4-rewrite
2539
2540     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2541     header checksum, fetch the ip adjacency, check the outbound mtu,
2542     apply the adjacency rewrite, and send pkts to the adjacency
2543     rewrite header's rewrite_next_index.
2544
2545     @param vm vlib_main_t corresponding to the current thread
2546     @param node vlib_node_runtime_t
2547     @param frame vlib_frame_t whose contents should be dispatched
2548
2549     @par Graph mechanics: buffer metadata, next index usage
2550
2551     @em Uses:
2552     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2553         - the rewrite adjacency index
2554     - <code>adj->lookup_next_index</code>
2555         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2556           the packet will be dropped.
2557     - <code>adj->rewrite_header</code>
2558         - Rewrite string length, rewrite string, next_index
2559
2560     @em Sets:
2561     - <code>b->current_data, b->current_length</code>
2562         - Updated net of applying the rewrite string
2563
2564     <em>Next Indices:</em>
2565     - <code> adj->rewrite_header.next_index </code>
2566       or @c ip4-drop
2567 */
2568
2569 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2570                                  vlib_frame_t * frame)
2571 {
2572   if (adj_are_counters_enabled ())
2573     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2574   else
2575     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2576 }
2577
2578 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2579                                        vlib_node_runtime_t * node,
2580                                        vlib_frame_t * frame)
2581 {
2582   if (adj_are_counters_enabled ())
2583     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2584   else
2585     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2586 }
2587
2588 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2589                                   vlib_node_runtime_t * node,
2590                                   vlib_frame_t * frame)
2591 {
2592   if (adj_are_counters_enabled ())
2593     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2594   else
2595     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2596 }
2597
2598 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2599                                        vlib_node_runtime_t * node,
2600                                        vlib_frame_t * frame)
2601 {
2602   if (adj_are_counters_enabled ())
2603     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2604   else
2605     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2606 }
2607
2608 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2609                                         vlib_node_runtime_t * node,
2610                                         vlib_frame_t * frame)
2611 {
2612   if (adj_are_counters_enabled ())
2613     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2614   else
2615     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2616 }
2617
2618 /* *INDENT-OFF* */
2619 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2620   .name = "ip4-rewrite",
2621   .vector_size = sizeof (u32),
2622
2623   .format_trace = format_ip4_rewrite_trace,
2624
2625   .n_next_nodes = IP4_REWRITE_N_NEXT,
2626   .next_nodes = {
2627     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2628     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2629     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2630   },
2631 };
2632
2633 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2634   .name = "ip4-rewrite-bcast",
2635   .vector_size = sizeof (u32),
2636
2637   .format_trace = format_ip4_rewrite_trace,
2638   .sibling_of = "ip4-rewrite",
2639 };
2640
2641 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2642   .name = "ip4-rewrite-mcast",
2643   .vector_size = sizeof (u32),
2644
2645   .format_trace = format_ip4_rewrite_trace,
2646   .sibling_of = "ip4-rewrite",
2647 };
2648
2649 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2650   .name = "ip4-mcast-midchain",
2651   .vector_size = sizeof (u32),
2652
2653   .format_trace = format_ip4_rewrite_trace,
2654   .sibling_of = "ip4-rewrite",
2655 };
2656
2657 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2658   .name = "ip4-midchain",
2659   .vector_size = sizeof (u32),
2660   .format_trace = format_ip4_rewrite_trace,
2661   .sibling_of = "ip4-rewrite",
2662 };
/* *INDENT-ON* */
2664
2665 static int
2666 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2667 {
2668   ip4_fib_mtrie_t *mtrie0;
2669   ip4_fib_mtrie_leaf_t leaf0;
2670   u32 lbi0;
2671
2672   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2673
2674   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2675   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2676   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2677
2678   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2679
2680   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2681 }
2682
2683 static clib_error_t *
2684 test_lookup_command_fn (vlib_main_t * vm,
2685                         unformat_input_t * input, vlib_cli_command_t * cmd)
2686 {
2687   ip4_fib_t *fib;
2688   u32 table_id = 0;
2689   f64 count = 1;
2690   u32 n;
2691   int i;
2692   ip4_address_t ip4_base_address;
2693   u64 errors = 0;
2694
2695   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2696     {
2697       if (unformat (input, "table %d", &table_id))
2698         {
2699           /* Make sure the entry exists. */
2700           fib = ip4_fib_get (table_id);
2701           if ((fib) && (fib->index != table_id))
2702             return clib_error_return (0, "<fib-index> %d does not exist",
2703                                       table_id);
2704         }
2705       else if (unformat (input, "count %f", &count))
2706         ;
2707
2708       else if (unformat (input, "%U",
2709                          unformat_ip4_address, &ip4_base_address))
2710         ;
2711       else
2712         return clib_error_return (0, "unknown input `%U'",
2713                                   format_unformat_error, input);
2714     }
2715
2716   n = count;
2717
2718   for (i = 0; i < n; i++)
2719     {
2720       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2721         errors++;
2722
2723       ip4_base_address.as_u32 =
2724         clib_host_to_net_u32 (1 +
2725                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2726     }
2727
2728   if (errors)
2729     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2730   else
2731     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2732
2733   return 0;
2734 }
2735
2736 /*?
2737  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2738  * given FIB table to determine if there is a conflict with the
2739  * adjacency table. The fib-id can be determined by using the
2740  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2741  * of 0 is used.
2742  *
2743  * @todo This command uses fib-id, other commands use table-id (not
2744  * just a name, they are different indexes). Would like to change this
2745  * to table-id for consistency.
2746  *
2747  * @cliexpar
2748  * Example of how to run the test lookup command:
2749  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2750  * No errors in 2 lookups
2751  * @cliexend
2752 ?*/
2753 /* *INDENT-OFF* */
2754 VLIB_CLI_COMMAND (lookup_test_command, static) =
2755 {
2756   .path = "test lookup",
2757   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2758   .function = test_lookup_command_fn,
2759 };
2760 /* *INDENT-ON* */
2761
2762 static clib_error_t *
2763 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2764                              unformat_input_t * input,
2765                              vlib_cli_command_t * cmd)
2766 {
2767   int matched = 0;
2768   u32 table_id = 0;
2769   u32 flow_hash_config = 0;
2770   int rv;
2771
2772   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2773     {
2774       if (unformat (input, "table %d", &table_id))
2775         matched = 1;
2776 #define _(a, b, v)                                                            \
2777   else if (unformat (input, #a))                                              \
2778   {                                                                           \
2779     flow_hash_config |= v;                                                    \
2780     matched = 1;                                                              \
2781   }
2782       foreach_flow_hash_bit
2783 #undef _
2784         else
2785         break;
2786     }
2787
2788   if (matched == 0)
2789     return clib_error_return (0, "unknown input `%U'",
2790                               format_unformat_error, input);
2791
2792   rv = ip_flow_hash_set (AF_IP4, table_id, flow_hash_config);
2793   switch (rv)
2794     {
2795     case 0:
2796       break;
2797
2798     case VNET_API_ERROR_NO_SUCH_FIB:
2799       return clib_error_return (0, "no such FIB table %d", table_id);
2800
2801     default:
2802       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2803       break;
2804     }
2805
2806   return 0;
2807 }
2808
2809 /*?
2810  * Configure the set of IPv4 fields used by the flow hash.
2811  *
2812  * @cliexpar
2813  * Example of how to set the flow hash on a given table:
2814  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2816  * @cliexstart{show ip fib}
2817  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2818  * 0.0.0.0/0
2819  *   unicast-ip4-chain
2820  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2821  *     [0] [@0]: dpo-drop ip6
2822  * 0.0.0.0/32
2823  *   unicast-ip4-chain
2824  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2825  *     [0] [@0]: dpo-drop ip6
2826  * 224.0.0.0/8
2827  *   unicast-ip4-chain
2828  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2829  *     [0] [@0]: dpo-drop ip6
2830  * 6.0.1.2/32
2831  *   unicast-ip4-chain
2832  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2833  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2834  * 7.0.0.1/32
2835  *   unicast-ip4-chain
2836  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2837  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2838  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2839  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2840  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2841  * 240.0.0.0/8
2842  *   unicast-ip4-chain
2843  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2844  *     [0] [@0]: dpo-drop ip6
2845  * 255.255.255.255/32
2846  *   unicast-ip4-chain
2847  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2848  *     [0] [@0]: dpo-drop ip6
2849  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2850  * 0.0.0.0/0
2851  *   unicast-ip4-chain
2852  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2853  *     [0] [@0]: dpo-drop ip6
2854  * 0.0.0.0/32
2855  *   unicast-ip4-chain
2856  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2857  *     [0] [@0]: dpo-drop ip6
2858  * 172.16.1.0/24
2859  *   unicast-ip4-chain
2860  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2861  *     [0] [@4]: ipv4-glean: af_packet0
2862  * 172.16.1.1/32
2863  *   unicast-ip4-chain
2864  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2865  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2866  * 172.16.1.2/32
2867  *   unicast-ip4-chain
2868  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2869  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2870  * 172.16.2.0/24
2871  *   unicast-ip4-chain
2872  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2873  *     [0] [@4]: ipv4-glean: af_packet1
2874  * 172.16.2.1/32
2875  *   unicast-ip4-chain
2876  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2877  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2878  * 224.0.0.0/8
2879  *   unicast-ip4-chain
2880  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2881  *     [0] [@0]: dpo-drop ip6
2882  * 240.0.0.0/8
2883  *   unicast-ip4-chain
2884  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2885  *     [0] [@0]: dpo-drop ip6
2886  * 255.255.255.255/32
2887  *   unicast-ip4-chain
2888  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2889  *     [0] [@0]: dpo-drop ip6
2890  * @cliexend
2891 ?*/
2892 /* *INDENT-OFF* */
2893 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2894 {
2895   .path = "set ip flow-hash",
2896   .short_help =
2897   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2898   .function = set_ip_flow_hash_command_fn,
2899 };
2900 /* *INDENT-ON* */
2901
2902 #ifndef CLIB_MARCH_VARIANT
2903 int
2904 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2905                              u32 table_index)
2906 {
2907   vnet_main_t *vnm = vnet_get_main ();
2908   vnet_interface_main_t *im = &vnm->interface_main;
2909   ip4_main_t *ipm = &ip4_main;
2910   ip_lookup_main_t *lm = &ipm->lookup_main;
2911   vnet_classify_main_t *cm = &vnet_classify_main;
2912   ip4_address_t *if_addr;
2913
2914   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2915     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2916
2917   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2918     return VNET_API_ERROR_NO_SUCH_ENTRY;
2919
2920   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2921   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2922
2923   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2924
2925   if (NULL != if_addr)
2926     {
2927       fib_prefix_t pfx = {
2928         .fp_len = 32,
2929         .fp_proto = FIB_PROTOCOL_IP4,
2930         .fp_addr.ip4 = *if_addr,
2931       };
2932       u32 fib_index;
2933
2934       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2935                                                        sw_if_index);
2936
2937
2938       if (table_index != (u32) ~ 0)
2939         {
2940           dpo_id_t dpo = DPO_INVALID;
2941
2942           dpo_set (&dpo,
2943                    DPO_CLASSIFY,
2944                    DPO_PROTO_IP4,
2945                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2946
2947           fib_table_entry_special_dpo_add (fib_index,
2948                                            &pfx,
2949                                            FIB_SOURCE_CLASSIFY,
2950                                            FIB_ENTRY_FLAG_NONE, &dpo);
2951           dpo_reset (&dpo);
2952         }
2953       else
2954         {
2955           fib_table_entry_special_remove (fib_index,
2956                                           &pfx, FIB_SOURCE_CLASSIFY);
2957         }
2958     }
2959
2960   return 0;
2961 }
2962 #endif
2963
2964 static clib_error_t *
2965 set_ip_classify_command_fn (vlib_main_t * vm,
2966                             unformat_input_t * input,
2967                             vlib_cli_command_t * cmd)
2968 {
2969   u32 table_index = ~0;
2970   int table_index_set = 0;
2971   u32 sw_if_index = ~0;
2972   int rv;
2973
2974   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2975     {
2976       if (unformat (input, "table-index %d", &table_index))
2977         table_index_set = 1;
2978       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2979                          vnet_get_main (), &sw_if_index))
2980         ;
2981       else
2982         break;
2983     }
2984
2985   if (table_index_set == 0)
2986     return clib_error_return (0, "classify table-index must be specified");
2987
2988   if (sw_if_index == ~0)
2989     return clib_error_return (0, "interface / subif must be specified");
2990
2991   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2992
2993   switch (rv)
2994     {
2995     case 0:
2996       break;
2997
2998     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2999       return clib_error_return (0, "No such interface");
3000
3001     case VNET_API_ERROR_NO_SUCH_ENTRY:
3002       return clib_error_return (0, "No such classifier table");
3003     }
3004   return 0;
3005 }
3006
3007 /*?
3008  * Assign a classification table to an interface. The classification
3009  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
3011  * on an interface.
3012  *
3013  * @cliexpar
3014  * Example of how to assign a classification table to an interface:
3015  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3016 ?*/
3017 /* *INDENT-OFF* */
3018 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3019 {
3020     .path = "set ip classify",
3021     .short_help =
3022     "set ip classify intfc <interface> table-index <classify-idx>",
3023     .function = set_ip_classify_command_fn,
3024 };
3025 /* *INDENT-ON* */
3026
3027 /*
3028  * fd.io coding-style-patch-verification: ON
3029  *
3030  * Local Variables:
3031  * eval: (c-set-style "gnu")
3032  * End:
3033  */