pg: Reduce the inclusion of pg.h
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/classify_dpo.h>
56 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
57 #include <vnet/adj/adj_dp.h>
58 #include <vnet/pg/pg.h>
59
60 #include <vnet/ip/ip4_forward.h>
61 #include <vnet/interface_output.h>
62 #include <vnet/classify/vnet_classify.h>
63
64 /** @brief IPv4 lookup node.
65     @node ip4-lookup
66
67     This is the main IPv4 lookup dispatch node.
68
69     @param vm vlib_main_t corresponding to the current thread
70     @param node vlib_node_runtime_t
71     @param frame vlib_frame_t whose contents should be dispatched
72
73     @par Graph mechanics: buffer metadata, next index usage
74
75     @em Uses:
76     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
77         - Indicates the @c sw_if_index value of the interface that the
78           packet was received on.
79     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
80         - When the value is @c ~0 then the node performs a longest prefix
81           match (LPM) for the packet destination address in the FIB attached
82           to the receive interface.
83         - Otherwise perform LPM for the packet destination address in the
84           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
85           value (0, 1, ...) and not a VRF id.
86
87     @em Sets:
88     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
89         - The lookup result adjacency index.
90
91     <em>Next Index:</em>
92     - Dispatches the packet to the node index found in
93       ip_adjacency_t @c adj->lookup_next_index
94       (where @c adj is the lookup result adjacency).
95 */
96 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
97                                 vlib_frame_t * frame)
98 {
99   return ip4_lookup_inline (vm, node, frame);
100 }
101
102 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
103
104 /* *INDENT-OFF* */
105 VLIB_REGISTER_NODE (ip4_lookup_node) =
106 {
107   .name = "ip4-lookup",
108   .vector_size = sizeof (u32),
109   .format_trace = format_ip4_lookup_trace,
110   .n_next_nodes = IP_LOOKUP_N_NEXT,
111   .next_nodes = IP4_LOOKUP_NEXT_NODES,
112 };
113 /* *INDENT-ON* */
114
115 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
116                                       vlib_node_runtime_t * node,
117                                       vlib_frame_t * frame)
118 {
119   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
120   u32 n_left, *from;
121   u32 thread_index = vm->thread_index;
122   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
123   u16 nexts[VLIB_FRAME_SIZE], *next;
124
125   from = vlib_frame_vector_args (frame);
126   n_left = frame->n_vectors;
127   next = nexts;
128
129   vlib_get_buffers (vm, from, bufs, n_left);
130
131   while (n_left >= 4)
132     {
133       const load_balance_t *lb0, *lb1;
134       const ip4_header_t *ip0, *ip1;
135       u32 lbi0, hc0, lbi1, hc1;
136       const dpo_id_t *dpo0, *dpo1;
137
138       /* Prefetch next iteration. */
139       {
140         vlib_prefetch_buffer_header (b[2], LOAD);
141         vlib_prefetch_buffer_header (b[3], LOAD);
142
143         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
144         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
145       }
146
147       ip0 = vlib_buffer_get_current (b[0]);
148       ip1 = vlib_buffer_get_current (b[1]);
149       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
150       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
151
152       lb0 = load_balance_get (lbi0);
153       lb1 = load_balance_get (lbi1);
154
155       /*
156        * this node is for via FIBs we can re-use the hash value from the
157        * to node if present.
158        * We don't want to use the same hash value at each level in the recursion
159        * graph as that would lead to polarisation
160        */
161       hc0 = hc1 = 0;
162
163       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
164         {
165           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
166             {
167               hc0 = vnet_buffer (b[0])->ip.flow_hash =
168                 vnet_buffer (b[0])->ip.flow_hash >> 1;
169             }
170           else
171             {
172               hc0 = vnet_buffer (b[0])->ip.flow_hash =
173                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
174             }
175           dpo0 = load_balance_get_fwd_bucket
176             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
177         }
178       else
179         {
180           dpo0 = load_balance_get_bucket_i (lb0, 0);
181         }
182       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
183         {
184           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
185             {
186               hc1 = vnet_buffer (b[1])->ip.flow_hash =
187                 vnet_buffer (b[1])->ip.flow_hash >> 1;
188             }
189           else
190             {
191               hc1 = vnet_buffer (b[1])->ip.flow_hash =
192                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
193             }
194           dpo1 = load_balance_get_fwd_bucket
195             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
196         }
197       else
198         {
199           dpo1 = load_balance_get_bucket_i (lb1, 0);
200         }
201
202       next[0] = dpo0->dpoi_next_node;
203       next[1] = dpo1->dpoi_next_node;
204
205       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
206       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
207
208       vlib_increment_combined_counter
209         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
210       vlib_increment_combined_counter
211         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
212
213       b += 2;
214       next += 2;
215       n_left -= 2;
216     }
217
218   while (n_left > 0)
219     {
220       const load_balance_t *lb0;
221       const ip4_header_t *ip0;
222       const dpo_id_t *dpo0;
223       u32 lbi0, hc0;
224
225       ip0 = vlib_buffer_get_current (b[0]);
226       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
227
228       lb0 = load_balance_get (lbi0);
229
230       hc0 = 0;
231       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
232         {
233           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
234             {
235               hc0 = vnet_buffer (b[0])->ip.flow_hash =
236                 vnet_buffer (b[0])->ip.flow_hash >> 1;
237             }
238           else
239             {
240               hc0 = vnet_buffer (b[0])->ip.flow_hash =
241                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
242             }
243           dpo0 = load_balance_get_fwd_bucket
244             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
245         }
246       else
247         {
248           dpo0 = load_balance_get_bucket_i (lb0, 0);
249         }
250
251       next[0] = dpo0->dpoi_next_node;
252       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
253
254       vlib_increment_combined_counter
255         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
256
257       b += 1;
258       next += 1;
259       n_left -= 1;
260     }
261
262   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
263   if (node->flags & VLIB_NODE_FLAG_TRACE)
264     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
265
266   return frame->n_vectors;
267 }
268
269 /* *INDENT-OFF* */
270 VLIB_REGISTER_NODE (ip4_load_balance_node) =
271 {
272   .name = "ip4-load-balance",
273   .vector_size = sizeof (u32),
274   .sibling_of = "ip4-lookup",
275   .format_trace = format_ip4_lookup_trace,
276 };
277 /* *INDENT-ON* */
278
279 #ifndef CLIB_MARCH_VARIANT
280 /* get first interface address */
281 ip4_address_t *
282 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
283                              ip_interface_address_t ** result_ia)
284 {
285   ip_lookup_main_t *lm = &im->lookup_main;
286   ip_interface_address_t *ia = 0;
287   ip4_address_t *result = 0;
288
289   /* *INDENT-OFF* */
290   foreach_ip_interface_address
291     (lm, ia, sw_if_index,
292      1 /* honor unnumbered */ ,
293      ({
294        ip4_address_t * a =
295          ip_interface_address_get_address (lm, ia);
296        result = a;
297        break;
298      }));
299   /* *INDENT-OFF* */
300   if (result_ia)
301     *result_ia = result ? ia : 0;
302   return result;
303 }
304 #endif
305
306 static void
307 ip4_add_subnet_bcast_route (u32 fib_index,
308                             fib_prefix_t *pfx,
309                             u32 sw_if_index)
310 {
311   vnet_sw_interface_flags_t iflags;
312
313   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
314
315   fib_table_entry_special_remove(fib_index,
316                                  pfx,
317                                  FIB_SOURCE_INTERFACE);
318
319   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
320     {
321       fib_table_entry_update_one_path (fib_index, pfx,
322                                        FIB_SOURCE_INTERFACE,
323                                        FIB_ENTRY_FLAG_NONE,
324                                        DPO_PROTO_IP4,
325                                        /* No next-hop address */
326                                        &ADJ_BCAST_ADDR,
327                                        sw_if_index,
328                                        // invalid FIB index
329                                        ~0,
330                                        1,
331                                        // no out-label stack
332                                        NULL,
333                                        FIB_ROUTE_PATH_FLAG_NONE);
334     }
335   else
336     {
337         fib_table_entry_special_add(fib_index,
338                                     pfx,
339                                     FIB_SOURCE_INTERFACE,
340                                     (FIB_ENTRY_FLAG_DROP |
341                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
342     }
343 }
344
345 static void
346 ip4_add_interface_prefix_routes (ip4_main_t *im,
347                                  u32 sw_if_index,
348                                  u32 fib_index,
349                                  ip_interface_address_t * a)
350 {
351   ip_lookup_main_t *lm = &im->lookup_main;
352   ip_interface_prefix_t *if_prefix;
353   ip4_address_t *address = ip_interface_address_get_address (lm, a);
354
355   ip_interface_prefix_key_t key = {
356     .prefix = {
357       .fp_len = a->address_length,
358       .fp_proto = FIB_PROTOCOL_IP4,
359       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
360     },
361     .sw_if_index = sw_if_index,
362   };
363
364   fib_prefix_t pfx_special = {
365     .fp_proto = FIB_PROTOCOL_IP4,
366   };
367
368   /* If prefix already set on interface, just increment ref count & return */
369   if_prefix = ip_get_interface_prefix (lm, &key);
370   if (if_prefix)
371     {
372       if_prefix->ref_count += 1;
373       return;
374     }
375
376   /* New prefix - allocate a pool entry, initialize it, add to the hash */
377   pool_get (lm->if_prefix_pool, if_prefix);
378   if_prefix->ref_count = 1;
379   if_prefix->src_ia_index = a - lm->if_address_pool;
380   clib_memcpy (&if_prefix->key, &key, sizeof (key));
381   mhash_set (&lm->prefix_to_if_prefix_index, &key,
382              if_prefix - lm->if_prefix_pool, 0 /* old value */);
383
384   pfx_special.fp_len = a->address_length;
385   pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
386
387   /* set the glean route for the prefix */
388   fib_table_entry_update_one_path (fib_index, &pfx_special,
389                                    FIB_SOURCE_INTERFACE,
390                                    (FIB_ENTRY_FLAG_CONNECTED |
391                                     FIB_ENTRY_FLAG_ATTACHED),
392                                    DPO_PROTO_IP4,
393                                    /* No next-hop address */
394                                    NULL,
395                                    sw_if_index,
396                                    /* invalid FIB index */
397                                    ~0,
398                                    1,
399                                    /* no out-label stack */
400                                    NULL,
401                                    FIB_ROUTE_PATH_FLAG_NONE);
402
403   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
404   if (a->address_length <= 30)
405     {
406       /* set a drop route for the base address of the prefix */
407       pfx_special.fp_len = 32;
408       pfx_special.fp_addr.ip4.as_u32 =
409         address->as_u32 & im->fib_masks[a->address_length];
410
411       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
412         fib_table_entry_special_add (fib_index, &pfx_special,
413                                      FIB_SOURCE_INTERFACE,
414                                      (FIB_ENTRY_FLAG_DROP |
415                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
416
417       /* set a route for the broadcast address of the prefix */
418       pfx_special.fp_len = 32;
419       pfx_special.fp_addr.ip4.as_u32 =
420         address->as_u32 | ~im->fib_masks[a->address_length];
421       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
422         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
423
424
425     }
426   /* length == 31 - add an attached route for the other address */
427   else if (a->address_length == 31)
428     {
429       pfx_special.fp_len = 32;
430       pfx_special.fp_addr.ip4.as_u32 =
431         address->as_u32 ^ clib_host_to_net_u32(1);
432
433       fib_table_entry_update_one_path (fib_index, &pfx_special,
434                                        FIB_SOURCE_INTERFACE,
435                                        (FIB_ENTRY_FLAG_ATTACHED),
436                                        DPO_PROTO_IP4,
437                                        &pfx_special.fp_addr,
438                                        sw_if_index,
439                                        /* invalid FIB index */
440                                        ~0,
441                                        1,
442                                        NULL,
443                                        FIB_ROUTE_PATH_FLAG_NONE);
444     }
445 }
446
447 static void
448 ip4_add_interface_routes (u32 sw_if_index,
449                           ip4_main_t * im, u32 fib_index,
450                           ip_interface_address_t * a)
451 {
452   ip_lookup_main_t *lm = &im->lookup_main;
453   ip4_address_t *address = ip_interface_address_get_address (lm, a);
454   fib_prefix_t pfx = {
455     .fp_len = 32,
456     .fp_proto = FIB_PROTOCOL_IP4,
457     .fp_addr.ip4 = *address,
458   };
459
460   /* set special routes for the prefix if needed */
461   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
462
463   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
464     {
465       u32 classify_table_index =
466         lm->classify_table_index_by_sw_if_index[sw_if_index];
467       if (classify_table_index != (u32) ~ 0)
468         {
469           dpo_id_t dpo = DPO_INVALID;
470
471           dpo_set (&dpo,
472                    DPO_CLASSIFY,
473                    DPO_PROTO_IP4,
474                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
475
476           fib_table_entry_special_dpo_add (fib_index,
477                                            &pfx,
478                                            FIB_SOURCE_CLASSIFY,
479                                            FIB_ENTRY_FLAG_NONE, &dpo);
480           dpo_reset (&dpo);
481         }
482     }
483
484   fib_table_entry_update_one_path (fib_index, &pfx,
485                                    FIB_SOURCE_INTERFACE,
486                                    (FIB_ENTRY_FLAG_CONNECTED |
487                                     FIB_ENTRY_FLAG_LOCAL),
488                                    DPO_PROTO_IP4,
489                                    &pfx.fp_addr,
490                                    sw_if_index,
491                                    // invalid FIB index
492                                    ~0,
493                                    1, NULL,
494                                    FIB_ROUTE_PATH_FLAG_NONE);
495 }
496
497 static void
498 ip4_del_interface_prefix_routes (ip4_main_t * im,
499                                  u32 sw_if_index,
500                                  u32 fib_index,
501                                  ip4_address_t * address,
502                                  u32 address_length)
503 {
504   ip_lookup_main_t *lm = &im->lookup_main;
505   ip_interface_prefix_t *if_prefix;
506
507   ip_interface_prefix_key_t key = {
508     .prefix = {
509       .fp_len = address_length,
510       .fp_proto = FIB_PROTOCOL_IP4,
511       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
512     },
513     .sw_if_index = sw_if_index,
514   };
515
516   fib_prefix_t pfx_special = {
517     .fp_len = 32,
518     .fp_proto = FIB_PROTOCOL_IP4,
519   };
520
521   if_prefix = ip_get_interface_prefix (lm, &key);
522   if (!if_prefix)
523     {
524       clib_warning ("Prefix not found while deleting %U",
525                     format_ip4_address_and_length, address, address_length);
526       return;
527     }
528
529   if_prefix->ref_count -= 1;
530
531   /*
532    * Routes need to be adjusted if deleting last intf addr in prefix
533    *
534    * We're done now otherwise
535    */
536   if (if_prefix->ref_count > 0)
537     return;
538
539   /* length <= 30, delete glean route, first address, last address */
540   if (address_length <= 30)
541     {
542       /* Less work to do in FIB if we remove the covered /32s first */
543
544       /* first address in prefix */
545       pfx_special.fp_addr.ip4.as_u32 =
546         address->as_u32 & im->fib_masks[address_length];
547       pfx_special.fp_len = 32;
548
549       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
550         fib_table_entry_special_remove (fib_index,
551                                         &pfx_special,
552                                         FIB_SOURCE_INTERFACE);
553
554       /* prefix broadcast address */
555       pfx_special.fp_addr.ip4.as_u32 =
556         address->as_u32 | ~im->fib_masks[address_length];
557       pfx_special.fp_len = 32;
558
559       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
560         fib_table_entry_special_remove (fib_index,
561                                         &pfx_special,
562                                         FIB_SOURCE_INTERFACE);
563     }
564   else if (address_length == 31)
565     {
566       /* length == 31, delete attached route for the other address */
567       pfx_special.fp_addr.ip4.as_u32 =
568         address->as_u32 ^ clib_host_to_net_u32(1);
569
570       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
571     }
572
573   /* remove glean route for prefix */
574   pfx_special.fp_addr.ip4 = *address;
575   pfx_special.fp_len = address_length;
576   fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
577
578   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
579   pool_put (lm->if_prefix_pool, if_prefix);
580 }
581
582 static void
583 ip4_del_interface_routes (u32 sw_if_index,
584                           ip4_main_t * im,
585                           u32 fib_index,
586                           ip4_address_t * address, u32 address_length)
587 {
588   fib_prefix_t pfx = {
589     .fp_len = 32,
590     .fp_proto = FIB_PROTOCOL_IP4,
591     .fp_addr.ip4 = *address,
592   };
593
594   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
595
596   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
597                                    address, address_length);
598 }
599
600 #ifndef CLIB_MARCH_VARIANT
601 void
602 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
603 {
604   ip4_main_t *im = &ip4_main;
605   vnet_main_t *vnm = vnet_get_main ();
606   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
607
608   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
609
610   /*
611    * enable/disable only on the 1<->0 transition
612    */
613   if (is_enable)
614     {
615       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
616         return;
617     }
618   else
619     {
620       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
621       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
622         return;
623     }
624   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
625                                !is_enable, 0, 0);
626
627
628   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
629                                sw_if_index, !is_enable, 0, 0);
630
631   if (is_enable)
632     hi->l3_if_count++;
633   else if (hi->l3_if_count)
634     hi->l3_if_count--;
635
636   {
637     ip4_enable_disable_interface_callback_t *cb;
638     vec_foreach (cb, im->enable_disable_interface_callbacks)
639       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
640   }
641 }
642
643 static clib_error_t *
644 ip4_add_del_interface_address_internal (vlib_main_t * vm,
645                                         u32 sw_if_index,
646                                         ip4_address_t * address,
647                                         u32 address_length, u32 is_del)
648 {
649   vnet_main_t *vnm = vnet_get_main ();
650   ip4_main_t *im = &ip4_main;
651   ip_lookup_main_t *lm = &im->lookup_main;
652   clib_error_t *error = 0;
653   u32 if_address_index;
654   ip4_address_fib_t ip4_af, *addr_fib = 0;
655
656   /* local0 interface doesn't support IP addressing  */
657   if (sw_if_index == 0)
658     {
659       return
660        clib_error_create ("local0 interface doesn't support IP addressing");
661     }
662
663   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
664   ip4_addr_fib_init (&ip4_af, address,
665                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
666   vec_add1 (addr_fib, ip4_af);
667
668   /*
669    * there is no support for adj-fib handling in the presence of overlapping
670    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
671    * most routers do.
672    */
673   /* *INDENT-OFF* */
674   if (!is_del)
675     {
676       /* When adding an address check that it does not conflict
677          with an existing address on any interface in this table. */
678       ip_interface_address_t *ia;
679       vnet_sw_interface_t *sif;
680
681       pool_foreach (sif, vnm->interface_main.sw_interfaces)
682        {
683           if (im->fib_index_by_sw_if_index[sw_if_index] ==
684               im->fib_index_by_sw_if_index[sif->sw_if_index])
685             {
686               foreach_ip_interface_address
687                 (&im->lookup_main, ia, sif->sw_if_index,
688                  0 /* honor unnumbered */ ,
689                  ({
690                    ip4_address_t * x =
691                      ip_interface_address_get_address
692                      (&im->lookup_main, ia);
693
694                    if (ip4_destination_matches_route
695                        (im, address, x, ia->address_length) ||
696                        ip4_destination_matches_route (im,
697                                                       x,
698                                                       address,
699                                                       address_length))
700                      {
701                        /* an intf may have >1 addr from the same prefix */
702                        if ((sw_if_index == sif->sw_if_index) &&
703                            (ia->address_length == address_length) &&
704                            (x->as_u32 != address->as_u32))
705                          continue;
706
707                        if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
708                          /* if the address we're comparing against is stale
709                           * then the CP has not added this one back yet, maybe
710                           * it never will, so we have to assume it won't and
711                           * ignore it. if it does add it back, then it will fail
712                           * because this one is now present */
713                          continue;
714
715                        /* error if the length or intf was different */
716                        vnm->api_errno = VNET_API_ERROR_ADDRESS_IN_USE;
717
718                        error = clib_error_create
719                          ("failed to add %U on %U which conflicts with %U for interface %U",
720                           format_ip4_address_and_length, address,
721                           address_length,
722                           format_vnet_sw_if_index_name, vnm,
723                           sw_if_index,
724                           format_ip4_address_and_length, x,
725                           ia->address_length,
726                           format_vnet_sw_if_index_name, vnm,
727                           sif->sw_if_index);
728                        goto done;
729                      }
730                  }));
731             }
732       }
733     }
734   /* *INDENT-ON* */
735
736   if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
737
738   if (is_del)
739     {
740       if (~0 == if_address_index)
741         {
742           vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
743           error = clib_error_create ("%U not found for interface %U",
744                                      lm->format_address_and_length,
745                                      addr_fib, address_length,
746                                      format_vnet_sw_if_index_name, vnm,
747                                      sw_if_index);
748           goto done;
749         }
750
751       error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib,
752                                         address_length, sw_if_index);
753       if (error)
754         goto done;
755     }
756   else
757     {
758       if (~0 != if_address_index)
759         {
760           ip_interface_address_t *ia;
761
762           ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
763
764           if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
765             {
766               if (ia->sw_if_index == sw_if_index)
767                 {
768                   /* re-adding an address during the replace action.
769                    * consdier this the update. clear the flag and
770                    * we're done */
771                   ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
772                   goto done;
773                 }
774               else
775                 {
776                   /* The prefix is moving from one interface to another.
777                    * delete the stale and add the new */
778                   ip4_add_del_interface_address_internal (vm,
779                                                           ia->sw_if_index,
780                                                           address,
781                                                           address_length, 1);
782                   ia = NULL;
783                   error = ip_interface_address_add (lm, sw_if_index,
784                                                     addr_fib, address_length,
785                                                     &if_address_index);
786                 }
787             }
788           else
789             {
790               vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
791               error = clib_error_create
792                 ("Prefix %U already found on interface %U",
793                  lm->format_address_and_length, addr_fib, address_length,
794                  format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
795             }
796         }
797       else
798         error = ip_interface_address_add (lm, sw_if_index,
799                                           addr_fib, address_length,
800                                           &if_address_index);
801     }
802
803   if (error)
804     goto done;
805
806   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
807   ip4_mfib_interface_enable_disable (sw_if_index, !is_del);
808
809   /* intf addr routes are added/deleted on admin up/down */
810   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
811     {
812       if (is_del)
813         ip4_del_interface_routes (sw_if_index,
814                                   im, ip4_af.fib_index, address,
815                                   address_length);
816       else
817         ip4_add_interface_routes (sw_if_index,
818                                   im, ip4_af.fib_index,
819                                   pool_elt_at_index
820                                   (lm->if_address_pool, if_address_index));
821     }
822
823   ip4_add_del_interface_address_callback_t *cb;
824   vec_foreach (cb, im->add_del_interface_address_callbacks)
825     cb->function (im, cb->function_opaque, sw_if_index,
826                   address, address_length, if_address_index, is_del);
827
828 done:
829   vec_free (addr_fib);
830   return error;
831 }
832
833 clib_error_t *
834 ip4_add_del_interface_address (vlib_main_t * vm,
835                                u32 sw_if_index,
836                                ip4_address_t * address,
837                                u32 address_length, u32 is_del)
838 {
839   return ip4_add_del_interface_address_internal
840     (vm, sw_if_index, address, address_length, is_del);
841 }
842
/*
 * Walk the IPv4 addresses configured on sw_if_index and, for every address
 * whose prefix length is 30 or less, call ip4_add_subnet_bcast_route() for
 * the subnet broadcast address (all host bits set) in the FIB table bound
 * to the interface.
 *
 * NOTE(review): 'enable' is not read anywhere in this function body;
 * presumably ip4_add_subnet_bcast_route() looks up the interface's
 * directed-broadcast state itself -- confirm against that helper.
 */
void
ip4_directed_broadcast (u32 sw_if_index, u8 enable)
{
  ip_interface_address_t *ia;
  ip4_main_t *im;

  im = &ip4_main;

  /*
   * when directed broadcast is enabled, the subnet broadcast route will forward
   * packets using an adjacency with a broadcast MAC. otherwise it drops
   */
  /* *INDENT-OFF* */
  foreach_ip_interface_address(&im->lookup_main, ia,
                               sw_if_index, 0,
     ({
       /* longer prefixes are skipped: no broadcast route is touched */
       if (ia->address_length <= 30)
         {
           ip4_address_t *ipa;

           ipa = ip_interface_address_get_address (&im->lookup_main, ia);

           /* /32 host prefix: interface address with all host bits set */
           fib_prefix_t pfx = {
             .fp_len = 32,
             .fp_proto = FIB_PROTOCOL_IP4,
             .fp_addr = {
               .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
             },
           };

           ip4_add_subnet_bcast_route
             (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
                                                  sw_if_index),
              &pfx, sw_if_index);
         }
     }));
  /* *INDENT-ON* */
}
881 #endif
882
883 static clib_error_t *
884 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
885 {
886   ip4_main_t *im = &ip4_main;
887   ip_interface_address_t *ia;
888   ip4_address_t *a;
889   u32 is_admin_up, fib_index;
890
891   /* Fill in lookup tables with default table (0). */
892   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
893
894   vec_validate_init_empty (im->
895                            lookup_main.if_address_pool_index_by_sw_if_index,
896                            sw_if_index, ~0);
897
898   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
899
900   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
901
902   /* *INDENT-OFF* */
903   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
904                                 0 /* honor unnumbered */,
905   ({
906     a = ip_interface_address_get_address (&im->lookup_main, ia);
907     if (is_admin_up)
908       ip4_add_interface_routes (sw_if_index,
909                                 im, fib_index,
910                                 ia);
911     else
912       ip4_del_interface_routes (sw_if_index,
913                                 im, fib_index,
914                                 a, ia->address_length);
915   }));
916   /* *INDENT-ON* */
917
918   return 0;
919 }
920
921 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
922
/*
 * Built-in ip4 unicast rx feature path definition.
 *
 * The "ip4-unicast" arc is entered from the ip4-input and
 * ip4-input-no-checksum nodes and its last node is ip4-lookup.  Each
 * VNET_FEATURE_INIT below registers one node on the arc; .runs_before
 * names the feature(s) that node is ordered ahead of.
 */
/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
{
  .arc_name = "ip4-unicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .last_in_arc = "ip4-lookup",
  .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_flow_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-flow-classify",
  .runs_before = VNET_FEATURES ("ip4-inacl"),
};

VNET_FEATURE_INIT (ip4_inacl, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-inacl",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-and-port-range-check-rx",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_policer_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-policer-classify",
  .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
};

VNET_FEATURE_INIT (ip4_ipsec, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ipsec4-input-feature",
  .runs_before = VNET_FEATURES ("vpath-input-ip4"),
};

VNET_FEATURE_INIT (ip4_vpath, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
};

VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-vxlan-bypass",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

VNET_FEATURE_INIT (ip4_not_enabled, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

/* Terminal feature of the arc (matches .last_in_arc above). */
VNET_FEATURE_INIT (ip4_lookup, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-lookup",
  .runs_before = 0,     /* not before any other features */
};

995
/*
 * Built-in ip4 multicast rx feature path definition.
 *
 * The "ip4-multicast" arc is entered from the ip4-input and
 * ip4-input-no-checksum nodes and its last node is
 * ip4-mfib-forward-lookup.  The arc index is stored in
 * ip4_main.lookup_main.mcast_feature_arc_index.
 */
VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
{
  .arc_name = "ip4-multicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .last_in_arc = "ip4-mfib-forward-lookup",
  .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_vpath_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

/* Terminal feature of the arc (matches .last_in_arc above). */
VNET_FEATURE_INIT (ip4_lookup_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-mfib-forward-lookup",
  .runs_before = 0,     /* last feature */
};

1025
/*
 * Source and port-range check ip4 tx feature path definition.
 *
 * The "ip4-output" arc is entered from the ip4-rewrite, ip4-midchain and
 * ip4-dvr-dpo nodes and its last node is interface-output.  The arc index
 * is stored in ip4_main.lookup_main.output_feature_arc_index.
 */
VNET_FEATURE_ARC_INIT (ip4_output, static) =
{
  .arc_name = "ip4-output",
  .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
  .last_in_arc = "interface-output",
  .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-source-and-port-range-check-tx",
  .runs_before = VNET_FEATURES ("ip4-outacl"),
};

VNET_FEATURE_INIT (ip4_outacl, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-outacl",
  .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
};

VNET_FEATURE_INIT (ip4_ipsec_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ipsec4-output-feature",
  .runs_before = VNET_FEATURES ("interface-output"),
};

/* Built-in ip4 tx feature path definition */
/* Terminal feature of the arc (matches .last_in_arc above). */
VNET_FEATURE_INIT (ip4_interface_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "interface-output",
  .runs_before = 0,     /* not before any other features */
};
/* *INDENT-ON* */
1064
1065 static clib_error_t *
1066 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1067 {
1068   ip4_main_t *im = &ip4_main;
1069
1070   /* Fill in lookup tables with default table (0). */
1071   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1072   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1073
1074   if (!is_add)
1075     {
1076       ip4_main_t *im4 = &ip4_main;
1077       ip_lookup_main_t *lm4 = &im4->lookup_main;
1078       ip_interface_address_t *ia = 0;
1079       ip4_address_t *address;
1080       vlib_main_t *vm = vlib_get_main ();
1081
1082       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1083       /* *INDENT-OFF* */
1084       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1085       ({
1086         address = ip_interface_address_get_address (lm4, ia);
1087         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1088       }));
1089       /* *INDENT-ON* */
1090       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1091     }
1092
1093   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1094                                is_add, 0, 0);
1095
1096   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1097                                sw_if_index, is_add, 0, 0);
1098
1099   return /* no error */ 0;
1100 }
1101
1102 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1103
/*
 * Global IP4 main: the ip4_main_t instance referenced as &ip4_main
 * throughout this file.  The definition is only emitted when
 * CLIB_MARCH_VARIANT is not defined.
 */
#ifndef CLIB_MARCH_VARIANT
ip4_main_t ip4_main;
#endif /* CLIB_MARCH_VARIANT */
1108
1109 static clib_error_t *
1110 ip4_lookup_init (vlib_main_t * vm)
1111 {
1112   ip4_main_t *im = &ip4_main;
1113   clib_error_t *error;
1114   uword i;
1115
1116   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1117     return error;
1118   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1119     return (error);
1120   if ((error = vlib_call_init_function (vm, fib_module_init)))
1121     return error;
1122   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1123     return error;
1124
1125   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1126     {
1127       u32 m;
1128
1129       if (i < 32)
1130         m = pow2_mask (i) << (32 - i);
1131       else
1132         m = ~0;
1133       im->fib_masks[i] = clib_host_to_net_u32 (m);
1134     }
1135
1136   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1137
1138   /* Create FIB with index 0 and table id of 0. */
1139   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1140                                      FIB_SOURCE_DEFAULT_ROUTE);
1141   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1142                                       MFIB_SOURCE_DEFAULT_ROUTE);
1143
1144   {
1145     pg_node_t *pn;
1146     pn = pg_get_node (ip4_lookup_node.index);
1147     pn->unformat_edit = unformat_pg_ip4_header;
1148   }
1149
1150   {
1151     ethernet_arp_header_t h;
1152
1153     clib_memset (&h, 0, sizeof (h));
1154
1155 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1156 #define _8(f,v) h.f = v;
1157     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1158     _16 (l3_type, ETHERNET_TYPE_IP4);
1159     _8 (n_l2_address_bytes, 6);
1160     _8 (n_l3_address_bytes, 4);
1161     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1162 #undef _16
1163 #undef _8
1164
1165     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1166                                /* data */ &h,
1167                                sizeof (h),
1168                                /* alloc chunk size */ 8,
1169                                "ip4 arp");
1170   }
1171
1172   return error;
1173 }
1174
1175 VLIB_INIT_FUNCTION (ip4_lookup_init);
1176
/*
 * Per-packet trace record shared by the ip4 lookup / rewrite style nodes;
 * filled in by ip4_forward_next_trace() and printed by the
 * format_ip4_*_trace() functions in this file.
 */
typedef struct
{
  /* Adjacency taken. */
  u32 dpo_index;
  /* Flow hash copied from the buffer metadata. */
  u32 flow_hash;
  /*
   * TX sw_if_index from the buffer when it is set, otherwise the FIB
   * index bound to the RX interface (see ip4_forward_next_trace()).
   */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. */
  u8 packet_data[64 - 1 * sizeof (u32)];
}
ip4_forward_next_trace_t;
1188
1189 #ifndef CLIB_MARCH_VARIANT
1190 u8 *
1191 format_ip4_forward_next_trace (u8 * s, va_list * args)
1192 {
1193   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1194   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1195   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1196   u32 indent = format_get_indent (s);
1197   s = format (s, "%U%U",
1198               format_white_space, indent,
1199               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1200   return s;
1201 }
1202 #endif
1203
1204 static u8 *
1205 format_ip4_lookup_trace (u8 * s, va_list * args)
1206 {
1207   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1208   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1209   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1210   u32 indent = format_get_indent (s);
1211
1212   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1213               t->fib_index, t->dpo_index, t->flow_hash);
1214   s = format (s, "\n%U%U",
1215               format_white_space, indent,
1216               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1217   return s;
1218 }
1219
1220 static u8 *
1221 format_ip4_rewrite_trace (u8 * s, va_list * args)
1222 {
1223   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1224   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1225   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1226   u32 indent = format_get_indent (s);
1227
1228   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1229               t->fib_index, t->dpo_index, format_ip_adjacency,
1230               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1231   s = format (s, "\n%U%U",
1232               format_white_space, indent,
1233               format_ip_adjacency_packet_data,
1234               t->packet_data, sizeof (t->packet_data));
1235   return s;
1236 }
1237
1238 #ifndef CLIB_MARCH_VARIANT
1239 /* Common trace function for all ip4-forward next nodes. */
1240 void
1241 ip4_forward_next_trace (vlib_main_t * vm,
1242                         vlib_node_runtime_t * node,
1243                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1244 {
1245   u32 *from, n_left;
1246   ip4_main_t *im = &ip4_main;
1247
1248   n_left = frame->n_vectors;
1249   from = vlib_frame_vector_args (frame);
1250
1251   while (n_left >= 4)
1252     {
1253       u32 bi0, bi1;
1254       vlib_buffer_t *b0, *b1;
1255       ip4_forward_next_trace_t *t0, *t1;
1256
1257       /* Prefetch next iteration. */
1258       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1259       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1260
1261       bi0 = from[0];
1262       bi1 = from[1];
1263
1264       b0 = vlib_get_buffer (vm, bi0);
1265       b1 = vlib_get_buffer (vm, bi1);
1266
1267       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1268         {
1269           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1270           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1271           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1272           t0->fib_index =
1273             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1274              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1275             vec_elt (im->fib_index_by_sw_if_index,
1276                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1277
1278           clib_memcpy_fast (t0->packet_data,
1279                             vlib_buffer_get_current (b0),
1280                             sizeof (t0->packet_data));
1281         }
1282       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1283         {
1284           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1285           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1286           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1287           t1->fib_index =
1288             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1289              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1290             vec_elt (im->fib_index_by_sw_if_index,
1291                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1292           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1293                             sizeof (t1->packet_data));
1294         }
1295       from += 2;
1296       n_left -= 2;
1297     }
1298
1299   while (n_left >= 1)
1300     {
1301       u32 bi0;
1302       vlib_buffer_t *b0;
1303       ip4_forward_next_trace_t *t0;
1304
1305       bi0 = from[0];
1306
1307       b0 = vlib_get_buffer (vm, bi0);
1308
1309       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1310         {
1311           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1312           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1313           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1314           t0->fib_index =
1315             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1316              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1317             vec_elt (im->fib_index_by_sw_if_index,
1318                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1319           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1320                             sizeof (t0->packet_data));
1321         }
1322       from += 1;
1323       n_left -= 1;
1324     }
1325 }
1326
1327 /* Compute TCP/UDP/ICMP4 checksum in software. */
1328 u16
1329 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1330                               ip4_header_t * ip0)
1331 {
1332   ip_csum_t sum0;
1333   u32 ip_header_length, payload_length_host_byte_order;
1334
1335   /* Initialize checksum with ip header. */
1336   ip_header_length = ip4_header_bytes (ip0);
1337   payload_length_host_byte_order =
1338     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1339   sum0 =
1340     clib_host_to_net_u32 (payload_length_host_byte_order +
1341                           (ip0->protocol << 16));
1342
1343   if (BITS (uword) == 32)
1344     {
1345       sum0 =
1346         ip_csum_with_carry (sum0,
1347                             clib_mem_unaligned (&ip0->src_address, u32));
1348       sum0 =
1349         ip_csum_with_carry (sum0,
1350                             clib_mem_unaligned (&ip0->dst_address, u32));
1351     }
1352   else
1353     sum0 =
1354       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1355
1356   return ip_calculate_l4_checksum (vm, p0, sum0,
1357                                    payload_length_host_byte_order, (u8 *) ip0,
1358                                    ip_header_length, NULL);
1359 }
1360
1361 u32
1362 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1363 {
1364   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1365   udp_header_t *udp0;
1366   u16 sum16;
1367
1368   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1369           || ip0->protocol == IP_PROTOCOL_UDP);
1370
1371   udp0 = (void *) (ip0 + 1);
1372   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1373     {
1374       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1375                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1376       return p0->flags;
1377     }
1378
1379   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1380
1381   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1382                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1383
1384   return p0->flags;
1385 }
1386 #endif
1387
/*
 * Feature arc for locally-destined packets: entered from the ip4-local
 * node, with ip4-local-end-of-arc as its last node.
 */
/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_local) =
{
  .arc_name  = "ip4-local",
  .start_nodes = VNET_FEATURES ("ip4-local"),
  .last_in_arc = "ip4-local-end-of-arc",
};
/* *INDENT-ON* */
1396
1397 static inline void
1398 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1399                             ip4_header_t * ip, u8 is_udp, u8 * error,
1400                             u8 * good_tcp_udp)
1401 {
1402   u32 flags0;
1403   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1404   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1405   if (is_udp)
1406     {
1407       udp_header_t *udp;
1408       u32 ip_len, udp_len;
1409       i32 len_diff;
1410       udp = ip4_next_header (ip);
1411       /* Verify UDP length. */
1412       ip_len = clib_net_to_host_u16 (ip->length);
1413       udp_len = clib_net_to_host_u16 (udp->length);
1414
1415       len_diff = ip_len - udp_len;
1416       *good_tcp_udp &= len_diff >= 0;
1417       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1418     }
1419 }
1420
/*
 * Checksum-state predicates on a vlib buffer pointer.  Every macro
 * argument and every expansion is fully parenthesized so the macros can
 * be given arbitrary pointer expressions and embedded in larger
 * expressions safely.
 */

/* Non-zero when the buffer requests TCP or UDP checksum offload. */
#define ip4_local_csum_is_offloaded(_b)                                       \
  (((_b)->flags & VNET_BUFFER_F_OFFLOAD) &&                                   \
   (vnet_buffer (_b)->oflags &                                                \
    (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)))

/*
 * Non-zero when a TCP/UDP packet still needs a software checksum check:
 * the checksum has not been computed yet and is not offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                             \
  ((is_tcp_udp) &&                                                            \
   !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) ||                    \
     ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                           \
  ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) ||                      \
    ip4_local_csum_is_offloaded (_b)) != 0)
1433
1434 static inline void
1435 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1436                          ip4_header_t * ih, u8 * error)
1437 {
1438   u8 is_udp, is_tcp_udp, good_tcp_udp;
1439
1440   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1441   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1442
1443   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1444     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1445   else
1446     good_tcp_udp = ip4_local_csum_is_valid (b);
1447
1448   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1449   *error = (is_tcp_udp && !good_tcp_udp
1450             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1451 }
1452
1453 static inline void
1454 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1455                             ip4_header_t ** ih, u8 * error)
1456 {
1457   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1458
1459   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1460   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1461
1462   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1463   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1464
1465   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1466   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1467
1468   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1469                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1470     {
1471       if (is_tcp_udp[0])
1472         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1473                                     &good_tcp_udp[0]);
1474       if (is_tcp_udp[1])
1475         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1476                                     &good_tcp_udp[1]);
1477     }
1478
1479   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1480               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1481   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1482               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1483 }
1484
1485 static inline void
1486 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1487                               vlib_buffer_t * b, u16 * next, u8 error,
1488                               u8 head_of_feature_arc)
1489 {
1490   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1491   u32 next_index;
1492
1493   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1494   b->error = error ? error_node->errors[error] : 0;
1495   if (head_of_feature_arc)
1496     {
1497       next_index = *next;
1498       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1499         {
1500           vnet_feature_arc_start (arc_index,
1501                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1502                                   &next_index, b);
1503           *next = next_index;
1504         }
1505     }
1506 }
1507
/*
 * One-entry cache of the most recent source-address check, used by
 * ip4_local_check_src() and ip4_local_check_src_x2() to skip the FIB
 * lookup when consecutive packets share a source address.
 */
typedef struct
{
  /* source address of the last packet that was looked up */
  ip4_address_t src;
  /* load-balance index found for that source */
  u32 lbi;
  /* error code that resulted from that check */
  u8 error;
  /* non-zero until the first lookup has populated the cache */
  u8 first;
} ip4_local_last_check_t;
1515
/*
 * Source-address check for one locally-destined packet.
 *
 * Selects the FIB index (TX sw_if_index when set, else the value already
 * in ip.fib_index), then either looks the source address up in that FIB's
 * mtrie or, when the source matches the cached one, reuses the cached
 * result.  In both cases adj_index[VLIB_TX] is moved to adj_index[VLIB_RX]
 * and adj_index[VLIB_TX] receives the source's load-balance index.
 *
 * NOTE(review): on a cache miss the value cached in last_check->error is
 * whatever *error0 holds after the checks, and on a cache hit *error0 is
 * overwritten with that cached value.  If callers can pass a *error0 other
 * than IP4_ERROR_UNKNOWN_PROTOCOL, that error may leak to, or be lost for,
 * later packets from the same source -- confirm against the caller.
 * NOTE(review): the cache key is the source address only, not the FIB
 * index -- confirm this is acceptable when packets use different FIBs.
 */
static inline void
ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
                     ip4_local_last_check_t * last_check, u8 * error0)
{
  ip4_fib_mtrie_leaf_t leaf0;
  ip4_fib_mtrie_t *mtrie0;
  const dpo_id_t *dpo0;
  load_balance_t *lb0;
  u32 lbi0;

  vnet_buffer (b)->ip.fib_index =
    vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
    vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;

  /*
   * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
   *  adjacency for the destination address (the local interface address).
   * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
   *  adjacency for the source address (the remote sender's address)
   */
  if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
      last_check->first)
    {
      /* cache miss (or first packet): full mtrie lookup on the source */
      mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
      leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
      leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
      leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
      lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);

      vnet_buffer (b)->ip.adj_index[VLIB_RX] =
        vnet_buffer (b)->ip.adj_index[VLIB_TX];
      vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;

      lb0 = load_balance_get (lbi0);
      dpo0 = load_balance_get_bucket_i (lb0, 0);

      /*
       * Must have a route to source otherwise we drop the packet.
       * ip4 broadcasts are accepted, e.g. to make dhcp client work
       *
       * The checks are:
       *  - the source is a receive => it's from us => bogus, do this
       *    first since it sets a different error code.
       *  - uRPF check for any route to source - accept if passes.
       *  - allow packets destined to the broadcast address from unknown sources
       */

      *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
                  && dpo0->dpoi_type == DPO_RECEIVE) ?
                 IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
      *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
                  && !fib_urpf_check_size (lb0->lb_urpf)
                  && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
                 IP4_ERROR_SRC_LOOKUP_MISS : *error0);

      /* remember this result for the following packets */
      last_check->src.as_u32 = ip0->src_address.as_u32;
      last_check->lbi = lbi0;
      last_check->error = *error0;
      last_check->first = 0;
    }
  else
    {
      /* cache hit: reuse the load-balance index and error of the last lookup */
      vnet_buffer (b)->ip.adj_index[VLIB_RX] =
        vnet_buffer (b)->ip.adj_index[VLIB_TX];
      vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
      *error0 = last_check->error;
    }
}
1584
/*
 * Two-packet variant of ip4_local_check_src().
 *
 * The cached result is reused only when the cache is populated and BOTH
 * packets carry the cached source address; otherwise both sources are
 * looked up and the result for the second packet (index 1) becomes the
 * new cache entry.
 *
 * NOTE(review): as in the single-packet variant, the cached error is
 * whatever error[1] holds after the checks and a cache hit overwrites
 * both error[] slots with it -- confirm against the caller that incoming
 * error values cannot be lost or leaked this way.
 */
static inline void
ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
                        ip4_local_last_check_t * last_check, u8 * error)
{
  ip4_fib_mtrie_leaf_t leaf[2];
  ip4_fib_mtrie_t *mtrie[2];
  const dpo_id_t *dpo[2];
  load_balance_t *lb[2];
  u32 not_last_hit;
  u32 lbi[2];

  /* non-zero if the cache is empty or either source differs from it */
  not_last_hit = last_check->first;
  not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
  not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;

  vnet_buffer (b[0])->ip.fib_index =
    vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
    vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
    vnet_buffer (b[0])->ip.fib_index;

  vnet_buffer (b[1])->ip.fib_index =
    vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
    vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
    vnet_buffer (b[1])->ip.fib_index;

  /*
   * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
   *  adjacency for the destination address (the local interface address).
   * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
   *  adjacency for the source address (the remote sender's address)
   */
  if (PREDICT_TRUE (not_last_hit))
    {
      /* cache miss: interleaved mtrie lookups for both source addresses */
      mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
      mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;

      leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
      leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);

      leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
                                           &ip[0]->src_address, 2);
      leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
                                           &ip[1]->src_address, 2);

      leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
                                           &ip[0]->src_address, 3);
      leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
                                           &ip[1]->src_address, 3);

      lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
      lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);

      vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
        vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];

      vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
        vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];

      lb[0] = load_balance_get (lbi[0]);
      lb[1] = load_balance_get (lbi[1]);

      dpo[0] = load_balance_get_bucket_i (lb[0], 0);
      dpo[1] = load_balance_get_bucket_i (lb[1], 0);

      /*
       * Same checks as the single-packet variant: a source that resolves
       * to a receive DPO is flagged as spoofed first, then a failed uRPF
       * check is flagged unless the destination is 255.255.255.255.
       */
      error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   dpo[0]->dpoi_type == DPO_RECEIVE) ?
                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
      error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   !fib_urpf_check_size (lb[0]->lb_urpf) &&
                   ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
                  ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);

      error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   dpo[1]->dpoi_type == DPO_RECEIVE) ?
                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
      error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   !fib_urpf_check_size (lb[1]->lb_urpf) &&
                   ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
                  ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);

      /* the second packet's result becomes the cache entry */
      last_check->src.as_u32 = ip[1]->src_address.as_u32;
      last_check->lbi = lbi[1];
      last_check->error = error[1];
      last_check->first = 0;
    }
  else
    {
      /* cache hit for both packets: reuse the cached index and error */
      vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
        vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;

      vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
        vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;

      error[0] = last_check->error;
      error[1] = last_check->error;
    }
}
1686
/* Packet classes returned by ip4_local_classify(). */
enum ip_local_packet_type_e
{
  /* neither a fragment nor NAT-flagged */
  IP_LOCAL_PACKET_TYPE_L4,
  /* buffer carries VNET_BUFFER_F_IS_NATED */
  IP_LOCAL_PACKET_TYPE_NAT,
  /* IPv4 fragment */
  IP_LOCAL_PACKET_TYPE_FRAG,
};
1693
1694 /**
1695  * Determine packet type and next node.
1696  *
1697  * The expectation is that all packets that are not L4 will skip
1698  * checksums and source checks.
1699  */
1700 always_inline u8
1701 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1702 {
1703   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1704
1705   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1706     {
1707       *next = IP_LOCAL_NEXT_REASSEMBLY;
1708       return IP_LOCAL_PACKET_TYPE_FRAG;
1709     }
1710   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1711     {
1712       *next = lm->local_next_by_ip_protocol[ip->protocol];
1713       return IP_LOCAL_PACKET_TYPE_NAT;
1714     }
1715
1716   *next = lm->local_next_by_ip_protocol[ip->protocol];
1717   return IP_LOCAL_PACKET_TYPE_L4;
1718 }
1719
1720 static inline uword
1721 ip4_local_inline (vlib_main_t * vm,
1722                   vlib_node_runtime_t * node,
1723                   vlib_frame_t * frame, int head_of_feature_arc)
1724 {
1725   u32 *from, n_left_from;
1726   vlib_node_runtime_t *error_node =
1727     vlib_node_get_runtime (vm, ip4_local_node.index);
1728   u16 nexts[VLIB_FRAME_SIZE], *next;
1729   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1730   ip4_header_t *ip[2];
1731   u8 error[2], pt[2];
1732
1733   ip4_local_last_check_t last_check = {
1734     /*
1735      * 0.0.0.0 can appear as the source address of an IP packet,
1736      * as can any other address, hence the need to use the 'first'
1737      * member to make sure the .lbi is initialised for the first
1738      * packet.
1739      */
1740     .src = {.as_u32 = 0},
1741     .lbi = ~0,
1742     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1743     .first = 1,
1744   };
1745
1746   from = vlib_frame_vector_args (frame);
1747   n_left_from = frame->n_vectors;
1748
1749   if (node->flags & VLIB_NODE_FLAG_TRACE)
1750     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1751
1752   vlib_get_buffers (vm, from, bufs, n_left_from);
1753   b = bufs;
1754   next = nexts;
1755
1756   while (n_left_from >= 6)
1757     {
1758       u8 not_batch = 0;
1759
1760       /* Prefetch next iteration. */
1761       {
1762         vlib_prefetch_buffer_header (b[4], LOAD);
1763         vlib_prefetch_buffer_header (b[5], LOAD);
1764
1765         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1766         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1767       }
1768
1769       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1770
1771       ip[0] = vlib_buffer_get_current (b[0]);
1772       ip[1] = vlib_buffer_get_current (b[1]);
1773
1774       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1775       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1776
1777       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1778       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1779
1780       not_batch = pt[0] ^ pt[1];
1781
1782       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1783         goto skip_checks;
1784
1785       if (PREDICT_TRUE (not_batch == 0))
1786         {
1787           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1788           ip4_local_check_src_x2 (b, ip, &last_check, error);
1789         }
1790       else
1791         {
1792           if (!pt[0])
1793             {
1794               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1795               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1796             }
1797           if (!pt[1])
1798             {
1799               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1800               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1801             }
1802         }
1803
1804     skip_checks:
1805
1806       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1807                                     head_of_feature_arc);
1808       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1809                                     head_of_feature_arc);
1810
1811       b += 2;
1812       next += 2;
1813       n_left_from -= 2;
1814     }
1815
1816   while (n_left_from > 0)
1817     {
1818       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1819
1820       ip[0] = vlib_buffer_get_current (b[0]);
1821       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1822       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1823
1824       if (head_of_feature_arc == 0 || pt[0])
1825         goto skip_check;
1826
1827       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1828       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1829
1830     skip_check:
1831
1832       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1833                                     head_of_feature_arc);
1834
1835       b += 1;
1836       next += 1;
1837       n_left_from -= 1;
1838     }
1839
1840   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1841   return frame->n_vectors;
1842 }
1843
1844 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1845                                vlib_frame_t * frame)
1846 {
1847   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1848 }
1849
1850 /* *INDENT-OFF* */
1851 VLIB_REGISTER_NODE (ip4_local_node) =
1852 {
1853   .name = "ip4-local",
1854   .vector_size = sizeof (u32),
1855   .format_trace = format_ip4_forward_next_trace,
1856   .n_errors = IP4_N_ERROR,
1857   .error_strings = ip4_error_strings,
1858   .n_next_nodes = IP_LOCAL_N_NEXT,
1859   .next_nodes =
1860   {
1861     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1862     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1863     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1864     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1865     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1866   },
1867 };
1868 /* *INDENT-ON* */
1869
1870
1871 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1872                                           vlib_node_runtime_t * node,
1873                                           vlib_frame_t * frame)
1874 {
1875   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1876 }
1877
1878 /* *INDENT-OFF* */
1879 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1880   .name = "ip4-local-end-of-arc",
1881   .vector_size = sizeof (u32),
1882
1883   .format_trace = format_ip4_forward_next_trace,
1884   .sibling_of = "ip4-local",
1885 };
1886
1887 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1888   .arc_name = "ip4-local",
1889   .node_name = "ip4-local-end-of-arc",
1890   .runs_before = 0, /* not before any other features */
1891 };
1892 /* *INDENT-ON* */
1893
1894 #ifndef CLIB_MARCH_VARIANT
1895 void
1896 ip4_register_protocol (u32 protocol, u32 node_index)
1897 {
1898   vlib_main_t *vm = vlib_get_main ();
1899   ip4_main_t *im = &ip4_main;
1900   ip_lookup_main_t *lm = &im->lookup_main;
1901
1902   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1903   lm->local_next_by_ip_protocol[protocol] =
1904     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1905 }
1906
1907 void
1908 ip4_unregister_protocol (u32 protocol)
1909 {
1910   ip4_main_t *im = &ip4_main;
1911   ip_lookup_main_t *lm = &im->lookup_main;
1912
1913   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1914   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1915 }
1916 #endif
1917
1918 static clib_error_t *
1919 show_ip_local_command_fn (vlib_main_t * vm,
1920                           unformat_input_t * input, vlib_cli_command_t * cmd)
1921 {
1922   ip4_main_t *im = &ip4_main;
1923   ip_lookup_main_t *lm = &im->lookup_main;
1924   int i;
1925
1926   vlib_cli_output (vm, "Protocols handled by ip4_local");
1927   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1928     {
1929       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1930         {
1931           u32 node_index = vlib_get_node (vm,
1932                                           ip4_local_node.index)->
1933             next_nodes[lm->local_next_by_ip_protocol[i]];
1934           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1935                            format_vlib_node_name, vm, node_index);
1936         }
1937     }
1938   return 0;
1939 }
1940
1941
1942
1943 /*?
1944  * Display the set of protocols handled by the local IPv4 stack.
1945  *
1946  * @cliexpar
1947  * Example of how to display local protocol table:
1948  * @cliexstart{show ip local}
1949  * Protocols handled by ip4_local
1950  * 1
1951  * 17
1952  * 47
1953  * @cliexend
1954 ?*/
1955 /* *INDENT-OFF* */
1956 VLIB_CLI_COMMAND (show_ip_local, static) =
1957 {
1958   .path = "show ip local",
1959   .function = show_ip_local_command_fn,
1960   .short_help = "show ip local",
1961 };
1962 /* *INDENT-ON* */
1963
/** Next indices that ip4_rewrite_inline() and its helpers assign directly. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  IP4_REWRITE_N_NEXT = 3,       /* Last */
} ip4_rewrite_next_t;
1971
/**
 * The bits of an IPv4 address to keep when constructing a multicast
 * MAC address.
 *
 * Both variants describe the same four bytes in memory (00 7f ff ff),
 * i.e. the low 23 bits of an address held in network byte order.
 */
#if !CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
1981
1982 always_inline void
1983 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
1984                u16 adj_packet_bytes, bool df, u16 * next,
1985                u8 is_midchain, u32 * error)
1986 {
1987   if (packet_len > adj_packet_bytes)
1988     {
1989       *error = IP4_ERROR_MTU_EXCEEDED;
1990       if (df)
1991         {
1992           icmp4_error_set_vnet_buffer
1993             (b, ICMP4_destination_unreachable,
1994              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
1995              adj_packet_bytes);
1996           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
1997         }
1998       else
1999         {
2000           /* IP fragmentation */
2001           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2002                                    (is_midchain ?
2003                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
2004                                     IP_FRAG_NEXT_IP_REWRITE), 0);
2005           *next = IP4_REWRITE_NEXT_FRAGMENT;
2006         }
2007     }
2008 }
2009
2010 /* increment TTL & update checksum.
2011    Works either endian, so no need for byte swap. */
2012 static_always_inline void
2013 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
2014 {
2015   i32 ttl;
2016   u32 checksum;
2017   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2018     return;
2019
2020   ttl = ip->ttl;
2021
2022   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
2023   checksum += checksum >= 0xffff;
2024
2025   ip->checksum = checksum;
2026   ttl += 1;
2027   ip->ttl = ttl;
2028
2029   ASSERT (ip4_header_checksum_is_valid (ip));
2030 }
2031
2032 /* Decrement TTL & update checksum.
2033    Works either endian, so no need for byte swap. */
2034 static_always_inline void
2035 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2036                             u32 * error)
2037 {
2038   i32 ttl;
2039   u32 checksum;
2040   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2041     return;
2042
2043   ttl = ip->ttl;
2044
2045   /* Input node should have reject packets with ttl 0. */
2046   ASSERT (ip->ttl > 0);
2047
2048   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2049   checksum += checksum >= 0xffff;
2050
2051   ip->checksum = checksum;
2052   ttl -= 1;
2053   ip->ttl = ttl;
2054
2055   /*
2056    * If the ttl drops below 1 when forwarding, generate
2057    * an ICMP response.
2058    */
2059   if (PREDICT_FALSE (ttl <= 0))
2060     {
2061       *error = IP4_ERROR_TIME_EXPIRED;
2062       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2063       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2064                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2065                                    0);
2066       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2067     }
2068
2069   /* Verify checksum. */
2070   ASSERT (ip4_header_checksum_is_valid (ip) ||
2071           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM));
2072 }
2073
2074 always_inline uword
2075 ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
2076                     vlib_frame_t *frame, int do_counters, int is_midchain,
2077                     int is_mcast)
2078 {
2079   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2080   u32 *from = vlib_frame_vector_args (frame);
2081   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2082   u16 nexts[VLIB_FRAME_SIZE], *next;
2083   u32 n_left_from;
2084   vlib_node_runtime_t *error_node =
2085     vlib_node_get_runtime (vm, ip4_input_node.index);
2086
2087   n_left_from = frame->n_vectors;
2088   u32 thread_index = vm->thread_index;
2089
2090   vlib_get_buffers (vm, from, bufs, n_left_from);
2091   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2092
2093 #if (CLIB_N_PREFETCHES >= 8)
2094   if (n_left_from >= 6)
2095     {
2096       int i;
2097       for (i = 2; i < 6; i++)
2098         vlib_prefetch_buffer_header (bufs[i], LOAD);
2099     }
2100
2101   next = nexts;
2102   b = bufs;
2103   while (n_left_from >= 8)
2104     {
2105       const ip_adjacency_t *adj0, *adj1;
2106       ip4_header_t *ip0, *ip1;
2107       u32 rw_len0, error0, adj_index0;
2108       u32 rw_len1, error1, adj_index1;
2109       u32 tx_sw_if_index0, tx_sw_if_index1;
2110       u8 *p;
2111
2112       if (is_midchain)
2113         {
2114           vlib_prefetch_buffer_header (b[6], LOAD);
2115           vlib_prefetch_buffer_header (b[7], LOAD);
2116         }
2117
2118       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2119       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2120
2121       /*
2122        * pre-fetch the per-adjacency counters
2123        */
2124       if (do_counters)
2125         {
2126           vlib_prefetch_combined_counter (&adjacency_counters,
2127                                           thread_index, adj_index0);
2128           vlib_prefetch_combined_counter (&adjacency_counters,
2129                                           thread_index, adj_index1);
2130         }
2131
2132       ip0 = vlib_buffer_get_current (b[0]);
2133       ip1 = vlib_buffer_get_current (b[1]);
2134
2135       error0 = error1 = IP4_ERROR_NONE;
2136
2137       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2138       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2139
2140       /* Rewrite packet header and updates lengths. */
2141       adj0 = adj_get (adj_index0);
2142       adj1 = adj_get (adj_index1);
2143
2144       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2145       rw_len0 = adj0[0].rewrite_header.data_bytes;
2146       rw_len1 = adj1[0].rewrite_header.data_bytes;
2147       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2148       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2149
2150       p = vlib_buffer_get_current (b[2]);
2151       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2152       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2153
2154       p = vlib_buffer_get_current (b[3]);
2155       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2156       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2157
2158       /* Check MTU of outgoing interface. */
2159       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2160       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2161
2162       if (b[0]->flags & VNET_BUFFER_F_GSO)
2163         ip0_len = gso_mtu_sz (b[0]);
2164       if (b[1]->flags & VNET_BUFFER_F_GSO)
2165         ip1_len = gso_mtu_sz (b[1]);
2166
2167       ip4_mtu_check (b[0], ip0_len,
2168                      adj0[0].rewrite_header.max_l3_packet_bytes,
2169                      ip0->flags_and_fragment_offset &
2170                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2171                      next + 0, is_midchain, &error0);
2172       ip4_mtu_check (b[1], ip1_len,
2173                      adj1[0].rewrite_header.max_l3_packet_bytes,
2174                      ip1->flags_and_fragment_offset &
2175                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2176                      next + 1, is_midchain, &error1);
2177
2178       if (is_mcast)
2179         {
2180           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2181                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2182                     IP4_ERROR_SAME_INTERFACE : error0);
2183           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2184                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2185                     IP4_ERROR_SAME_INTERFACE : error1);
2186         }
2187
2188       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2189        * to see the IP header */
2190       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2191         {
2192           u32 next_index = adj0[0].rewrite_header.next_index;
2193           vlib_buffer_advance (b[0], -(word) rw_len0);
2194
2195           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2196           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2197
2198           if (PREDICT_FALSE
2199               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2200             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2201                                                 tx_sw_if_index0,
2202                                                 &next_index, b[0],
2203                                                 adj0->ia_cfg_index);
2204
2205           next[0] = next_index;
2206           if (is_midchain)
2207             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2208                                         0 /* is_ip6 */ );
2209         }
2210       else
2211         {
2212           b[0]->error = error_node->errors[error0];
2213           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2214             ip4_ttl_inc (b[0], ip0);
2215         }
2216       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2217         {
2218           u32 next_index = adj1[0].rewrite_header.next_index;
2219           vlib_buffer_advance (b[1], -(word) rw_len1);
2220
2221           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2222           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2223
2224           if (PREDICT_FALSE
2225               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2226             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2227                                                 tx_sw_if_index1,
2228                                                 &next_index, b[1],
2229                                                 adj1->ia_cfg_index);
2230           next[1] = next_index;
2231           if (is_midchain)
2232             vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ ,
2233                                         0 /* is_ip6 */ );
2234         }
2235       else
2236         {
2237           b[1]->error = error_node->errors[error1];
2238           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2239             ip4_ttl_inc (b[1], ip1);
2240         }
2241
2242       if (is_midchain)
2243         /* Guess we are only writing on ipv4 header. */
2244         vnet_rewrite_two_headers (adj0[0], adj1[0],
2245                                   ip0, ip1, sizeof (ip4_header_t));
2246       else
2247         /* Guess we are only writing on simple Ethernet header. */
2248         vnet_rewrite_two_headers (adj0[0], adj1[0],
2249                                   ip0, ip1, sizeof (ethernet_header_t));
2250
2251       if (do_counters)
2252         {
2253           if (error0 == IP4_ERROR_NONE)
2254             vlib_increment_combined_counter
2255               (&adjacency_counters,
2256                thread_index,
2257                adj_index0, 1,
2258                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2259
2260           if (error1 == IP4_ERROR_NONE)
2261             vlib_increment_combined_counter
2262               (&adjacency_counters,
2263                thread_index,
2264                adj_index1, 1,
2265                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2266         }
2267
2268       if (is_midchain)
2269         {
2270           if (error0 == IP4_ERROR_NONE)
2271             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2272           if (error1 == IP4_ERROR_NONE)
2273             adj_midchain_fixup (vm, adj1, b[1], VNET_LINK_IP4);
2274         }
2275
2276       if (is_mcast)
2277         {
2278           /* copy bytes from the IP address into the MAC rewrite */
2279           if (error0 == IP4_ERROR_NONE)
2280             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2281                                         adj0->rewrite_header.dst_mcast_offset,
2282                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2283           if (error1 == IP4_ERROR_NONE)
2284             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2285                                         adj1->rewrite_header.dst_mcast_offset,
2286                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2287         }
2288
2289       next += 2;
2290       b += 2;
2291       n_left_from -= 2;
2292     }
2293 #elif (CLIB_N_PREFETCHES >= 4)
2294   next = nexts;
2295   b = bufs;
2296   while (n_left_from >= 1)
2297     {
2298       ip_adjacency_t *adj0;
2299       ip4_header_t *ip0;
2300       u32 rw_len0, error0, adj_index0;
2301       u32 tx_sw_if_index0;
2302       u8 *p;
2303
2304       /* Prefetch next iteration */
2305       if (PREDICT_TRUE (n_left_from >= 4))
2306         {
2307           ip_adjacency_t *adj2;
2308           u32 adj_index2;
2309
2310           vlib_prefetch_buffer_header (b[3], LOAD);
2311           vlib_prefetch_buffer_data (b[2], LOAD);
2312
2313           /* Prefetch adj->rewrite_header */
2314           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2315           adj2 = adj_get (adj_index2);
2316           p = (u8 *) adj2;
2317           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2318                          LOAD);
2319         }
2320
2321       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2322
2323       /*
2324        * Prefetch the per-adjacency counters
2325        */
2326       if (do_counters)
2327         {
2328           vlib_prefetch_combined_counter (&adjacency_counters,
2329                                           thread_index, adj_index0);
2330         }
2331
2332       ip0 = vlib_buffer_get_current (b[0]);
2333
2334       error0 = IP4_ERROR_NONE;
2335
2336       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2337
2338       /* Rewrite packet header and updates lengths. */
2339       adj0 = adj_get (adj_index0);
2340
2341       /* Rewrite header was prefetched. */
2342       rw_len0 = adj0[0].rewrite_header.data_bytes;
2343       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2344
2345       /* Check MTU of outgoing interface. */
2346       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2347
2348       if (b[0]->flags & VNET_BUFFER_F_GSO)
2349         ip0_len = gso_mtu_sz (b[0]);
2350
2351       ip4_mtu_check (b[0], ip0_len,
2352                      adj0[0].rewrite_header.max_l3_packet_bytes,
2353                      ip0->flags_and_fragment_offset &
2354                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2355                      next + 0, is_midchain, &error0);
2356
2357       if (is_mcast)
2358         {
2359           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2360                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2361                     IP4_ERROR_SAME_INTERFACE : error0);
2362         }
2363
2364       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2365        * to see the IP header */
2366       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2367         {
2368           u32 next_index = adj0[0].rewrite_header.next_index;
2369           vlib_buffer_advance (b[0], -(word) rw_len0);
2370           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2371           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2372
2373           if (PREDICT_FALSE
2374               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2375             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2376                                                 tx_sw_if_index0,
2377                                                 &next_index, b[0],
2378                                                 adj0->ia_cfg_index);
2379           next[0] = next_index;
2380
2381           if (is_midchain)
2382             {
2383               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2384                                           0 /* is_ip6 */ );
2385
2386               /* Guess we are only writing on ipv4 header. */
2387               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2388             }
2389           else
2390             /* Guess we are only writing on simple Ethernet header. */
2391             vnet_rewrite_one_header (adj0[0], ip0,
2392                                      sizeof (ethernet_header_t));
2393
2394           /*
2395            * Bump the per-adjacency counters
2396            */
2397           if (do_counters)
2398             vlib_increment_combined_counter
2399               (&adjacency_counters,
2400                thread_index,
2401                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2402                                                            b[0]) + rw_len0);
2403
2404           if (is_midchain)
2405             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2406
2407           if (is_mcast)
2408             /* copy bytes from the IP address into the MAC rewrite */
2409             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2410                                         adj0->rewrite_header.dst_mcast_offset,
2411                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2412         }
2413       else
2414         {
2415           b[0]->error = error_node->errors[error0];
2416           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2417             ip4_ttl_inc (b[0], ip0);
2418         }
2419
2420       next += 1;
2421       b += 1;
2422       n_left_from -= 1;
2423     }
2424 #endif
2425
2426   while (n_left_from > 0)
2427     {
2428       ip_adjacency_t *adj0;
2429       ip4_header_t *ip0;
2430       u32 rw_len0, adj_index0, error0;
2431       u32 tx_sw_if_index0;
2432
2433       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2434
2435       adj0 = adj_get (adj_index0);
2436
2437       if (do_counters)
2438         vlib_prefetch_combined_counter (&adjacency_counters,
2439                                         thread_index, adj_index0);
2440
2441       ip0 = vlib_buffer_get_current (b[0]);
2442
2443       error0 = IP4_ERROR_NONE;
2444
2445       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2446
2447
2448       /* Update packet buffer attributes/set output interface. */
2449       rw_len0 = adj0[0].rewrite_header.data_bytes;
2450       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2451
2452       /* Check MTU of outgoing interface. */
2453       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2454       if (b[0]->flags & VNET_BUFFER_F_GSO)
2455         ip0_len = gso_mtu_sz (b[0]);
2456
2457       ip4_mtu_check (b[0], ip0_len,
2458                      adj0[0].rewrite_header.max_l3_packet_bytes,
2459                      ip0->flags_and_fragment_offset &
2460                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2461                      next + 0, is_midchain, &error0);
2462
2463       if (is_mcast)
2464         {
2465           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2466                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2467                     IP4_ERROR_SAME_INTERFACE : error0);
2468         }
2469
2470       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2471        * to see the IP header */
2472       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2473         {
2474           u32 next_index = adj0[0].rewrite_header.next_index;
2475           vlib_buffer_advance (b[0], -(word) rw_len0);
2476           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2477           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2478
2479           if (PREDICT_FALSE
2480               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2481             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2482                                                 tx_sw_if_index0,
2483                                                 &next_index, b[0],
2484                                                 adj0->ia_cfg_index);
2485           next[0] = next_index;
2486
2487           if (is_midchain)
2488             {
2489               /* this acts on the packet that is about to be encapped */
2490               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2491                                           0 /* is_ip6 */ );
2492
2493               /* Guess we are only writing on ipv4 header. */
2494               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2495             }
2496           else
2497             /* Guess we are only writing on simple Ethernet header. */
2498             vnet_rewrite_one_header (adj0[0], ip0,
2499                                      sizeof (ethernet_header_t));
2500
2501           if (do_counters)
2502             vlib_increment_combined_counter
2503               (&adjacency_counters,
2504                thread_index, adj_index0, 1,
2505                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2506
2507           if (is_midchain)
2508             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2509
2510           if (is_mcast)
2511             /* copy bytes from the IP address into the MAC rewrite */
2512             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2513                                         adj0->rewrite_header.dst_mcast_offset,
2514                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2515         }
2516       else
2517         {
2518           b[0]->error = error_node->errors[error0];
2519           /* undo the TTL decrement - we'll be back to do it again */
2520           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2521             ip4_ttl_inc (b[0], ip0);
2522         }
2523
2524       next += 1;
2525       b += 1;
2526       n_left_from -= 1;
2527     }
2528
2529
2530   /* Need to do trace after rewrites to pick up new packet data. */
2531   if (node->flags & VLIB_NODE_FLAG_TRACE)
2532     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2533
2534   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2535   return frame->n_vectors;
2536 }
2537
2538 /** @brief IPv4 rewrite node.
2539     @node ip4-rewrite
2540
2541     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2542     header checksum, fetch the ip adjacency, check the outbound mtu,
2543     apply the adjacency rewrite, and send pkts to the adjacency
2544     rewrite header's rewrite_next_index.
2545
2546     @param vm vlib_main_t corresponding to the current thread
2547     @param node vlib_node_runtime_t
2548     @param frame vlib_frame_t whose contents should be dispatched
2549
2550     @par Graph mechanics: buffer metadata, next index usage
2551
2552     @em Uses:
2553     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2554         - the rewrite adjacency index
2555     - <code>adj->lookup_next_index</code>
2556         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2557           the packet will be dropped.
2558     - <code>adj->rewrite_header</code>
2559         - Rewrite string length, rewrite string, next_index
2560
2561     @em Sets:
2562     - <code>b->current_data, b->current_length</code>
2563         - Updated net of applying the rewrite string
2564
2565     <em>Next Indices:</em>
2566     - <code> adj->rewrite_header.next_index </code>
2567       or @c ip4-drop
2568 */
2569
2570 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2571                                  vlib_frame_t * frame)
2572 {
2573   if (adj_are_counters_enabled ())
2574     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2575   else
2576     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2577 }
2578
2579 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2580                                        vlib_node_runtime_t * node,
2581                                        vlib_frame_t * frame)
2582 {
2583   if (adj_are_counters_enabled ())
2584     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2585   else
2586     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2587 }
2588
2589 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2590                                   vlib_node_runtime_t * node,
2591                                   vlib_frame_t * frame)
2592 {
2593   if (adj_are_counters_enabled ())
2594     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2595   else
2596     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2597 }
2598
2599 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2600                                        vlib_node_runtime_t * node,
2601                                        vlib_frame_t * frame)
2602 {
2603   if (adj_are_counters_enabled ())
2604     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2605   else
2606     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2607 }
2608
2609 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2610                                         vlib_node_runtime_t * node,
2611                                         vlib_frame_t * frame)
2612 {
2613   if (adj_are_counters_enabled ())
2614     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2615   else
2616     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2617 }
2618
2619 /* *INDENT-OFF* */
2620 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2621   .name = "ip4-rewrite",
2622   .vector_size = sizeof (u32),
2623
2624   .format_trace = format_ip4_rewrite_trace,
2625
2626   .n_next_nodes = IP4_REWRITE_N_NEXT,
2627   .next_nodes = {
2628     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2629     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2630     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2631   },
2632 };
2633
2634 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2635   .name = "ip4-rewrite-bcast",
2636   .vector_size = sizeof (u32),
2637
2638   .format_trace = format_ip4_rewrite_trace,
2639   .sibling_of = "ip4-rewrite",
2640 };
2641
2642 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2643   .name = "ip4-rewrite-mcast",
2644   .vector_size = sizeof (u32),
2645
2646   .format_trace = format_ip4_rewrite_trace,
2647   .sibling_of = "ip4-rewrite",
2648 };
2649
2650 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2651   .name = "ip4-mcast-midchain",
2652   .vector_size = sizeof (u32),
2653
2654   .format_trace = format_ip4_rewrite_trace,
2655   .sibling_of = "ip4-rewrite",
2656 };
2657
2658 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2659   .name = "ip4-midchain",
2660   .vector_size = sizeof (u32),
2661   .format_trace = format_ip4_rewrite_trace,
2662   .sibling_of = "ip4-rewrite",
2663 };
2664 /* *INDENT-ON* */
2665
2666 static int
2667 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2668 {
2669   ip4_fib_mtrie_t *mtrie0;
2670   ip4_fib_mtrie_leaf_t leaf0;
2671   u32 lbi0;
2672
2673   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2674
2675   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2676   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2677   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2678
2679   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2680
2681   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2682 }
2683
2684 static clib_error_t *
2685 test_lookup_command_fn (vlib_main_t * vm,
2686                         unformat_input_t * input, vlib_cli_command_t * cmd)
2687 {
2688   ip4_fib_t *fib;
2689   u32 table_id = 0;
2690   f64 count = 1;
2691   u32 n;
2692   int i;
2693   ip4_address_t ip4_base_address;
2694   u64 errors = 0;
2695
2696   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2697     {
2698       if (unformat (input, "table %d", &table_id))
2699         {
2700           /* Make sure the entry exists. */
2701           fib = ip4_fib_get (table_id);
2702           if ((fib) && (fib->index != table_id))
2703             return clib_error_return (0, "<fib-index> %d does not exist",
2704                                       table_id);
2705         }
2706       else if (unformat (input, "count %f", &count))
2707         ;
2708
2709       else if (unformat (input, "%U",
2710                          unformat_ip4_address, &ip4_base_address))
2711         ;
2712       else
2713         return clib_error_return (0, "unknown input `%U'",
2714                                   format_unformat_error, input);
2715     }
2716
2717   n = count;
2718
2719   for (i = 0; i < n; i++)
2720     {
2721       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2722         errors++;
2723
2724       ip4_base_address.as_u32 =
2725         clib_host_to_net_u32 (1 +
2726                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2727     }
2728
2729   if (errors)
2730     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2731   else
2732     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2733
2734   return 0;
2735 }
2736
2737 /*?
2738  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2739  * given FIB table to determine if there is a conflict with the
2740  * adjacency table. The fib-id can be determined by using the
2741  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2742  * of 0 is used.
2743  *
2744  * @todo This command uses fib-id, other commands use table-id (not
2745  * just a name, they are different indexes). Would like to change this
2746  * to table-id for consistency.
2747  *
2748  * @cliexpar
2749  * Example of how to run the test lookup command:
2750  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2751  * No errors in 2 lookups
2752  * @cliexend
2753 ?*/
2754 /* *INDENT-OFF* */
2755 VLIB_CLI_COMMAND (lookup_test_command, static) =
2756 {
2757   .path = "test lookup",
2758   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2759   .function = test_lookup_command_fn,
2760 };
2761 /* *INDENT-ON* */
2762
2763 static clib_error_t *
2764 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2765                              unformat_input_t * input,
2766                              vlib_cli_command_t * cmd)
2767 {
2768   int matched = 0;
2769   u32 table_id = 0;
2770   u32 flow_hash_config = 0;
2771   int rv;
2772
2773   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2774     {
2775       if (unformat (input, "table %d", &table_id))
2776         matched = 1;
2777 #define _(a, b, v)                                                            \
2778   else if (unformat (input, #a))                                              \
2779   {                                                                           \
2780     flow_hash_config |= v;                                                    \
2781     matched = 1;                                                              \
2782   }
2783       foreach_flow_hash_bit
2784 #undef _
2785         else
2786         break;
2787     }
2788
2789   if (matched == 0)
2790     return clib_error_return (0, "unknown input `%U'",
2791                               format_unformat_error, input);
2792
2793   rv = ip_flow_hash_set (AF_IP4, table_id, flow_hash_config);
2794   switch (rv)
2795     {
2796     case 0:
2797       break;
2798
2799     case VNET_API_ERROR_NO_SUCH_FIB:
2800       return clib_error_return (0, "no such FIB table %d", table_id);
2801
2802     default:
2803       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2804       break;
2805     }
2806
2807   return 0;
2808 }
2809
2810 /*?
2811  * Configure the set of IPv4 fields used by the flow hash.
2812  *
2813  * @cliexpar
2814  * Example of how to set the flow hash on a given table:
2815  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2816  * Example of how to display the configured flow hash:
2817  * @cliexstart{show ip fib}
2818  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2819  * 0.0.0.0/0
2820  *   unicast-ip4-chain
2821  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2822  *     [0] [@0]: dpo-drop ip6
2823  * 0.0.0.0/32
2824  *   unicast-ip4-chain
2825  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2826  *     [0] [@0]: dpo-drop ip6
2827  * 224.0.0.0/8
2828  *   unicast-ip4-chain
2829  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2830  *     [0] [@0]: dpo-drop ip6
2831  * 6.0.1.2/32
2832  *   unicast-ip4-chain
2833  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2834  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2835  * 7.0.0.1/32
2836  *   unicast-ip4-chain
2837  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2838  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2839  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2840  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2841  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2842  * 240.0.0.0/8
2843  *   unicast-ip4-chain
2844  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2845  *     [0] [@0]: dpo-drop ip6
2846  * 255.255.255.255/32
2847  *   unicast-ip4-chain
2848  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2849  *     [0] [@0]: dpo-drop ip6
2850  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2851  * 0.0.0.0/0
2852  *   unicast-ip4-chain
2853  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2854  *     [0] [@0]: dpo-drop ip6
2855  * 0.0.0.0/32
2856  *   unicast-ip4-chain
2857  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2858  *     [0] [@0]: dpo-drop ip6
2859  * 172.16.1.0/24
2860  *   unicast-ip4-chain
2861  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2862  *     [0] [@4]: ipv4-glean: af_packet0
2863  * 172.16.1.1/32
2864  *   unicast-ip4-chain
2865  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2866  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2867  * 172.16.1.2/32
2868  *   unicast-ip4-chain
2869  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2870  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2871  * 172.16.2.0/24
2872  *   unicast-ip4-chain
2873  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2874  *     [0] [@4]: ipv4-glean: af_packet1
2875  * 172.16.2.1/32
2876  *   unicast-ip4-chain
2877  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2878  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2879  * 224.0.0.0/8
2880  *   unicast-ip4-chain
2881  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2882  *     [0] [@0]: dpo-drop ip6
2883  * 240.0.0.0/8
2884  *   unicast-ip4-chain
2885  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2886  *     [0] [@0]: dpo-drop ip6
2887  * 255.255.255.255/32
2888  *   unicast-ip4-chain
2889  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2890  *     [0] [@0]: dpo-drop ip6
2891  * @cliexend
2892 ?*/
2893 /* *INDENT-OFF* */
2894 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2895 {
2896   .path = "set ip flow-hash",
2897   .short_help =
2898   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2899   .function = set_ip_flow_hash_command_fn,
2900 };
2901 /* *INDENT-ON* */
2902
2903 #ifndef CLIB_MARCH_VARIANT
2904 int
2905 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2906                              u32 table_index)
2907 {
2908   vnet_main_t *vnm = vnet_get_main ();
2909   vnet_interface_main_t *im = &vnm->interface_main;
2910   ip4_main_t *ipm = &ip4_main;
2911   ip_lookup_main_t *lm = &ipm->lookup_main;
2912   vnet_classify_main_t *cm = &vnet_classify_main;
2913   ip4_address_t *if_addr;
2914
2915   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2916     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2917
2918   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2919     return VNET_API_ERROR_NO_SUCH_ENTRY;
2920
2921   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2922   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2923
2924   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2925
2926   if (NULL != if_addr)
2927     {
2928       fib_prefix_t pfx = {
2929         .fp_len = 32,
2930         .fp_proto = FIB_PROTOCOL_IP4,
2931         .fp_addr.ip4 = *if_addr,
2932       };
2933       u32 fib_index;
2934
2935       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2936                                                        sw_if_index);
2937
2938
2939       if (table_index != (u32) ~ 0)
2940         {
2941           dpo_id_t dpo = DPO_INVALID;
2942
2943           dpo_set (&dpo,
2944                    DPO_CLASSIFY,
2945                    DPO_PROTO_IP4,
2946                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2947
2948           fib_table_entry_special_dpo_add (fib_index,
2949                                            &pfx,
2950                                            FIB_SOURCE_CLASSIFY,
2951                                            FIB_ENTRY_FLAG_NONE, &dpo);
2952           dpo_reset (&dpo);
2953         }
2954       else
2955         {
2956           fib_table_entry_special_remove (fib_index,
2957                                           &pfx, FIB_SOURCE_CLASSIFY);
2958         }
2959     }
2960
2961   return 0;
2962 }
2963 #endif
2964
2965 static clib_error_t *
2966 set_ip_classify_command_fn (vlib_main_t * vm,
2967                             unformat_input_t * input,
2968                             vlib_cli_command_t * cmd)
2969 {
2970   u32 table_index = ~0;
2971   int table_index_set = 0;
2972   u32 sw_if_index = ~0;
2973   int rv;
2974
2975   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2976     {
2977       if (unformat (input, "table-index %d", &table_index))
2978         table_index_set = 1;
2979       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2980                          vnet_get_main (), &sw_if_index))
2981         ;
2982       else
2983         break;
2984     }
2985
2986   if (table_index_set == 0)
2987     return clib_error_return (0, "classify table-index must be specified");
2988
2989   if (sw_if_index == ~0)
2990     return clib_error_return (0, "interface / subif must be specified");
2991
2992   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2993
2994   switch (rv)
2995     {
2996     case 0:
2997       break;
2998
2999     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3000       return clib_error_return (0, "No such interface");
3001
3002     case VNET_API_ERROR_NO_SUCH_ENTRY:
3003       return clib_error_return (0, "No such classifier table");
3004     }
3005   return 0;
3006 }
3007
3008 /*?
3009  * Assign a classification table to an interface. The classification
3010  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3011  * commands. Once the table is created, use this command to filter packets
3012  * on an interface.
3013  *
3014  * @cliexpar
3015  * Example of how to assign a classification table to an interface:
3016  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3017 ?*/
3018 /* *INDENT-OFF* */
3019 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3020 {
3021     .path = "set ip classify",
3022     .short_help =
3023     "set ip classify intfc <interface> table-index <classify-idx>",
3024     .function = set_ip_classify_command_fn,
3025 };
3026 /* *INDENT-ON* */
3027
3028 /*
3029  * fd.io coding-style-patch-verification: ON
3030  *
3031  * Local Variables:
3032  * eval: (c-set-style "gnu")
3033  * End:
3034  */