build: add missing openssl-devel package for centos-8 vpp-ext-deps
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/classify_dpo.h>
56 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
57 #include <vnet/adj/adj_dp.h>
58
59 #include <vnet/ip/ip4_forward.h>
60 #include <vnet/interface_output.h>
61 #include <vnet/classify/vnet_classify.h>
62
63 /** @brief IPv4 lookup node.
64     @node ip4-lookup
65
66     This is the main IPv4 lookup dispatch node.
67
68     @param vm vlib_main_t corresponding to the current thread
69     @param node vlib_node_runtime_t
70     @param frame vlib_frame_t whose contents should be dispatched
71
72     @par Graph mechanics: buffer metadata, next index usage
73
74     @em Uses:
75     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
76         - Indicates the @c sw_if_index value of the interface that the
77           packet was received on.
78     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
79         - When the value is @c ~0 then the node performs a longest prefix
80           match (LPM) for the packet destination address in the FIB attached
81           to the receive interface.
82         - Otherwise perform LPM for the packet destination address in the
83           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
84           value (0, 1, ...) and not a VRF id.
85
86     @em Sets:
87     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
88         - The lookup result adjacency index.
89
90     <em>Next Index:</em>
91     - Dispatches the packet to the node index found in
92       ip_adjacency_t @c adj->lookup_next_index
93       (where @c adj is the lookup result adjacency).
94 */
95 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
96                                 vlib_frame_t * frame)
97 {
98   return ip4_lookup_inline (vm, node, frame);
99 }
100
101 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
102
103 /* *INDENT-OFF* */
104 VLIB_REGISTER_NODE (ip4_lookup_node) =
105 {
106   .name = "ip4-lookup",
107   .vector_size = sizeof (u32),
108   .format_trace = format_ip4_lookup_trace,
109   .n_next_nodes = IP_LOOKUP_N_NEXT,
110   .next_nodes = IP4_LOOKUP_NEXT_NODES,
111 };
112 /* *INDENT-ON* */
113
114 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
115                                       vlib_node_runtime_t * node,
116                                       vlib_frame_t * frame)
117 {
118   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
119   u32 n_left, *from;
120   u32 thread_index = vm->thread_index;
121   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
122   u16 nexts[VLIB_FRAME_SIZE], *next;
123
124   from = vlib_frame_vector_args (frame);
125   n_left = frame->n_vectors;
126   next = nexts;
127
128   vlib_get_buffers (vm, from, bufs, n_left);
129
130   while (n_left >= 4)
131     {
132       const load_balance_t *lb0, *lb1;
133       const ip4_header_t *ip0, *ip1;
134       u32 lbi0, hc0, lbi1, hc1;
135       const dpo_id_t *dpo0, *dpo1;
136
137       /* Prefetch next iteration. */
138       {
139         vlib_prefetch_buffer_header (b[2], LOAD);
140         vlib_prefetch_buffer_header (b[3], LOAD);
141
142         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
143         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
144       }
145
146       ip0 = vlib_buffer_get_current (b[0]);
147       ip1 = vlib_buffer_get_current (b[1]);
148       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
149       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
150
151       lb0 = load_balance_get (lbi0);
152       lb1 = load_balance_get (lbi1);
153
154       /*
155        * this node is for via FIBs we can re-use the hash value from the
156        * to node if present.
157        * We don't want to use the same hash value at each level in the recursion
158        * graph as that would lead to polarisation
159        */
160       hc0 = hc1 = 0;
161
162       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
163         {
164           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
165             {
166               hc0 = vnet_buffer (b[0])->ip.flow_hash =
167                 vnet_buffer (b[0])->ip.flow_hash >> 1;
168             }
169           else
170             {
171               hc0 = vnet_buffer (b[0])->ip.flow_hash =
172                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
173             }
174           dpo0 = load_balance_get_fwd_bucket
175             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
176         }
177       else
178         {
179           dpo0 = load_balance_get_bucket_i (lb0, 0);
180         }
181       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
182         {
183           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
184             {
185               hc1 = vnet_buffer (b[1])->ip.flow_hash =
186                 vnet_buffer (b[1])->ip.flow_hash >> 1;
187             }
188           else
189             {
190               hc1 = vnet_buffer (b[1])->ip.flow_hash =
191                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
192             }
193           dpo1 = load_balance_get_fwd_bucket
194             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
195         }
196       else
197         {
198           dpo1 = load_balance_get_bucket_i (lb1, 0);
199         }
200
201       next[0] = dpo0->dpoi_next_node;
202       next[1] = dpo1->dpoi_next_node;
203
204       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
205       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
206
207       vlib_increment_combined_counter
208         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
209       vlib_increment_combined_counter
210         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
211
212       b += 2;
213       next += 2;
214       n_left -= 2;
215     }
216
217   while (n_left > 0)
218     {
219       const load_balance_t *lb0;
220       const ip4_header_t *ip0;
221       const dpo_id_t *dpo0;
222       u32 lbi0, hc0;
223
224       ip0 = vlib_buffer_get_current (b[0]);
225       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
226
227       lb0 = load_balance_get (lbi0);
228
229       hc0 = 0;
230       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
231         {
232           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
233             {
234               hc0 = vnet_buffer (b[0])->ip.flow_hash =
235                 vnet_buffer (b[0])->ip.flow_hash >> 1;
236             }
237           else
238             {
239               hc0 = vnet_buffer (b[0])->ip.flow_hash =
240                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
241             }
242           dpo0 = load_balance_get_fwd_bucket
243             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
244         }
245       else
246         {
247           dpo0 = load_balance_get_bucket_i (lb0, 0);
248         }
249
250       next[0] = dpo0->dpoi_next_node;
251       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
252
253       vlib_increment_combined_counter
254         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
255
256       b += 1;
257       next += 1;
258       n_left -= 1;
259     }
260
261   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
262   if (node->flags & VLIB_NODE_FLAG_TRACE)
263     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
264
265   return frame->n_vectors;
266 }
267
268 /* *INDENT-OFF* */
269 VLIB_REGISTER_NODE (ip4_load_balance_node) =
270 {
271   .name = "ip4-load-balance",
272   .vector_size = sizeof (u32),
273   .sibling_of = "ip4-lookup",
274   .format_trace = format_ip4_lookup_trace,
275 };
276 /* *INDENT-ON* */
277
278 #ifndef CLIB_MARCH_VARIANT
279 /* get first interface address */
280 ip4_address_t *
281 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
282                              ip_interface_address_t ** result_ia)
283 {
284   ip_lookup_main_t *lm = &im->lookup_main;
285   ip_interface_address_t *ia = 0;
286   ip4_address_t *result = 0;
287
288   /* *INDENT-OFF* */
289   foreach_ip_interface_address
290     (lm, ia, sw_if_index,
291      1 /* honor unnumbered */ ,
292      ({
293        ip4_address_t * a =
294          ip_interface_address_get_address (lm, ia);
295        result = a;
296        break;
297      }));
298   /* *INDENT-OFF* */
299   if (result_ia)
300     *result_ia = result ? ia : 0;
301   return result;
302 }
303 #endif
304
305 static void
306 ip4_add_subnet_bcast_route (u32 fib_index,
307                             fib_prefix_t *pfx,
308                             u32 sw_if_index)
309 {
310   vnet_sw_interface_flags_t iflags;
311
312   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
313
314   fib_table_entry_special_remove(fib_index,
315                                  pfx,
316                                  FIB_SOURCE_INTERFACE);
317
318   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
319     {
320       fib_table_entry_update_one_path (fib_index, pfx,
321                                        FIB_SOURCE_INTERFACE,
322                                        FIB_ENTRY_FLAG_NONE,
323                                        DPO_PROTO_IP4,
324                                        /* No next-hop address */
325                                        &ADJ_BCAST_ADDR,
326                                        sw_if_index,
327                                        // invalid FIB index
328                                        ~0,
329                                        1,
330                                        // no out-label stack
331                                        NULL,
332                                        FIB_ROUTE_PATH_FLAG_NONE);
333     }
334   else
335     {
336         fib_table_entry_special_add(fib_index,
337                                     pfx,
338                                     FIB_SOURCE_INTERFACE,
339                                     (FIB_ENTRY_FLAG_DROP |
340                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
341     }
342 }
343
344 static void
345 ip4_add_interface_prefix_routes (ip4_main_t *im,
346                                  u32 sw_if_index,
347                                  u32 fib_index,
348                                  ip_interface_address_t * a)
349 {
350   ip_lookup_main_t *lm = &im->lookup_main;
351   ip_interface_prefix_t *if_prefix;
352   ip4_address_t *address = ip_interface_address_get_address (lm, a);
353
354   ip_interface_prefix_key_t key = {
355     .prefix = {
356       .fp_len = a->address_length,
357       .fp_proto = FIB_PROTOCOL_IP4,
358       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
359     },
360     .sw_if_index = sw_if_index,
361   };
362
363   fib_prefix_t pfx_special = {
364     .fp_proto = FIB_PROTOCOL_IP4,
365   };
366
367   /* If prefix already set on interface, just increment ref count & return */
368   if_prefix = ip_get_interface_prefix (lm, &key);
369   if (if_prefix)
370     {
371       if_prefix->ref_count += 1;
372       return;
373     }
374
375   /* New prefix - allocate a pool entry, initialize it, add to the hash */
376   pool_get (lm->if_prefix_pool, if_prefix);
377   if_prefix->ref_count = 1;
378   if_prefix->src_ia_index = a - lm->if_address_pool;
379   clib_memcpy (&if_prefix->key, &key, sizeof (key));
380   mhash_set (&lm->prefix_to_if_prefix_index, &key,
381              if_prefix - lm->if_prefix_pool, 0 /* old value */);
382
383   pfx_special.fp_len = a->address_length;
384   pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
385
386   /* set the glean route for the prefix */
387   fib_table_entry_update_one_path (fib_index, &pfx_special,
388                                    FIB_SOURCE_INTERFACE,
389                                    (FIB_ENTRY_FLAG_CONNECTED |
390                                     FIB_ENTRY_FLAG_ATTACHED),
391                                    DPO_PROTO_IP4,
392                                    /* No next-hop address */
393                                    NULL,
394                                    sw_if_index,
395                                    /* invalid FIB index */
396                                    ~0,
397                                    1,
398                                    /* no out-label stack */
399                                    NULL,
400                                    FIB_ROUTE_PATH_FLAG_NONE);
401
402   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
403   if (a->address_length <= 30)
404     {
405       /* set a drop route for the base address of the prefix */
406       pfx_special.fp_len = 32;
407       pfx_special.fp_addr.ip4.as_u32 =
408         address->as_u32 & im->fib_masks[a->address_length];
409
410       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
411         fib_table_entry_special_add (fib_index, &pfx_special,
412                                      FIB_SOURCE_INTERFACE,
413                                      (FIB_ENTRY_FLAG_DROP |
414                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
415
416       /* set a route for the broadcast address of the prefix */
417       pfx_special.fp_len = 32;
418       pfx_special.fp_addr.ip4.as_u32 =
419         address->as_u32 | ~im->fib_masks[a->address_length];
420       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
421         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
422
423
424     }
425   /* length == 31 - add an attached route for the other address */
426   else if (a->address_length == 31)
427     {
428       pfx_special.fp_len = 32;
429       pfx_special.fp_addr.ip4.as_u32 =
430         address->as_u32 ^ clib_host_to_net_u32(1);
431
432       fib_table_entry_update_one_path (fib_index, &pfx_special,
433                                        FIB_SOURCE_INTERFACE,
434                                        (FIB_ENTRY_FLAG_ATTACHED),
435                                        DPO_PROTO_IP4,
436                                        &pfx_special.fp_addr,
437                                        sw_if_index,
438                                        /* invalid FIB index */
439                                        ~0,
440                                        1,
441                                        NULL,
442                                        FIB_ROUTE_PATH_FLAG_NONE);
443     }
444 }
445
446 static void
447 ip4_add_interface_routes (u32 sw_if_index,
448                           ip4_main_t * im, u32 fib_index,
449                           ip_interface_address_t * a)
450 {
451   ip_lookup_main_t *lm = &im->lookup_main;
452   ip4_address_t *address = ip_interface_address_get_address (lm, a);
453   fib_prefix_t pfx = {
454     .fp_len = 32,
455     .fp_proto = FIB_PROTOCOL_IP4,
456     .fp_addr.ip4 = *address,
457   };
458
459   /* set special routes for the prefix if needed */
460   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
461
462   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
463     {
464       u32 classify_table_index =
465         lm->classify_table_index_by_sw_if_index[sw_if_index];
466       if (classify_table_index != (u32) ~ 0)
467         {
468           dpo_id_t dpo = DPO_INVALID;
469
470           dpo_set (&dpo,
471                    DPO_CLASSIFY,
472                    DPO_PROTO_IP4,
473                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
474
475           fib_table_entry_special_dpo_add (fib_index,
476                                            &pfx,
477                                            FIB_SOURCE_CLASSIFY,
478                                            FIB_ENTRY_FLAG_NONE, &dpo);
479           dpo_reset (&dpo);
480         }
481     }
482
483   fib_table_entry_update_one_path (fib_index, &pfx,
484                                    FIB_SOURCE_INTERFACE,
485                                    (FIB_ENTRY_FLAG_CONNECTED |
486                                     FIB_ENTRY_FLAG_LOCAL),
487                                    DPO_PROTO_IP4,
488                                    &pfx.fp_addr,
489                                    sw_if_index,
490                                    // invalid FIB index
491                                    ~0,
492                                    1, NULL,
493                                    FIB_ROUTE_PATH_FLAG_NONE);
494 }
495
496 static void
497 ip4_del_interface_prefix_routes (ip4_main_t * im,
498                                  u32 sw_if_index,
499                                  u32 fib_index,
500                                  ip4_address_t * address,
501                                  u32 address_length)
502 {
503   ip_lookup_main_t *lm = &im->lookup_main;
504   ip_interface_prefix_t *if_prefix;
505
506   ip_interface_prefix_key_t key = {
507     .prefix = {
508       .fp_len = address_length,
509       .fp_proto = FIB_PROTOCOL_IP4,
510       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
511     },
512     .sw_if_index = sw_if_index,
513   };
514
515   fib_prefix_t pfx_special = {
516     .fp_len = 32,
517     .fp_proto = FIB_PROTOCOL_IP4,
518   };
519
520   if_prefix = ip_get_interface_prefix (lm, &key);
521   if (!if_prefix)
522     {
523       clib_warning ("Prefix not found while deleting %U",
524                     format_ip4_address_and_length, address, address_length);
525       return;
526     }
527
528   if_prefix->ref_count -= 1;
529
530   /*
531    * Routes need to be adjusted if deleting last intf addr in prefix
532    *
533    * We're done now otherwise
534    */
535   if (if_prefix->ref_count > 0)
536     return;
537
538   /* length <= 30, delete glean route, first address, last address */
539   if (address_length <= 30)
540     {
541       /* Less work to do in FIB if we remove the covered /32s first */
542
543       /* first address in prefix */
544       pfx_special.fp_addr.ip4.as_u32 =
545         address->as_u32 & im->fib_masks[address_length];
546       pfx_special.fp_len = 32;
547
548       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
549         fib_table_entry_special_remove (fib_index,
550                                         &pfx_special,
551                                         FIB_SOURCE_INTERFACE);
552
553       /* prefix broadcast address */
554       pfx_special.fp_addr.ip4.as_u32 =
555         address->as_u32 | ~im->fib_masks[address_length];
556       pfx_special.fp_len = 32;
557
558       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
559         fib_table_entry_special_remove (fib_index,
560                                         &pfx_special,
561                                         FIB_SOURCE_INTERFACE);
562     }
563   else if (address_length == 31)
564     {
565       /* length == 31, delete attached route for the other address */
566       pfx_special.fp_addr.ip4.as_u32 =
567         address->as_u32 ^ clib_host_to_net_u32(1);
568
569       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
570     }
571
572   /* remove glean route for prefix */
573   pfx_special.fp_addr.ip4 = *address;
574   pfx_special.fp_len = address_length;
575   fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
576
577   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
578   pool_put (lm->if_prefix_pool, if_prefix);
579 }
580
581 static void
582 ip4_del_interface_routes (u32 sw_if_index,
583                           ip4_main_t * im,
584                           u32 fib_index,
585                           ip4_address_t * address, u32 address_length)
586 {
587   fib_prefix_t pfx = {
588     .fp_len = 32,
589     .fp_proto = FIB_PROTOCOL_IP4,
590     .fp_addr.ip4 = *address,
591   };
592
593   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
594
595   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
596                                    address, address_length);
597 }
598
599 #ifndef CLIB_MARCH_VARIANT
600 void
601 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
602 {
603   ip4_main_t *im = &ip4_main;
604   vnet_main_t *vnm = vnet_get_main ();
605   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
606
607   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
608
609   /*
610    * enable/disable only on the 1<->0 transition
611    */
612   if (is_enable)
613     {
614       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
615         return;
616     }
617   else
618     {
619       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
620       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
621         return;
622     }
623   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
624                                !is_enable, 0, 0);
625
626
627   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
628                                sw_if_index, !is_enable, 0, 0);
629
630   if (is_enable)
631     hi->l3_if_count++;
632   else if (hi->l3_if_count)
633     hi->l3_if_count--;
634
635   {
636     ip4_enable_disable_interface_callback_t *cb;
637     vec_foreach (cb, im->enable_disable_interface_callbacks)
638       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
639   }
640 }
641
642 static clib_error_t *
643 ip4_add_del_interface_address_internal (vlib_main_t * vm,
644                                         u32 sw_if_index,
645                                         ip4_address_t * address,
646                                         u32 address_length, u32 is_del)
647 {
648   vnet_main_t *vnm = vnet_get_main ();
649   ip4_main_t *im = &ip4_main;
650   ip_lookup_main_t *lm = &im->lookup_main;
651   clib_error_t *error = 0;
652   u32 if_address_index;
653   ip4_address_fib_t ip4_af, *addr_fib = 0;
654
655   /* local0 interface doesn't support IP addressing  */
656   if (sw_if_index == 0)
657     {
658       return
659        clib_error_create ("local0 interface doesn't support IP addressing");
660     }
661
662   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
663   ip4_addr_fib_init (&ip4_af, address,
664                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
665   vec_add1 (addr_fib, ip4_af);
666
667   /*
668    * there is no support for adj-fib handling in the presence of overlapping
669    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
670    * most routers do.
671    */
672   /* *INDENT-OFF* */
673   if (!is_del)
674     {
675       /* When adding an address check that it does not conflict
676          with an existing address on any interface in this table. */
677       ip_interface_address_t *ia;
678       vnet_sw_interface_t *sif;
679
680       pool_foreach (sif, vnm->interface_main.sw_interfaces)
681        {
682           if (im->fib_index_by_sw_if_index[sw_if_index] ==
683               im->fib_index_by_sw_if_index[sif->sw_if_index])
684             {
685               foreach_ip_interface_address
686                 (&im->lookup_main, ia, sif->sw_if_index,
687                  0 /* honor unnumbered */ ,
688                  ({
689                    ip4_address_t * x =
690                      ip_interface_address_get_address
691                      (&im->lookup_main, ia);
692
693                    if (ip4_destination_matches_route
694                        (im, address, x, ia->address_length) ||
695                        ip4_destination_matches_route (im,
696                                                       x,
697                                                       address,
698                                                       address_length))
699                      {
700                        /* an intf may have >1 addr from the same prefix */
701                        if ((sw_if_index == sif->sw_if_index) &&
702                            (ia->address_length == address_length) &&
703                            (x->as_u32 != address->as_u32))
704                          continue;
705
706                        if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
707                          /* if the address we're comparing against is stale
708                           * then the CP has not added this one back yet, maybe
709                           * it never will, so we have to assume it won't and
710                           * ignore it. if it does add it back, then it will fail
711                           * because this one is now present */
712                          continue;
713
714                        /* error if the length or intf was different */
715                        vnm->api_errno = VNET_API_ERROR_ADDRESS_IN_USE;
716
717                        error = clib_error_create
718                          ("failed to add %U on %U which conflicts with %U for interface %U",
719                           format_ip4_address_and_length, address,
720                           address_length,
721                           format_vnet_sw_if_index_name, vnm,
722                           sw_if_index,
723                           format_ip4_address_and_length, x,
724                           ia->address_length,
725                           format_vnet_sw_if_index_name, vnm,
726                           sif->sw_if_index);
727                        goto done;
728                      }
729                  }));
730             }
731       }
732     }
733   /* *INDENT-ON* */
734
735   if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
736
737   if (is_del)
738     {
739       if (~0 == if_address_index)
740         {
741           vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
742           error = clib_error_create ("%U not found for interface %U",
743                                      lm->format_address_and_length,
744                                      addr_fib, address_length,
745                                      format_vnet_sw_if_index_name, vnm,
746                                      sw_if_index);
747           goto done;
748         }
749
750       error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib,
751                                         address_length, sw_if_index);
752       if (error)
753         goto done;
754     }
755   else
756     {
757       if (~0 != if_address_index)
758         {
759           ip_interface_address_t *ia;
760
761           ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
762
763           if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
764             {
765               if (ia->sw_if_index == sw_if_index)
766                 {
767                   /* re-adding an address during the replace action.
768                    * consdier this the update. clear the flag and
769                    * we're done */
770                   ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
771                   goto done;
772                 }
773               else
774                 {
775                   /* The prefix is moving from one interface to another.
776                    * delete the stale and add the new */
777                   ip4_add_del_interface_address_internal (vm,
778                                                           ia->sw_if_index,
779                                                           address,
780                                                           address_length, 1);
781                   ia = NULL;
782                   error = ip_interface_address_add (lm, sw_if_index,
783                                                     addr_fib, address_length,
784                                                     &if_address_index);
785                 }
786             }
787           else
788             {
789               vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
790               error = clib_error_create
791                 ("Prefix %U already found on interface %U",
792                  lm->format_address_and_length, addr_fib, address_length,
793                  format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
794             }
795         }
796       else
797         error = ip_interface_address_add (lm, sw_if_index,
798                                           addr_fib, address_length,
799                                           &if_address_index);
800     }
801
802   if (error)
803     goto done;
804
805   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
806   ip4_mfib_interface_enable_disable (sw_if_index, !is_del);
807
808   /* intf addr routes are added/deleted on admin up/down */
809   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
810     {
811       if (is_del)
812         ip4_del_interface_routes (sw_if_index,
813                                   im, ip4_af.fib_index, address,
814                                   address_length);
815       else
816         ip4_add_interface_routes (sw_if_index,
817                                   im, ip4_af.fib_index,
818                                   pool_elt_at_index
819                                   (lm->if_address_pool, if_address_index));
820     }
821
822   ip4_add_del_interface_address_callback_t *cb;
823   vec_foreach (cb, im->add_del_interface_address_callbacks)
824     cb->function (im, cb->function_opaque, sw_if_index,
825                   address, address_length, if_address_index, is_del);
826
827 done:
828   vec_free (addr_fib);
829   return error;
830 }
831
832 clib_error_t *
833 ip4_add_del_interface_address (vlib_main_t * vm,
834                                u32 sw_if_index,
835                                ip4_address_t * address,
836                                u32 address_length, u32 is_del)
837 {
838   return ip4_add_del_interface_address_internal
839     (vm, sw_if_index, address, address_length, is_del);
840 }
841
842 void
843 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
844 {
845   ip_interface_address_t *ia;
846   ip4_main_t *im;
847
848   im = &ip4_main;
849
850   /*
851    * when directed broadcast is enabled, the subnet braodcast route will forward
852    * packets using an adjacency with a broadcast MAC. otherwise it drops
853    */
854   /* *INDENT-OFF* */
855   foreach_ip_interface_address(&im->lookup_main, ia,
856                                sw_if_index, 0,
857      ({
858        if (ia->address_length <= 30)
859          {
860            ip4_address_t *ipa;
861
862            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
863
864            fib_prefix_t pfx = {
865              .fp_len = 32,
866              .fp_proto = FIB_PROTOCOL_IP4,
867              .fp_addr = {
868                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
869              },
870            };
871
872            ip4_add_subnet_bcast_route
873              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
874                                                   sw_if_index),
875               &pfx, sw_if_index);
876          }
877      }));
878   /* *INDENT-ON* */
879 }
880 #endif
881
882 static clib_error_t *
883 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
884 {
885   ip4_main_t *im = &ip4_main;
886   ip_interface_address_t *ia;
887   ip4_address_t *a;
888   u32 is_admin_up, fib_index;
889
890   /* Fill in lookup tables with default table (0). */
891   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
892
893   vec_validate_init_empty (im->
894                            lookup_main.if_address_pool_index_by_sw_if_index,
895                            sw_if_index, ~0);
896
897   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
898
899   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
900
901   /* *INDENT-OFF* */
902   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
903                                 0 /* honor unnumbered */,
904   ({
905     a = ip_interface_address_get_address (&im->lookup_main, ia);
906     if (is_admin_up)
907       ip4_add_interface_routes (sw_if_index,
908                                 im, fib_index,
909                                 ia);
910     else
911       ip4_del_interface_routes (sw_if_index,
912                                 im, fib_index,
913                                 a, ia->address_length);
914   }));
915   /* *INDENT-ON* */
916
917   return 0;
918 }
919
920 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
921
922 /* Built-in ip4 unicast rx feature path definition */
923 /* *INDENT-OFF* */
924 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
925 {
926   .arc_name = "ip4-unicast",
927   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
928   .last_in_arc = "ip4-lookup",
929   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
930 };
931
932 VNET_FEATURE_INIT (ip4_flow_classify, static) =
933 {
934   .arc_name = "ip4-unicast",
935   .node_name = "ip4-flow-classify",
936   .runs_before = VNET_FEATURES ("ip4-inacl"),
937 };
938
939 VNET_FEATURE_INIT (ip4_inacl, static) =
940 {
941   .arc_name = "ip4-unicast",
942   .node_name = "ip4-inacl",
943   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
944 };
945
946 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
947 {
948   .arc_name = "ip4-unicast",
949   .node_name = "ip4-source-and-port-range-check-rx",
950   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
951 };
952
953 VNET_FEATURE_INIT (ip4_policer_classify, static) =
954 {
955   .arc_name = "ip4-unicast",
956   .node_name = "ip4-policer-classify",
957   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
958 };
959
960 VNET_FEATURE_INIT (ip4_ipsec, static) =
961 {
962   .arc_name = "ip4-unicast",
963   .node_name = "ipsec4-input-feature",
964   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
965 };
966
967 VNET_FEATURE_INIT (ip4_vpath, static) =
968 {
969   .arc_name = "ip4-unicast",
970   .node_name = "vpath-input-ip4",
971   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
972 };
973
974 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
975 {
976   .arc_name = "ip4-unicast",
977   .node_name = "ip4-vxlan-bypass",
978   .runs_before = VNET_FEATURES ("ip4-lookup"),
979 };
980
981 VNET_FEATURE_INIT (ip4_not_enabled, static) =
982 {
983   .arc_name = "ip4-unicast",
984   .node_name = "ip4-not-enabled",
985   .runs_before = VNET_FEATURES ("ip4-lookup"),
986 };
987
988 VNET_FEATURE_INIT (ip4_lookup, static) =
989 {
990   .arc_name = "ip4-unicast",
991   .node_name = "ip4-lookup",
992   .runs_before = 0,     /* not before any other features */
993 };
994
995 /* Built-in ip4 multicast rx feature path definition */
996 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
997 {
998   .arc_name = "ip4-multicast",
999   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1000   .last_in_arc = "ip4-mfib-forward-lookup",
1001   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1002 };
1003
1004 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1005 {
1006   .arc_name = "ip4-multicast",
1007   .node_name = "vpath-input-ip4",
1008   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1009 };
1010
1011 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
1012 {
1013   .arc_name = "ip4-multicast",
1014   .node_name = "ip4-not-enabled",
1015   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1016 };
1017
1018 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1019 {
1020   .arc_name = "ip4-multicast",
1021   .node_name = "ip4-mfib-forward-lookup",
1022   .runs_before = 0,     /* last feature */
1023 };
1024
1025 /* Source and port-range check ip4 tx feature path definition */
1026 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1027 {
1028   .arc_name = "ip4-output",
1029   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1030   .last_in_arc = "interface-output",
1031   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1032 };
1033
1034 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1035 {
1036   .arc_name = "ip4-output",
1037   .node_name = "ip4-source-and-port-range-check-tx",
1038   .runs_before = VNET_FEATURES ("ip4-outacl"),
1039 };
1040
1041 VNET_FEATURE_INIT (ip4_outacl, static) =
1042 {
1043   .arc_name = "ip4-output",
1044   .node_name = "ip4-outacl",
1045   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1046 };
1047
1048 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1049 {
1050   .arc_name = "ip4-output",
1051   .node_name = "ipsec4-output-feature",
1052   .runs_before = VNET_FEATURES ("interface-output"),
1053 };
1054
1055 /* Built-in ip4 tx feature path definition */
1056 VNET_FEATURE_INIT (ip4_interface_output, static) =
1057 {
1058   .arc_name = "ip4-output",
1059   .node_name = "interface-output",
1060   .runs_before = 0,     /* not before any other features */
1061 };
1062 /* *INDENT-ON* */
1063
1064 static clib_error_t *
1065 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1066 {
1067   ip4_main_t *im = &ip4_main;
1068
1069   /* Fill in lookup tables with default table (0). */
1070   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1071   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1072
1073   if (!is_add)
1074     {
1075       ip4_main_t *im4 = &ip4_main;
1076       ip_lookup_main_t *lm4 = &im4->lookup_main;
1077       ip_interface_address_t *ia = 0;
1078       ip4_address_t *address;
1079       vlib_main_t *vm = vlib_get_main ();
1080
1081       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1082       /* *INDENT-OFF* */
1083       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1084       ({
1085         address = ip_interface_address_get_address (lm4, ia);
1086         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1087       }));
1088       /* *INDENT-ON* */
1089       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1090     }
1091
1092   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1093                                is_add, 0, 0);
1094
1095   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1096                                sw_if_index, is_add, 0, 0);
1097
1098   return /* no error */ 0;
1099 }
1100
1101 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1102
1103 /* Global IP4 main. */
1104 #ifndef CLIB_MARCH_VARIANT
1105 ip4_main_t ip4_main;
1106 #endif /* CLIB_MARCH_VARIANT */
1107
1108 static clib_error_t *
1109 ip4_lookup_init (vlib_main_t * vm)
1110 {
1111   ip4_main_t *im = &ip4_main;
1112   clib_error_t *error;
1113   uword i;
1114
1115   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1116     return error;
1117   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1118     return (error);
1119   if ((error = vlib_call_init_function (vm, fib_module_init)))
1120     return error;
1121   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1122     return error;
1123
1124   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1125     {
1126       u32 m;
1127
1128       if (i < 32)
1129         m = pow2_mask (i) << (32 - i);
1130       else
1131         m = ~0;
1132       im->fib_masks[i] = clib_host_to_net_u32 (m);
1133     }
1134
1135   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1136
1137   /* Create FIB with index 0 and table id of 0. */
1138   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1139                                      FIB_SOURCE_DEFAULT_ROUTE);
1140   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1141                                       MFIB_SOURCE_DEFAULT_ROUTE);
1142
1143   {
1144     pg_node_t *pn;
1145     pn = pg_get_node (ip4_lookup_node.index);
1146     pn->unformat_edit = unformat_pg_ip4_header;
1147   }
1148
1149   {
1150     ethernet_arp_header_t h;
1151
1152     clib_memset (&h, 0, sizeof (h));
1153
1154 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1155 #define _8(f,v) h.f = v;
1156     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1157     _16 (l3_type, ETHERNET_TYPE_IP4);
1158     _8 (n_l2_address_bytes, 6);
1159     _8 (n_l3_address_bytes, 4);
1160     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1161 #undef _16
1162 #undef _8
1163
1164     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1165                                /* data */ &h,
1166                                sizeof (h),
1167                                /* alloc chunk size */ 8,
1168                                "ip4 arp");
1169   }
1170
1171   return error;
1172 }
1173
1174 VLIB_INIT_FUNCTION (ip4_lookup_init);
1175
1176 typedef struct
1177 {
1178   /* Adjacency taken. */
1179   u32 dpo_index;
1180   u32 flow_hash;
1181   u32 fib_index;
1182
1183   /* Packet data, possibly *after* rewrite. */
1184   u8 packet_data[64 - 1 * sizeof (u32)];
1185 }
1186 ip4_forward_next_trace_t;
1187
1188 #ifndef CLIB_MARCH_VARIANT
1189 u8 *
1190 format_ip4_forward_next_trace (u8 * s, va_list * args)
1191 {
1192   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1193   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1194   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1195   u32 indent = format_get_indent (s);
1196   s = format (s, "%U%U",
1197               format_white_space, indent,
1198               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1199   return s;
1200 }
1201 #endif
1202
1203 static u8 *
1204 format_ip4_lookup_trace (u8 * s, va_list * args)
1205 {
1206   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1207   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1208   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1209   u32 indent = format_get_indent (s);
1210
1211   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1212               t->fib_index, t->dpo_index, t->flow_hash);
1213   s = format (s, "\n%U%U",
1214               format_white_space, indent,
1215               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1216   return s;
1217 }
1218
1219 static u8 *
1220 format_ip4_rewrite_trace (u8 * s, va_list * args)
1221 {
1222   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1223   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1224   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1225   u32 indent = format_get_indent (s);
1226
1227   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1228               t->fib_index, t->dpo_index, format_ip_adjacency,
1229               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1230   s = format (s, "\n%U%U",
1231               format_white_space, indent,
1232               format_ip_adjacency_packet_data,
1233               t->packet_data, sizeof (t->packet_data));
1234   return s;
1235 }
1236
1237 #ifndef CLIB_MARCH_VARIANT
1238 /* Common trace function for all ip4-forward next nodes. */
1239 void
1240 ip4_forward_next_trace (vlib_main_t * vm,
1241                         vlib_node_runtime_t * node,
1242                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1243 {
1244   u32 *from, n_left;
1245   ip4_main_t *im = &ip4_main;
1246
1247   n_left = frame->n_vectors;
1248   from = vlib_frame_vector_args (frame);
1249
1250   while (n_left >= 4)
1251     {
1252       u32 bi0, bi1;
1253       vlib_buffer_t *b0, *b1;
1254       ip4_forward_next_trace_t *t0, *t1;
1255
1256       /* Prefetch next iteration. */
1257       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1258       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1259
1260       bi0 = from[0];
1261       bi1 = from[1];
1262
1263       b0 = vlib_get_buffer (vm, bi0);
1264       b1 = vlib_get_buffer (vm, bi1);
1265
1266       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1267         {
1268           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1269           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1270           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1271           t0->fib_index =
1272             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1273              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1274             vec_elt (im->fib_index_by_sw_if_index,
1275                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1276
1277           clib_memcpy_fast (t0->packet_data,
1278                             vlib_buffer_get_current (b0),
1279                             sizeof (t0->packet_data));
1280         }
1281       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1282         {
1283           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1284           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1285           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1286           t1->fib_index =
1287             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1288              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1289             vec_elt (im->fib_index_by_sw_if_index,
1290                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1291           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1292                             sizeof (t1->packet_data));
1293         }
1294       from += 2;
1295       n_left -= 2;
1296     }
1297
1298   while (n_left >= 1)
1299     {
1300       u32 bi0;
1301       vlib_buffer_t *b0;
1302       ip4_forward_next_trace_t *t0;
1303
1304       bi0 = from[0];
1305
1306       b0 = vlib_get_buffer (vm, bi0);
1307
1308       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1309         {
1310           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1311           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1312           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1313           t0->fib_index =
1314             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1315              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1316             vec_elt (im->fib_index_by_sw_if_index,
1317                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1318           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1319                             sizeof (t0->packet_data));
1320         }
1321       from += 1;
1322       n_left -= 1;
1323     }
1324 }
1325
1326 /* Compute TCP/UDP/ICMP4 checksum in software. */
1327 u16
1328 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1329                               ip4_header_t * ip0)
1330 {
1331   ip_csum_t sum0;
1332   u32 ip_header_length, payload_length_host_byte_order;
1333
1334   /* Initialize checksum with ip header. */
1335   ip_header_length = ip4_header_bytes (ip0);
1336   payload_length_host_byte_order =
1337     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1338   sum0 =
1339     clib_host_to_net_u32 (payload_length_host_byte_order +
1340                           (ip0->protocol << 16));
1341
1342   if (BITS (uword) == 32)
1343     {
1344       sum0 =
1345         ip_csum_with_carry (sum0,
1346                             clib_mem_unaligned (&ip0->src_address, u32));
1347       sum0 =
1348         ip_csum_with_carry (sum0,
1349                             clib_mem_unaligned (&ip0->dst_address, u32));
1350     }
1351   else
1352     sum0 =
1353       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1354
1355   return ip_calculate_l4_checksum (vm, p0, sum0,
1356                                    payload_length_host_byte_order, (u8 *) ip0,
1357                                    ip_header_length, NULL);
1358 }
1359
1360 u32
1361 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1362 {
1363   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1364   udp_header_t *udp0;
1365   u16 sum16;
1366
1367   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1368           || ip0->protocol == IP_PROTOCOL_UDP);
1369
1370   udp0 = (void *) (ip0 + 1);
1371   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1372     {
1373       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1374                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1375       return p0->flags;
1376     }
1377
1378   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1379
1380   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1381                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1382
1383   return p0->flags;
1384 }
1385 #endif
1386
1387 /* *INDENT-OFF* */
1388 VNET_FEATURE_ARC_INIT (ip4_local) =
1389 {
1390   .arc_name  = "ip4-local",
1391   .start_nodes = VNET_FEATURES ("ip4-local"),
1392   .last_in_arc = "ip4-local-end-of-arc",
1393 };
1394 /* *INDENT-ON* */
1395
1396 static inline void
1397 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1398                             ip4_header_t * ip, u8 is_udp, u8 * error,
1399                             u8 * good_tcp_udp)
1400 {
1401   u32 flags0;
1402   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1403   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1404   if (is_udp)
1405     {
1406       udp_header_t *udp;
1407       u32 ip_len, udp_len;
1408       i32 len_diff;
1409       udp = ip4_next_header (ip);
1410       /* Verify UDP length. */
1411       ip_len = clib_net_to_host_u16 (ip->length);
1412       udp_len = clib_net_to_host_u16 (udp->length);
1413
1414       len_diff = ip_len - udp_len;
1415       *good_tcp_udp &= len_diff >= 0;
1416       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1417     }
1418 }
1419
/*
 * Checksum-state helpers for ip4-local.  Every argument and every
 * expansion is fully parenthesized so the macros can be negated, combined
 * with other operators, or given a pointer expression safely.
 */

/* Non-zero when the buffer carries a TCP or UDP checksum-offload flag. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
     || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/* Non-zero when a TCP/UDP packet's checksum has neither been computed
 * already nor been marked as offloaded. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)            \
          || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
      || ip4_local_csum_is_offloaded (_b)) != 0)
1431
1432 static inline void
1433 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1434                          ip4_header_t * ih, u8 * error)
1435 {
1436   u8 is_udp, is_tcp_udp, good_tcp_udp;
1437
1438   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1439   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1440
1441   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1442     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1443   else
1444     good_tcp_udp = ip4_local_csum_is_valid (b);
1445
1446   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1447   *error = (is_tcp_udp && !good_tcp_udp
1448             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1449 }
1450
1451 static inline void
1452 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1453                             ip4_header_t ** ih, u8 * error)
1454 {
1455   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1456
1457   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1458   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1459
1460   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1461   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1462
1463   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1464   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1465
1466   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1467                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1468     {
1469       if (is_tcp_udp[0])
1470         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1471                                     &good_tcp_udp[0]);
1472       if (is_tcp_udp[1])
1473         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1474                                     &good_tcp_udp[1]);
1475     }
1476
1477   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1478               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1479   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1480               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1481 }
1482
1483 static inline void
1484 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1485                               vlib_buffer_t * b, u16 * next, u8 error,
1486                               u8 head_of_feature_arc)
1487 {
1488   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1489   u32 next_index;
1490
1491   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1492   b->error = error ? error_node->errors[error] : 0;
1493   if (head_of_feature_arc)
1494     {
1495       next_index = *next;
1496       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1497         {
1498           vnet_feature_arc_start (arc_index,
1499                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1500                                   &next_index, b);
1501           *next = next_index;
1502         }
1503     }
1504 }
1505
1506 typedef struct
1507 {
1508   ip4_address_t src;
1509   u32 lbi;
1510   u8 error;
1511   u8 first;
1512 } ip4_local_last_check_t;
1513
1514 static inline void
1515 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1516                      ip4_local_last_check_t * last_check, u8 * error0)
1517 {
1518   ip4_fib_mtrie_leaf_t leaf0;
1519   ip4_fib_mtrie_t *mtrie0;
1520   const dpo_id_t *dpo0;
1521   load_balance_t *lb0;
1522   u32 lbi0;
1523
1524   vnet_buffer (b)->ip.fib_index =
1525     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1526     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1527
1528   /*
1529    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1530    *  adjacency for the destination address (the local interface address).
1531    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1532    *  adjacency for the source address (the remote sender's address)
1533    */
1534   if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
1535       last_check->first)
1536     {
1537       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1538       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1539       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1540       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1541       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1542
1543       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1544         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1545       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1546
1547       lb0 = load_balance_get (lbi0);
1548       dpo0 = load_balance_get_bucket_i (lb0, 0);
1549
1550       /*
1551        * Must have a route to source otherwise we drop the packet.
1552        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1553        *
1554        * The checks are:
1555        *  - the source is a recieve => it's from us => bogus, do this
1556        *    first since it sets a different error code.
1557        *  - uRPF check for any route to source - accept if passes.
1558        *  - allow packets destined to the broadcast address from unknown sources
1559        */
1560
1561       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1562                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1563                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1564       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1565                   && !fib_urpf_check_size (lb0->lb_urpf)
1566                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1567                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1568
1569       last_check->src.as_u32 = ip0->src_address.as_u32;
1570       last_check->lbi = lbi0;
1571       last_check->error = *error0;
1572       last_check->first = 0;
1573     }
1574   else
1575     {
1576       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1577         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1578       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1579       *error0 = last_check->error;
1580     }
1581 }
1582
1583 static inline void
1584 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1585                         ip4_local_last_check_t * last_check, u8 * error)
1586 {
1587   ip4_fib_mtrie_leaf_t leaf[2];
1588   ip4_fib_mtrie_t *mtrie[2];
1589   const dpo_id_t *dpo[2];
1590   load_balance_t *lb[2];
1591   u32 not_last_hit;
1592   u32 lbi[2];
1593
1594   not_last_hit = last_check->first;
1595   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1596   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1597
1598   vnet_buffer (b[0])->ip.fib_index =
1599     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1600     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1601     vnet_buffer (b[0])->ip.fib_index;
1602
1603   vnet_buffer (b[1])->ip.fib_index =
1604     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1605     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1606     vnet_buffer (b[1])->ip.fib_index;
1607
1608   /*
1609    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1610    *  adjacency for the destination address (the local interface address).
1611    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1612    *  adjacency for the source address (the remote sender's address)
1613    */
1614   if (PREDICT_TRUE (not_last_hit))
1615     {
1616       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1617       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1618
1619       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1620       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1621
1622       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1623                                            &ip[0]->src_address, 2);
1624       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1625                                            &ip[1]->src_address, 2);
1626
1627       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1628                                            &ip[0]->src_address, 3);
1629       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1630                                            &ip[1]->src_address, 3);
1631
1632       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1633       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1634
1635       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1636         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1637       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1638
1639       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1640         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1641       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1642
1643       lb[0] = load_balance_get (lbi[0]);
1644       lb[1] = load_balance_get (lbi[1]);
1645
1646       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1647       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1648
1649       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1650                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1651                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1652       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1653                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1654                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1655                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1656
1657       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1658                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1659                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1660       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1661                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1662                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1663                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1664
1665       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1666       last_check->lbi = lbi[1];
1667       last_check->error = error[1];
1668       last_check->first = 0;
1669     }
1670   else
1671     {
1672       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1673         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1674       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1675
1676       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1677         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1678       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1679
1680       error[0] = last_check->error;
1681       error[1] = last_check->error;
1682     }
1683 }
1684
/* Packet classes returned by ip4_local_classify(). */
enum ip_local_packet_type_e
{
  /* neither a fragment nor NAT-ed */
  IP_LOCAL_PACKET_TYPE_L4,
  /* buffer carries VNET_BUFFER_F_IS_NATED */
  IP_LOCAL_PACKET_TYPE_NAT,
  /* ip4 fragment */
  IP_LOCAL_PACKET_TYPE_FRAG,
};
1691
1692 /**
1693  * Determine packet type and next node.
1694  *
1695  * The expectation is that all packets that are not L4 will skip
1696  * checksums and source checks.
1697  */
1698 always_inline u8
1699 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1700 {
1701   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1702
1703   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1704     {
1705       *next = IP_LOCAL_NEXT_REASSEMBLY;
1706       return IP_LOCAL_PACKET_TYPE_FRAG;
1707     }
1708   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1709     {
1710       *next = lm->local_next_by_ip_protocol[ip->protocol];
1711       return IP_LOCAL_PACKET_TYPE_NAT;
1712     }
1713
1714   *next = lm->local_next_by_ip_protocol[ip->protocol];
1715   return IP_LOCAL_PACKET_TYPE_L4;
1716 }
1717
1718 static inline uword
1719 ip4_local_inline (vlib_main_t * vm,
1720                   vlib_node_runtime_t * node,
1721                   vlib_frame_t * frame, int head_of_feature_arc)
1722 {
1723   u32 *from, n_left_from;
1724   vlib_node_runtime_t *error_node =
1725     vlib_node_get_runtime (vm, ip4_local_node.index);
1726   u16 nexts[VLIB_FRAME_SIZE], *next;
1727   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1728   ip4_header_t *ip[2];
1729   u8 error[2], pt[2];
1730
1731   ip4_local_last_check_t last_check = {
1732     /*
1733      * 0.0.0.0 can appear as the source address of an IP packet,
1734      * as can any other address, hence the need to use the 'first'
1735      * member to make sure the .lbi is initialised for the first
1736      * packet.
1737      */
1738     .src = {.as_u32 = 0},
1739     .lbi = ~0,
1740     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1741     .first = 1,
1742   };
1743
1744   from = vlib_frame_vector_args (frame);
1745   n_left_from = frame->n_vectors;
1746
1747   if (node->flags & VLIB_NODE_FLAG_TRACE)
1748     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1749
1750   vlib_get_buffers (vm, from, bufs, n_left_from);
1751   b = bufs;
1752   next = nexts;
1753
1754   while (n_left_from >= 6)
1755     {
1756       u8 not_batch = 0;
1757
1758       /* Prefetch next iteration. */
1759       {
1760         vlib_prefetch_buffer_header (b[4], LOAD);
1761         vlib_prefetch_buffer_header (b[5], LOAD);
1762
1763         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1764         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1765       }
1766
1767       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1768
1769       ip[0] = vlib_buffer_get_current (b[0]);
1770       ip[1] = vlib_buffer_get_current (b[1]);
1771
1772       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1773       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1774
1775       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1776       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1777
1778       not_batch = pt[0] ^ pt[1];
1779
1780       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1781         goto skip_checks;
1782
1783       if (PREDICT_TRUE (not_batch == 0))
1784         {
1785           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1786           ip4_local_check_src_x2 (b, ip, &last_check, error);
1787         }
1788       else
1789         {
1790           if (!pt[0])
1791             {
1792               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1793               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1794             }
1795           if (!pt[1])
1796             {
1797               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1798               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1799             }
1800         }
1801
1802     skip_checks:
1803
1804       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1805                                     head_of_feature_arc);
1806       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1807                                     head_of_feature_arc);
1808
1809       b += 2;
1810       next += 2;
1811       n_left_from -= 2;
1812     }
1813
1814   while (n_left_from > 0)
1815     {
1816       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1817
1818       ip[0] = vlib_buffer_get_current (b[0]);
1819       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1820       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1821
1822       if (head_of_feature_arc == 0 || pt[0])
1823         goto skip_check;
1824
1825       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1826       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1827
1828     skip_check:
1829
1830       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1831                                     head_of_feature_arc);
1832
1833       b += 1;
1834       next += 1;
1835       n_left_from -= 1;
1836     }
1837
1838   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1839   return frame->n_vectors;
1840 }
1841
1842 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1843                                vlib_frame_t * frame)
1844 {
1845   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1846 }
1847
1848 /* *INDENT-OFF* */
1849 VLIB_REGISTER_NODE (ip4_local_node) =
1850 {
1851   .name = "ip4-local",
1852   .vector_size = sizeof (u32),
1853   .format_trace = format_ip4_forward_next_trace,
1854   .n_errors = IP4_N_ERROR,
1855   .error_strings = ip4_error_strings,
1856   .n_next_nodes = IP_LOCAL_N_NEXT,
1857   .next_nodes =
1858   {
1859     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1860     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1861     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1862     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1863     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1864   },
1865 };
1866 /* *INDENT-ON* */
1867
1868
1869 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1870                                           vlib_node_runtime_t * node,
1871                                           vlib_frame_t * frame)
1872 {
1873   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1874 }
1875
1876 /* *INDENT-OFF* */
1877 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1878   .name = "ip4-local-end-of-arc",
1879   .vector_size = sizeof (u32),
1880
1881   .format_trace = format_ip4_forward_next_trace,
1882   .sibling_of = "ip4-local",
1883 };
1884
1885 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1886   .arc_name = "ip4-local",
1887   .node_name = "ip4-local-end-of-arc",
1888   .runs_before = 0, /* not before any other features */
1889 };
1890 /* *INDENT-ON* */
1891
1892 #ifndef CLIB_MARCH_VARIANT
1893 void
1894 ip4_register_protocol (u32 protocol, u32 node_index)
1895 {
1896   vlib_main_t *vm = vlib_get_main ();
1897   ip4_main_t *im = &ip4_main;
1898   ip_lookup_main_t *lm = &im->lookup_main;
1899
1900   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1901   lm->local_next_by_ip_protocol[protocol] =
1902     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1903 }
1904
1905 void
1906 ip4_unregister_protocol (u32 protocol)
1907 {
1908   ip4_main_t *im = &ip4_main;
1909   ip_lookup_main_t *lm = &im->lookup_main;
1910
1911   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1912   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1913 }
1914 #endif
1915
1916 static clib_error_t *
1917 show_ip_local_command_fn (vlib_main_t * vm,
1918                           unformat_input_t * input, vlib_cli_command_t * cmd)
1919 {
1920   ip4_main_t *im = &ip4_main;
1921   ip_lookup_main_t *lm = &im->lookup_main;
1922   int i;
1923
1924   vlib_cli_output (vm, "Protocols handled by ip4_local");
1925   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1926     {
1927       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1928         {
1929           u32 node_index = vlib_get_node (vm,
1930                                           ip4_local_node.index)->
1931             next_nodes[lm->local_next_by_ip_protocol[i]];
1932           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1933                            format_vlib_node_name, vm, node_index);
1934         }
1935     }
1936   return 0;
1937 }
1938
1939
1940
1941 /*?
1942  * Display the set of protocols handled by the local IPv4 stack.
1943  *
1944  * @cliexpar
1945  * Example of how to display local protocol table:
1946  * @cliexstart{show ip local}
1947  * Protocols handled by ip4_local
1948  * 1
1949  * 17
1950  * 47
1951  * @cliexend
1952 ?*/
1953 /* *INDENT-OFF* */
1954 VLIB_CLI_COMMAND (show_ip_local, static) =
1955 {
1956   .path = "show ip local",
1957   .function = show_ip_local_command_fn,
1958   .short_help = "show ip local",
1959 };
1960 /* *INDENT-ON* */
1961
/* Next-node slots the IPv4 rewrite code can steer a packet to itself. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  IP4_REWRITE_N_NEXT = 3        /* Last: number of slots above */
} ip4_rewrite_next_t;
1969
/**
 * The bits of an IPv4 address to mask in order to construct a multicast
 * MAC address (23 address bits; the constant differs by host endianness).
 */
1974 #if CLIB_ARCH_IS_BIG_ENDIAN
1975 #define IP4_MCAST_ADDR_MASK 0x007fffff
1976 #else
1977 #define IP4_MCAST_ADDR_MASK 0xffff7f00
1978 #endif
1979
1980 always_inline void
1981 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
1982                u16 adj_packet_bytes, bool df, u16 * next,
1983                u8 is_midchain, u32 * error)
1984 {
1985   if (packet_len > adj_packet_bytes)
1986     {
1987       *error = IP4_ERROR_MTU_EXCEEDED;
1988       if (df)
1989         {
1990           icmp4_error_set_vnet_buffer
1991             (b, ICMP4_destination_unreachable,
1992              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
1993              adj_packet_bytes);
1994           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
1995         }
1996       else
1997         {
1998           /* IP fragmentation */
1999           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2000                                    (is_midchain ?
2001                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
2002                                     IP_FRAG_NEXT_IP_REWRITE), 0);
2003           *next = IP4_REWRITE_NEXT_FRAGMENT;
2004         }
2005     }
2006 }
2007
2008 /* increment TTL & update checksum.
2009    Works either endian, so no need for byte swap. */
2010 static_always_inline void
2011 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
2012 {
2013   i32 ttl;
2014   u32 checksum;
2015   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2016     return;
2017
2018   ttl = ip->ttl;
2019
2020   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
2021   checksum += checksum >= 0xffff;
2022
2023   ip->checksum = checksum;
2024   ttl += 1;
2025   ip->ttl = ttl;
2026
2027   ASSERT (ip4_header_checksum_is_valid (ip));
2028 }
2029
2030 /* Decrement TTL & update checksum.
2031    Works either endian, so no need for byte swap. */
2032 static_always_inline void
2033 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2034                             u32 * error)
2035 {
2036   i32 ttl;
2037   u32 checksum;
2038   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2039     return;
2040
2041   ttl = ip->ttl;
2042
2043   /* Input node should have reject packets with ttl 0. */
2044   ASSERT (ip->ttl > 0);
2045
2046   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2047   checksum += checksum >= 0xffff;
2048
2049   ip->checksum = checksum;
2050   ttl -= 1;
2051   ip->ttl = ttl;
2052
2053   /*
2054    * If the ttl drops below 1 when forwarding, generate
2055    * an ICMP response.
2056    */
2057   if (PREDICT_FALSE (ttl <= 0))
2058     {
2059       *error = IP4_ERROR_TIME_EXPIRED;
2060       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2061       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2062                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2063                                    0);
2064       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2065     }
2066
2067   /* Verify checksum. */
2068   ASSERT (ip4_header_checksum_is_valid (ip) ||
2069           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2070 }
2071
2072
2073 always_inline uword
2074 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2075                              vlib_node_runtime_t * node,
2076                              vlib_frame_t * frame,
2077                              int do_counters, int is_midchain, int is_mcast)
2078 {
2079   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2080   u32 *from = vlib_frame_vector_args (frame);
2081   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2082   u16 nexts[VLIB_FRAME_SIZE], *next;
2083   u32 n_left_from;
2084   vlib_node_runtime_t *error_node =
2085     vlib_node_get_runtime (vm, ip4_input_node.index);
2086
2087   n_left_from = frame->n_vectors;
2088   u32 thread_index = vm->thread_index;
2089
2090   vlib_get_buffers (vm, from, bufs, n_left_from);
2091   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2092
2093 #if (CLIB_N_PREFETCHES >= 8)
2094   if (n_left_from >= 6)
2095     {
2096       int i;
2097       for (i = 2; i < 6; i++)
2098         vlib_prefetch_buffer_header (bufs[i], LOAD);
2099     }
2100
2101   next = nexts;
2102   b = bufs;
2103   while (n_left_from >= 8)
2104     {
2105       const ip_adjacency_t *adj0, *adj1;
2106       ip4_header_t *ip0, *ip1;
2107       u32 rw_len0, error0, adj_index0;
2108       u32 rw_len1, error1, adj_index1;
2109       u32 tx_sw_if_index0, tx_sw_if_index1;
2110       u8 *p;
2111
2112       if (is_midchain)
2113         {
2114           vlib_prefetch_buffer_header (b[6], LOAD);
2115           vlib_prefetch_buffer_header (b[7], LOAD);
2116         }
2117
2118       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2119       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2120
2121       /*
2122        * pre-fetch the per-adjacency counters
2123        */
2124       if (do_counters)
2125         {
2126           vlib_prefetch_combined_counter (&adjacency_counters,
2127                                           thread_index, adj_index0);
2128           vlib_prefetch_combined_counter (&adjacency_counters,
2129                                           thread_index, adj_index1);
2130         }
2131
2132       ip0 = vlib_buffer_get_current (b[0]);
2133       ip1 = vlib_buffer_get_current (b[1]);
2134
2135       error0 = error1 = IP4_ERROR_NONE;
2136
2137       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2138       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2139
2140       /* Rewrite packet header and updates lengths. */
2141       adj0 = adj_get (adj_index0);
2142       adj1 = adj_get (adj_index1);
2143
2144       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2145       rw_len0 = adj0[0].rewrite_header.data_bytes;
2146       rw_len1 = adj1[0].rewrite_header.data_bytes;
2147       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2148       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2149
2150       p = vlib_buffer_get_current (b[2]);
2151       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2152       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2153
2154       p = vlib_buffer_get_current (b[3]);
2155       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2156       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2157
2158       /* Check MTU of outgoing interface. */
2159       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2160       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2161
2162       if (b[0]->flags & VNET_BUFFER_F_GSO)
2163         ip0_len = gso_mtu_sz (b[0]);
2164       if (b[1]->flags & VNET_BUFFER_F_GSO)
2165         ip1_len = gso_mtu_sz (b[1]);
2166
2167       ip4_mtu_check (b[0], ip0_len,
2168                      adj0[0].rewrite_header.max_l3_packet_bytes,
2169                      ip0->flags_and_fragment_offset &
2170                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2171                      next + 0, is_midchain, &error0);
2172       ip4_mtu_check (b[1], ip1_len,
2173                      adj1[0].rewrite_header.max_l3_packet_bytes,
2174                      ip1->flags_and_fragment_offset &
2175                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2176                      next + 1, is_midchain, &error1);
2177
2178       if (is_mcast)
2179         {
2180           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2181                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2182                     IP4_ERROR_SAME_INTERFACE : error0);
2183           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2184                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2185                     IP4_ERROR_SAME_INTERFACE : error1);
2186         }
2187
2188       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2189        * to see the IP header */
2190       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2191         {
2192           u32 next_index = adj0[0].rewrite_header.next_index;
2193           vlib_buffer_advance (b[0], -(word) rw_len0);
2194
2195           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2196           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2197
2198           if (PREDICT_FALSE
2199               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2200             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2201                                                 tx_sw_if_index0,
2202                                                 &next_index, b[0],
2203                                                 adj0->ia_cfg_index);
2204
2205           next[0] = next_index;
2206           if (is_midchain)
2207             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2208                                         0 /* is_ip6 */ );
2209         }
2210       else
2211         {
2212           b[0]->error = error_node->errors[error0];
2213           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2214             ip4_ttl_inc (b[0], ip0);
2215         }
2216       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2217         {
2218           u32 next_index = adj1[0].rewrite_header.next_index;
2219           vlib_buffer_advance (b[1], -(word) rw_len1);
2220
2221           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2222           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2223
2224           if (PREDICT_FALSE
2225               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2226             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2227                                                 tx_sw_if_index1,
2228                                                 &next_index, b[1],
2229                                                 adj1->ia_cfg_index);
2230           next[1] = next_index;
2231           if (is_midchain)
2232             vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ ,
2233                                         0 /* is_ip6 */ );
2234         }
2235       else
2236         {
2237           b[1]->error = error_node->errors[error1];
2238           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2239             ip4_ttl_inc (b[1], ip1);
2240         }
2241
2242       if (is_midchain)
2243         /* Guess we are only writing on ipv4 header. */
2244         vnet_rewrite_two_headers (adj0[0], adj1[0],
2245                                   ip0, ip1, sizeof (ip4_header_t));
2246       else
2247         /* Guess we are only writing on simple Ethernet header. */
2248         vnet_rewrite_two_headers (adj0[0], adj1[0],
2249                                   ip0, ip1, sizeof (ethernet_header_t));
2250
2251       if (do_counters)
2252         {
2253           if (error0 == IP4_ERROR_NONE)
2254             vlib_increment_combined_counter
2255               (&adjacency_counters,
2256                thread_index,
2257                adj_index0, 1,
2258                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2259
2260           if (error1 == IP4_ERROR_NONE)
2261             vlib_increment_combined_counter
2262               (&adjacency_counters,
2263                thread_index,
2264                adj_index1, 1,
2265                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2266         }
2267
2268       if (is_midchain)
2269         {
2270           if (error0 == IP4_ERROR_NONE)
2271             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2272           if (error1 == IP4_ERROR_NONE)
2273             adj_midchain_fixup (vm, adj1, b[1], VNET_LINK_IP4);
2274         }
2275
2276       if (is_mcast)
2277         {
2278           /* copy bytes from the IP address into the MAC rewrite */
2279           if (error0 == IP4_ERROR_NONE)
2280             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2281                                         adj0->rewrite_header.dst_mcast_offset,
2282                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2283           if (error1 == IP4_ERROR_NONE)
2284             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2285                                         adj1->rewrite_header.dst_mcast_offset,
2286                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2287         }
2288
2289       next += 2;
2290       b += 2;
2291       n_left_from -= 2;
2292     }
2293 #elif (CLIB_N_PREFETCHES >= 4)
2294   next = nexts;
2295   b = bufs;
2296   while (n_left_from >= 1)
2297     {
2298       ip_adjacency_t *adj0;
2299       ip4_header_t *ip0;
2300       u32 rw_len0, error0, adj_index0;
2301       u32 tx_sw_if_index0;
2302       u8 *p;
2303
2304       /* Prefetch next iteration */
2305       if (PREDICT_TRUE (n_left_from >= 4))
2306         {
2307           ip_adjacency_t *adj2;
2308           u32 adj_index2;
2309
2310           vlib_prefetch_buffer_header (b[3], LOAD);
2311           vlib_prefetch_buffer_data (b[2], LOAD);
2312
2313           /* Prefetch adj->rewrite_header */
2314           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2315           adj2 = adj_get (adj_index2);
2316           p = (u8 *) adj2;
2317           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2318                          LOAD);
2319         }
2320
2321       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2322
2323       /*
2324        * Prefetch the per-adjacency counters
2325        */
2326       if (do_counters)
2327         {
2328           vlib_prefetch_combined_counter (&adjacency_counters,
2329                                           thread_index, adj_index0);
2330         }
2331
2332       ip0 = vlib_buffer_get_current (b[0]);
2333
2334       error0 = IP4_ERROR_NONE;
2335
2336       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2337
2338       /* Rewrite packet header and updates lengths. */
2339       adj0 = adj_get (adj_index0);
2340
2341       /* Rewrite header was prefetched. */
2342       rw_len0 = adj0[0].rewrite_header.data_bytes;
2343       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2344
2345       /* Check MTU of outgoing interface. */
2346       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2347
2348       if (b[0]->flags & VNET_BUFFER_F_GSO)
2349         ip0_len = gso_mtu_sz (b[0]);
2350
2351       ip4_mtu_check (b[0], ip0_len,
2352                      adj0[0].rewrite_header.max_l3_packet_bytes,
2353                      ip0->flags_and_fragment_offset &
2354                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2355                      next + 0, is_midchain, &error0);
2356
2357       if (is_mcast)
2358         {
2359           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2360                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2361                     IP4_ERROR_SAME_INTERFACE : error0);
2362         }
2363
2364       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2365        * to see the IP header */
2366       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2367         {
2368           u32 next_index = adj0[0].rewrite_header.next_index;
2369           vlib_buffer_advance (b[0], -(word) rw_len0);
2370           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2371           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2372
2373           if (PREDICT_FALSE
2374               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2375             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2376                                                 tx_sw_if_index0,
2377                                                 &next_index, b[0],
2378                                                 adj0->ia_cfg_index);
2379           next[0] = next_index;
2380
2381           if (is_midchain)
2382             {
2383               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2384                                           0 /* is_ip6 */ );
2385
2386               /* Guess we are only writing on ipv4 header. */
2387               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2388             }
2389           else
2390             /* Guess we are only writing on simple Ethernet header. */
2391             vnet_rewrite_one_header (adj0[0], ip0,
2392                                      sizeof (ethernet_header_t));
2393
2394           /*
2395            * Bump the per-adjacency counters
2396            */
2397           if (do_counters)
2398             vlib_increment_combined_counter
2399               (&adjacency_counters,
2400                thread_index,
2401                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2402                                                            b[0]) + rw_len0);
2403
2404           if (is_midchain)
2405             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2406
2407           if (is_mcast)
2408             /* copy bytes from the IP address into the MAC rewrite */
2409             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2410                                         adj0->rewrite_header.dst_mcast_offset,
2411                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2412         }
2413       else
2414         {
2415           b[0]->error = error_node->errors[error0];
2416           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2417             ip4_ttl_inc (b[0], ip0);
2418         }
2419
2420       next += 1;
2421       b += 1;
2422       n_left_from -= 1;
2423     }
2424 #endif
2425
2426   while (n_left_from > 0)
2427     {
2428       ip_adjacency_t *adj0;
2429       ip4_header_t *ip0;
2430       u32 rw_len0, adj_index0, error0;
2431       u32 tx_sw_if_index0;
2432
2433       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2434
2435       adj0 = adj_get (adj_index0);
2436
2437       if (do_counters)
2438         vlib_prefetch_combined_counter (&adjacency_counters,
2439                                         thread_index, adj_index0);
2440
2441       ip0 = vlib_buffer_get_current (b[0]);
2442
2443       error0 = IP4_ERROR_NONE;
2444
2445       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2446
2447
2448       /* Update packet buffer attributes/set output interface. */
2449       rw_len0 = adj0[0].rewrite_header.data_bytes;
2450       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2451
2452       /* Check MTU of outgoing interface. */
2453       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2454       if (b[0]->flags & VNET_BUFFER_F_GSO)
2455         ip0_len = gso_mtu_sz (b[0]);
2456
2457       ip4_mtu_check (b[0], ip0_len,
2458                      adj0[0].rewrite_header.max_l3_packet_bytes,
2459                      ip0->flags_and_fragment_offset &
2460                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2461                      next + 0, is_midchain, &error0);
2462
2463       if (is_mcast)
2464         {
2465           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2466                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2467                     IP4_ERROR_SAME_INTERFACE : error0);
2468         }
2469
2470       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2471        * to see the IP header */
2472       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2473         {
2474           u32 next_index = adj0[0].rewrite_header.next_index;
2475           vlib_buffer_advance (b[0], -(word) rw_len0);
2476           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2477           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2478
2479           if (PREDICT_FALSE
2480               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2481             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2482                                                 tx_sw_if_index0,
2483                                                 &next_index, b[0],
2484                                                 adj0->ia_cfg_index);
2485           next[0] = next_index;
2486
2487           if (is_midchain)
2488             {
2489               /* this acts on the packet that is about to be encapped */
2490               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2491                                           0 /* is_ip6 */ );
2492
2493               /* Guess we are only writing on ipv4 header. */
2494               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2495             }
2496           else
2497             /* Guess we are only writing on simple Ethernet header. */
2498             vnet_rewrite_one_header (adj0[0], ip0,
2499                                      sizeof (ethernet_header_t));
2500
2501           if (do_counters)
2502             vlib_increment_combined_counter
2503               (&adjacency_counters,
2504                thread_index, adj_index0, 1,
2505                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2506
2507           if (is_midchain)
2508             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2509
2510           if (is_mcast)
2511             /* copy bytes from the IP address into the MAC rewrite */
2512             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2513                                         adj0->rewrite_header.dst_mcast_offset,
2514                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2515         }
2516       else
2517         {
2518           b[0]->error = error_node->errors[error0];
2519           /* undo the TTL decrement - we'll be back to do it again */
2520           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2521             ip4_ttl_inc (b[0], ip0);
2522         }
2523
2524       next += 1;
2525       b += 1;
2526       n_left_from -= 1;
2527     }
2528
2529
2530   /* Need to do trace after rewrites to pick up new packet data. */
2531   if (node->flags & VLIB_NODE_FLAG_TRACE)
2532     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2533
2534   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2535   return frame->n_vectors;
2536 }
2537
2538 always_inline uword
2539 ip4_rewrite_inline (vlib_main_t * vm,
2540                     vlib_node_runtime_t * node,
2541                     vlib_frame_t * frame,
2542                     int do_counters, int is_midchain, int is_mcast)
2543 {
2544   return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2545                                       is_midchain, is_mcast);
2546 }
2547
2548
2549 /** @brief IPv4 rewrite node.
2550     @node ip4-rewrite
2551
2552     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2553     header checksum, fetch the ip adjacency, check the outbound mtu,
2554     apply the adjacency rewrite, and send pkts to the adjacency
2555     rewrite header's rewrite_next_index.
2556
2557     @param vm vlib_main_t corresponding to the current thread
2558     @param node vlib_node_runtime_t
2559     @param frame vlib_frame_t whose contents should be dispatched
2560
2561     @par Graph mechanics: buffer metadata, next index usage
2562
2563     @em Uses:
2564     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2565         - the rewrite adjacency index
2566     - <code>adj->lookup_next_index</code>
2567         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2568           the packet will be dropped.
2569     - <code>adj->rewrite_header</code>
2570         - Rewrite string length, rewrite string, next_index
2571
2572     @em Sets:
2573     - <code>b->current_data, b->current_length</code>
2574         - Updated net of applying the rewrite string
2575
2576     <em>Next Indices:</em>
2577     - <code> adj->rewrite_header.next_index </code>
2578       or @c ip4-drop
2579 */
2580
2581 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2582                                  vlib_frame_t * frame)
2583 {
2584   if (adj_are_counters_enabled ())
2585     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2586   else
2587     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2588 }
2589
2590 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2591                                        vlib_node_runtime_t * node,
2592                                        vlib_frame_t * frame)
2593 {
2594   if (adj_are_counters_enabled ())
2595     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2596   else
2597     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2598 }
2599
2600 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2601                                   vlib_node_runtime_t * node,
2602                                   vlib_frame_t * frame)
2603 {
2604   if (adj_are_counters_enabled ())
2605     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2606   else
2607     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2608 }
2609
2610 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2611                                        vlib_node_runtime_t * node,
2612                                        vlib_frame_t * frame)
2613 {
2614   if (adj_are_counters_enabled ())
2615     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2616   else
2617     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2618 }
2619
2620 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2621                                         vlib_node_runtime_t * node,
2622                                         vlib_frame_t * frame)
2623 {
2624   if (adj_are_counters_enabled ())
2625     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2626   else
2627     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2628 }
2629
2630 /* *INDENT-OFF* */
2631 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2632   .name = "ip4-rewrite",
2633   .vector_size = sizeof (u32),
2634
2635   .format_trace = format_ip4_rewrite_trace,
2636
2637   .n_next_nodes = IP4_REWRITE_N_NEXT,
2638   .next_nodes = {
2639     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2640     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2641     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2642   },
2643 };
2644
2645 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2646   .name = "ip4-rewrite-bcast",
2647   .vector_size = sizeof (u32),
2648
2649   .format_trace = format_ip4_rewrite_trace,
2650   .sibling_of = "ip4-rewrite",
2651 };
2652
2653 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2654   .name = "ip4-rewrite-mcast",
2655   .vector_size = sizeof (u32),
2656
2657   .format_trace = format_ip4_rewrite_trace,
2658   .sibling_of = "ip4-rewrite",
2659 };
2660
2661 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2662   .name = "ip4-mcast-midchain",
2663   .vector_size = sizeof (u32),
2664
2665   .format_trace = format_ip4_rewrite_trace,
2666   .sibling_of = "ip4-rewrite",
2667 };
2668
2669 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2670   .name = "ip4-midchain",
2671   .vector_size = sizeof (u32),
2672   .format_trace = format_ip4_rewrite_trace,
2673   .sibling_of = "ip4-rewrite",
2674 };
/* *INDENT-ON* */
2676
2677 static int
2678 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2679 {
2680   ip4_fib_mtrie_t *mtrie0;
2681   ip4_fib_mtrie_leaf_t leaf0;
2682   u32 lbi0;
2683
2684   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2685
2686   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2687   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2688   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2689
2690   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2691
2692   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2693 }
2694
2695 static clib_error_t *
2696 test_lookup_command_fn (vlib_main_t * vm,
2697                         unformat_input_t * input, vlib_cli_command_t * cmd)
2698 {
2699   ip4_fib_t *fib;
2700   u32 table_id = 0;
2701   f64 count = 1;
2702   u32 n;
2703   int i;
2704   ip4_address_t ip4_base_address;
2705   u64 errors = 0;
2706
2707   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2708     {
2709       if (unformat (input, "table %d", &table_id))
2710         {
2711           /* Make sure the entry exists. */
2712           fib = ip4_fib_get (table_id);
2713           if ((fib) && (fib->index != table_id))
2714             return clib_error_return (0, "<fib-index> %d does not exist",
2715                                       table_id);
2716         }
2717       else if (unformat (input, "count %f", &count))
2718         ;
2719
2720       else if (unformat (input, "%U",
2721                          unformat_ip4_address, &ip4_base_address))
2722         ;
2723       else
2724         return clib_error_return (0, "unknown input `%U'",
2725                                   format_unformat_error, input);
2726     }
2727
2728   n = count;
2729
2730   for (i = 0; i < n; i++)
2731     {
2732       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2733         errors++;
2734
2735       ip4_base_address.as_u32 =
2736         clib_host_to_net_u32 (1 +
2737                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2738     }
2739
2740   if (errors)
2741     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2742   else
2743     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2744
2745   return 0;
2746 }
2747
2748 /*?
2749  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2750  * given FIB table to determine if there is a conflict with the
2751  * adjacency table. The fib-id can be determined by using the
2752  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2753  * of 0 is used.
2754  *
2755  * @todo This command uses fib-id, other commands use table-id (not
2756  * just a name, they are different indexes). Would like to change this
2757  * to table-id for consistency.
2758  *
2759  * @cliexpar
2760  * Example of how to run the test lookup command:
2761  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2762  * No errors in 2 lookups
2763  * @cliexend
2764 ?*/
2765 /* *INDENT-OFF* */
2766 VLIB_CLI_COMMAND (lookup_test_command, static) =
2767 {
2768   .path = "test lookup",
2769   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2770   .function = test_lookup_command_fn,
2771 };
2772 /* *INDENT-ON* */
2773
2774 #ifndef CLIB_MARCH_VARIANT
2775 int
2776 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2777 {
2778   u32 fib_index;
2779
2780   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2781
2782   if (~0 == fib_index)
2783     return VNET_API_ERROR_NO_SUCH_FIB;
2784
2785   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2786                                   flow_hash_config);
2787
2788   return 0;
2789 }
2790 #endif
2791
2792 static clib_error_t *
2793 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2794                              unformat_input_t * input,
2795                              vlib_cli_command_t * cmd)
2796 {
2797   int matched = 0;
2798   u32 table_id = 0;
2799   u32 flow_hash_config = 0;
2800   int rv;
2801
2802   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2803     {
2804       if (unformat (input, "table %d", &table_id))
2805         matched = 1;
2806 #define _(a,v) \
2807     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2808       foreach_flow_hash_bit
2809 #undef _
2810         else
2811         break;
2812     }
2813
2814   if (matched == 0)
2815     return clib_error_return (0, "unknown input `%U'",
2816                               format_unformat_error, input);
2817
2818   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2819   switch (rv)
2820     {
2821     case 0:
2822       break;
2823
2824     case VNET_API_ERROR_NO_SUCH_FIB:
2825       return clib_error_return (0, "no such FIB table %d", table_id);
2826
2827     default:
2828       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2829       break;
2830     }
2831
2832   return 0;
2833 }
2834
2835 /*?
2836  * Configure the set of IPv4 fields used by the flow hash.
2837  *
2838  * @cliexpar
2839  * Example of how to set the flow hash on a given table:
2840  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2842  * @cliexstart{show ip fib}
2843  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2844  * 0.0.0.0/0
2845  *   unicast-ip4-chain
2846  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2847  *     [0] [@0]: dpo-drop ip6
2848  * 0.0.0.0/32
2849  *   unicast-ip4-chain
2850  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2851  *     [0] [@0]: dpo-drop ip6
2852  * 224.0.0.0/8
2853  *   unicast-ip4-chain
2854  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2855  *     [0] [@0]: dpo-drop ip6
2856  * 6.0.1.2/32
2857  *   unicast-ip4-chain
2858  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2859  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2860  * 7.0.0.1/32
2861  *   unicast-ip4-chain
2862  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2863  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2864  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2865  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2866  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2867  * 240.0.0.0/8
2868  *   unicast-ip4-chain
2869  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2870  *     [0] [@0]: dpo-drop ip6
2871  * 255.255.255.255/32
2872  *   unicast-ip4-chain
2873  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2874  *     [0] [@0]: dpo-drop ip6
2875  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2876  * 0.0.0.0/0
2877  *   unicast-ip4-chain
2878  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2879  *     [0] [@0]: dpo-drop ip6
2880  * 0.0.0.0/32
2881  *   unicast-ip4-chain
2882  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2883  *     [0] [@0]: dpo-drop ip6
2884  * 172.16.1.0/24
2885  *   unicast-ip4-chain
2886  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2887  *     [0] [@4]: ipv4-glean: af_packet0
2888  * 172.16.1.1/32
2889  *   unicast-ip4-chain
2890  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2891  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2892  * 172.16.1.2/32
2893  *   unicast-ip4-chain
2894  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2895  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2896  * 172.16.2.0/24
2897  *   unicast-ip4-chain
2898  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2899  *     [0] [@4]: ipv4-glean: af_packet1
2900  * 172.16.2.1/32
2901  *   unicast-ip4-chain
2902  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2903  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2904  * 224.0.0.0/8
2905  *   unicast-ip4-chain
2906  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2907  *     [0] [@0]: dpo-drop ip6
2908  * 240.0.0.0/8
2909  *   unicast-ip4-chain
2910  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2911  *     [0] [@0]: dpo-drop ip6
2912  * 255.255.255.255/32
2913  *   unicast-ip4-chain
2914  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2915  *     [0] [@0]: dpo-drop ip6
2916  * @cliexend
2917 ?*/
2918 /* *INDENT-OFF* */
2919 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2920 {
2921   .path = "set ip flow-hash",
2922   .short_help =
2923   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2924   .function = set_ip_flow_hash_command_fn,
2925 };
2926 /* *INDENT-ON* */
2927
2928 #ifndef CLIB_MARCH_VARIANT
2929 int
2930 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2931                              u32 table_index)
2932 {
2933   vnet_main_t *vnm = vnet_get_main ();
2934   vnet_interface_main_t *im = &vnm->interface_main;
2935   ip4_main_t *ipm = &ip4_main;
2936   ip_lookup_main_t *lm = &ipm->lookup_main;
2937   vnet_classify_main_t *cm = &vnet_classify_main;
2938   ip4_address_t *if_addr;
2939
2940   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2941     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2942
2943   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2944     return VNET_API_ERROR_NO_SUCH_ENTRY;
2945
2946   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2947   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2948
2949   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2950
2951   if (NULL != if_addr)
2952     {
2953       fib_prefix_t pfx = {
2954         .fp_len = 32,
2955         .fp_proto = FIB_PROTOCOL_IP4,
2956         .fp_addr.ip4 = *if_addr,
2957       };
2958       u32 fib_index;
2959
2960       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2961                                                        sw_if_index);
2962
2963
2964       if (table_index != (u32) ~ 0)
2965         {
2966           dpo_id_t dpo = DPO_INVALID;
2967
2968           dpo_set (&dpo,
2969                    DPO_CLASSIFY,
2970                    DPO_PROTO_IP4,
2971                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2972
2973           fib_table_entry_special_dpo_add (fib_index,
2974                                            &pfx,
2975                                            FIB_SOURCE_CLASSIFY,
2976                                            FIB_ENTRY_FLAG_NONE, &dpo);
2977           dpo_reset (&dpo);
2978         }
2979       else
2980         {
2981           fib_table_entry_special_remove (fib_index,
2982                                           &pfx, FIB_SOURCE_CLASSIFY);
2983         }
2984     }
2985
2986   return 0;
2987 }
2988 #endif
2989
2990 static clib_error_t *
2991 set_ip_classify_command_fn (vlib_main_t * vm,
2992                             unformat_input_t * input,
2993                             vlib_cli_command_t * cmd)
2994 {
2995   u32 table_index = ~0;
2996   int table_index_set = 0;
2997   u32 sw_if_index = ~0;
2998   int rv;
2999
3000   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3001     {
3002       if (unformat (input, "table-index %d", &table_index))
3003         table_index_set = 1;
3004       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3005                          vnet_get_main (), &sw_if_index))
3006         ;
3007       else
3008         break;
3009     }
3010
3011   if (table_index_set == 0)
3012     return clib_error_return (0, "classify table-index must be specified");
3013
3014   if (sw_if_index == ~0)
3015     return clib_error_return (0, "interface / subif must be specified");
3016
3017   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3018
3019   switch (rv)
3020     {
3021     case 0:
3022       break;
3023
3024     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3025       return clib_error_return (0, "No such interface");
3026
3027     case VNET_API_ERROR_NO_SUCH_ENTRY:
3028       return clib_error_return (0, "No such classifier table");
3029     }
3030   return 0;
3031 }
3032
3033 /*?
3034  * Assign a classification table to an interface. The classification
3035  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
3037  * on an interface.
3038  *
3039  * @cliexpar
3040  * Example of how to assign a classification table to an interface:
3041  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3042 ?*/
3043 /* *INDENT-OFF* */
3044 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3045 {
3046     .path = "set ip classify",
3047     .short_help =
3048     "set ip classify intfc <interface> table-index <classify-idx>",
3049     .function = set_ip_classify_command_fn,
3050 };
3051 /* *INDENT-ON* */
3052
3053 /*
3054  * fd.io coding-style-patch-verification: ON
3055  *
3056  * Local Variables:
3057  * eval: (c-set-style "gnu")
3058  * End:
3059  */