9e8d164b51f6f4a11a33fc947d20a41bb013b051
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/classify_dpo.h>
56 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
57 #include <vnet/adj/adj_dp.h>
58
59 #include <vnet/ip/ip4_forward.h>
60 #include <vnet/interface_output.h>
61 #include <vnet/classify/vnet_classify.h>
62
63 /** @brief IPv4 lookup node.
64     @node ip4-lookup
65
66     This is the main IPv4 lookup dispatch node.
67
68     @param vm vlib_main_t corresponding to the current thread
69     @param node vlib_node_runtime_t
70     @param frame vlib_frame_t whose contents should be dispatched
71
72     @par Graph mechanics: buffer metadata, next index usage
73
74     @em Uses:
75     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
76         - Indicates the @c sw_if_index value of the interface that the
77           packet was received on.
78     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
79         - When the value is @c ~0 then the node performs a longest prefix
80           match (LPM) for the packet destination address in the FIB attached
81           to the receive interface.
82         - Otherwise perform LPM for the packet destination address in the
83           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
84           value (0, 1, ...) and not a VRF id.
85
86     @em Sets:
87     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
88         - The lookup result adjacency index.
89
90     <em>Next Index:</em>
91     - Dispatches the packet to the node index found in
92       ip_adjacency_t @c adj->lookup_next_index
93       (where @c adj is the lookup result adjacency).
94 */
95 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
96                                 vlib_frame_t * frame)
97 {
98   return ip4_lookup_inline (vm, node, frame);
99 }
100
101 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
102
103 /* *INDENT-OFF* */
104 VLIB_REGISTER_NODE (ip4_lookup_node) =
105 {
106   .name = "ip4-lookup",
107   .vector_size = sizeof (u32),
108   .format_trace = format_ip4_lookup_trace,
109   .n_next_nodes = IP_LOOKUP_N_NEXT,
110   .next_nodes = IP4_LOOKUP_NEXT_NODES,
111 };
112 /* *INDENT-ON* */
113
114 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
115                                       vlib_node_runtime_t * node,
116                                       vlib_frame_t * frame)
117 {
118   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
119   u32 n_left, *from;
120   u32 thread_index = vm->thread_index;
121   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
122   u16 nexts[VLIB_FRAME_SIZE], *next;
123
124   from = vlib_frame_vector_args (frame);
125   n_left = frame->n_vectors;
126   next = nexts;
127
128   vlib_get_buffers (vm, from, bufs, n_left);
129
130   while (n_left >= 4)
131     {
132       const load_balance_t *lb0, *lb1;
133       const ip4_header_t *ip0, *ip1;
134       u32 lbi0, hc0, lbi1, hc1;
135       const dpo_id_t *dpo0, *dpo1;
136
137       /* Prefetch next iteration. */
138       {
139         vlib_prefetch_buffer_header (b[2], LOAD);
140         vlib_prefetch_buffer_header (b[3], LOAD);
141
142         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
143         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
144       }
145
146       ip0 = vlib_buffer_get_current (b[0]);
147       ip1 = vlib_buffer_get_current (b[1]);
148       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
149       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
150
151       lb0 = load_balance_get (lbi0);
152       lb1 = load_balance_get (lbi1);
153
154       /*
155        * this node is for via FIBs we can re-use the hash value from the
156        * to node if present.
157        * We don't want to use the same hash value at each level in the recursion
158        * graph as that would lead to polarisation
159        */
160       hc0 = hc1 = 0;
161
162       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
163         {
164           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
165             {
166               hc0 = vnet_buffer (b[0])->ip.flow_hash =
167                 vnet_buffer (b[0])->ip.flow_hash >> 1;
168             }
169           else
170             {
171               hc0 = vnet_buffer (b[0])->ip.flow_hash =
172                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
173             }
174           dpo0 = load_balance_get_fwd_bucket
175             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
176         }
177       else
178         {
179           dpo0 = load_balance_get_bucket_i (lb0, 0);
180         }
181       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
182         {
183           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
184             {
185               hc1 = vnet_buffer (b[1])->ip.flow_hash =
186                 vnet_buffer (b[1])->ip.flow_hash >> 1;
187             }
188           else
189             {
190               hc1 = vnet_buffer (b[1])->ip.flow_hash =
191                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
192             }
193           dpo1 = load_balance_get_fwd_bucket
194             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
195         }
196       else
197         {
198           dpo1 = load_balance_get_bucket_i (lb1, 0);
199         }
200
201       next[0] = dpo0->dpoi_next_node;
202       next[1] = dpo1->dpoi_next_node;
203
204       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
205       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
206
207       vlib_increment_combined_counter
208         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
209       vlib_increment_combined_counter
210         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
211
212       b += 2;
213       next += 2;
214       n_left -= 2;
215     }
216
217   while (n_left > 0)
218     {
219       const load_balance_t *lb0;
220       const ip4_header_t *ip0;
221       const dpo_id_t *dpo0;
222       u32 lbi0, hc0;
223
224       ip0 = vlib_buffer_get_current (b[0]);
225       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
226
227       lb0 = load_balance_get (lbi0);
228
229       hc0 = 0;
230       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
231         {
232           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
233             {
234               hc0 = vnet_buffer (b[0])->ip.flow_hash =
235                 vnet_buffer (b[0])->ip.flow_hash >> 1;
236             }
237           else
238             {
239               hc0 = vnet_buffer (b[0])->ip.flow_hash =
240                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
241             }
242           dpo0 = load_balance_get_fwd_bucket
243             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
244         }
245       else
246         {
247           dpo0 = load_balance_get_bucket_i (lb0, 0);
248         }
249
250       next[0] = dpo0->dpoi_next_node;
251       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
252
253       vlib_increment_combined_counter
254         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
255
256       b += 1;
257       next += 1;
258       n_left -= 1;
259     }
260
261   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
262   if (node->flags & VLIB_NODE_FLAG_TRACE)
263     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
264
265   return frame->n_vectors;
266 }
267
268 /* *INDENT-OFF* */
269 VLIB_REGISTER_NODE (ip4_load_balance_node) =
270 {
271   .name = "ip4-load-balance",
272   .vector_size = sizeof (u32),
273   .sibling_of = "ip4-lookup",
274   .format_trace = format_ip4_lookup_trace,
275 };
276 /* *INDENT-ON* */
277
278 #ifndef CLIB_MARCH_VARIANT
279 /* get first interface address */
280 ip4_address_t *
281 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
282                              ip_interface_address_t ** result_ia)
283 {
284   ip_lookup_main_t *lm = &im->lookup_main;
285   ip_interface_address_t *ia = 0;
286   ip4_address_t *result = 0;
287
288   /* *INDENT-OFF* */
289   foreach_ip_interface_address
290     (lm, ia, sw_if_index,
291      1 /* honor unnumbered */ ,
292      ({
293        ip4_address_t * a =
294          ip_interface_address_get_address (lm, ia);
295        result = a;
296        break;
297      }));
298   /* *INDENT-OFF* */
299   if (result_ia)
300     *result_ia = result ? ia : 0;
301   return result;
302 }
303 #endif
304
305 static void
306 ip4_add_subnet_bcast_route (u32 fib_index,
307                             fib_prefix_t *pfx,
308                             u32 sw_if_index)
309 {
310   vnet_sw_interface_flags_t iflags;
311
312   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
313
314   fib_table_entry_special_remove(fib_index,
315                                  pfx,
316                                  FIB_SOURCE_INTERFACE);
317
318   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
319     {
320       fib_table_entry_update_one_path (fib_index, pfx,
321                                        FIB_SOURCE_INTERFACE,
322                                        FIB_ENTRY_FLAG_NONE,
323                                        DPO_PROTO_IP4,
324                                        /* No next-hop address */
325                                        &ADJ_BCAST_ADDR,
326                                        sw_if_index,
327                                        // invalid FIB index
328                                        ~0,
329                                        1,
330                                        // no out-label stack
331                                        NULL,
332                                        FIB_ROUTE_PATH_FLAG_NONE);
333     }
334   else
335     {
336         fib_table_entry_special_add(fib_index,
337                                     pfx,
338                                     FIB_SOURCE_INTERFACE,
339                                     (FIB_ENTRY_FLAG_DROP |
340                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
341     }
342 }
343
344 static void
345 ip4_add_interface_prefix_routes (ip4_main_t *im,
346                                  u32 sw_if_index,
347                                  u32 fib_index,
348                                  ip_interface_address_t * a)
349 {
350   ip_lookup_main_t *lm = &im->lookup_main;
351   ip_interface_prefix_t *if_prefix;
352   ip4_address_t *address = ip_interface_address_get_address (lm, a);
353
354   ip_interface_prefix_key_t key = {
355     .prefix = {
356       .fp_len = a->address_length,
357       .fp_proto = FIB_PROTOCOL_IP4,
358       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
359     },
360     .sw_if_index = sw_if_index,
361   };
362
363   fib_prefix_t pfx_special = {
364     .fp_proto = FIB_PROTOCOL_IP4,
365   };
366
367   /* If prefix already set on interface, just increment ref count & return */
368   if_prefix = ip_get_interface_prefix (lm, &key);
369   if (if_prefix)
370     {
371       if_prefix->ref_count += 1;
372       return;
373     }
374
375   /* New prefix - allocate a pool entry, initialize it, add to the hash */
376   pool_get (lm->if_prefix_pool, if_prefix);
377   if_prefix->ref_count = 1;
378   if_prefix->src_ia_index = a - lm->if_address_pool;
379   clib_memcpy (&if_prefix->key, &key, sizeof (key));
380   mhash_set (&lm->prefix_to_if_prefix_index, &key,
381              if_prefix - lm->if_prefix_pool, 0 /* old value */);
382
383   pfx_special.fp_len = a->address_length;
384   pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
385
386   /* set the glean route for the prefix */
387   fib_table_entry_update_one_path (fib_index, &pfx_special,
388                                    FIB_SOURCE_INTERFACE,
389                                    (FIB_ENTRY_FLAG_CONNECTED |
390                                     FIB_ENTRY_FLAG_ATTACHED),
391                                    DPO_PROTO_IP4,
392                                    /* No next-hop address */
393                                    NULL,
394                                    sw_if_index,
395                                    /* invalid FIB index */
396                                    ~0,
397                                    1,
398                                    /* no out-label stack */
399                                    NULL,
400                                    FIB_ROUTE_PATH_FLAG_NONE);
401
402   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
403   if (a->address_length <= 30)
404     {
405       /* set a drop route for the base address of the prefix */
406       pfx_special.fp_len = 32;
407       pfx_special.fp_addr.ip4.as_u32 =
408         address->as_u32 & im->fib_masks[a->address_length];
409
410       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
411         fib_table_entry_special_add (fib_index, &pfx_special,
412                                      FIB_SOURCE_INTERFACE,
413                                      (FIB_ENTRY_FLAG_DROP |
414                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
415
416       /* set a route for the broadcast address of the prefix */
417       pfx_special.fp_len = 32;
418       pfx_special.fp_addr.ip4.as_u32 =
419         address->as_u32 | ~im->fib_masks[a->address_length];
420       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
421         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
422
423
424     }
425   /* length == 31 - add an attached route for the other address */
426   else if (a->address_length == 31)
427     {
428       pfx_special.fp_len = 32;
429       pfx_special.fp_addr.ip4.as_u32 =
430         address->as_u32 ^ clib_host_to_net_u32(1);
431
432       fib_table_entry_update_one_path (fib_index, &pfx_special,
433                                        FIB_SOURCE_INTERFACE,
434                                        (FIB_ENTRY_FLAG_ATTACHED),
435                                        DPO_PROTO_IP4,
436                                        &pfx_special.fp_addr,
437                                        sw_if_index,
438                                        /* invalid FIB index */
439                                        ~0,
440                                        1,
441                                        NULL,
442                                        FIB_ROUTE_PATH_FLAG_NONE);
443     }
444 }
445
446 static void
447 ip4_add_interface_routes (u32 sw_if_index,
448                           ip4_main_t * im, u32 fib_index,
449                           ip_interface_address_t * a)
450 {
451   ip_lookup_main_t *lm = &im->lookup_main;
452   ip4_address_t *address = ip_interface_address_get_address (lm, a);
453   fib_prefix_t pfx = {
454     .fp_len = 32,
455     .fp_proto = FIB_PROTOCOL_IP4,
456     .fp_addr.ip4 = *address,
457   };
458
459   /* set special routes for the prefix if needed */
460   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
461
462   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
463     {
464       u32 classify_table_index =
465         lm->classify_table_index_by_sw_if_index[sw_if_index];
466       if (classify_table_index != (u32) ~ 0)
467         {
468           dpo_id_t dpo = DPO_INVALID;
469
470           dpo_set (&dpo,
471                    DPO_CLASSIFY,
472                    DPO_PROTO_IP4,
473                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
474
475           fib_table_entry_special_dpo_add (fib_index,
476                                            &pfx,
477                                            FIB_SOURCE_CLASSIFY,
478                                            FIB_ENTRY_FLAG_NONE, &dpo);
479           dpo_reset (&dpo);
480         }
481     }
482
483   fib_table_entry_update_one_path (fib_index, &pfx,
484                                    FIB_SOURCE_INTERFACE,
485                                    (FIB_ENTRY_FLAG_CONNECTED |
486                                     FIB_ENTRY_FLAG_LOCAL),
487                                    DPO_PROTO_IP4,
488                                    &pfx.fp_addr,
489                                    sw_if_index,
490                                    // invalid FIB index
491                                    ~0,
492                                    1, NULL,
493                                    FIB_ROUTE_PATH_FLAG_NONE);
494 }
495
496 static void
497 ip4_del_interface_prefix_routes (ip4_main_t * im,
498                                  u32 sw_if_index,
499                                  u32 fib_index,
500                                  ip4_address_t * address,
501                                  u32 address_length)
502 {
503   ip_lookup_main_t *lm = &im->lookup_main;
504   ip_interface_prefix_t *if_prefix;
505
506   ip_interface_prefix_key_t key = {
507     .prefix = {
508       .fp_len = address_length,
509       .fp_proto = FIB_PROTOCOL_IP4,
510       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
511     },
512     .sw_if_index = sw_if_index,
513   };
514
515   fib_prefix_t pfx_special = {
516     .fp_len = 32,
517     .fp_proto = FIB_PROTOCOL_IP4,
518   };
519
520   if_prefix = ip_get_interface_prefix (lm, &key);
521   if (!if_prefix)
522     {
523       clib_warning ("Prefix not found while deleting %U",
524                     format_ip4_address_and_length, address, address_length);
525       return;
526     }
527
528   if_prefix->ref_count -= 1;
529
530   /*
531    * Routes need to be adjusted if deleting last intf addr in prefix
532    *
533    * We're done now otherwise
534    */
535   if (if_prefix->ref_count > 0)
536     return;
537
538   /* length <= 30, delete glean route, first address, last address */
539   if (address_length <= 30)
540     {
541       /* Less work to do in FIB if we remove the covered /32s first */
542
543       /* first address in prefix */
544       pfx_special.fp_addr.ip4.as_u32 =
545         address->as_u32 & im->fib_masks[address_length];
546       pfx_special.fp_len = 32;
547
548       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
549         fib_table_entry_special_remove (fib_index,
550                                         &pfx_special,
551                                         FIB_SOURCE_INTERFACE);
552
553       /* prefix broadcast address */
554       pfx_special.fp_addr.ip4.as_u32 =
555         address->as_u32 | ~im->fib_masks[address_length];
556       pfx_special.fp_len = 32;
557
558       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
559         fib_table_entry_special_remove (fib_index,
560                                         &pfx_special,
561                                         FIB_SOURCE_INTERFACE);
562     }
563   else if (address_length == 31)
564     {
565       /* length == 31, delete attached route for the other address */
566       pfx_special.fp_addr.ip4.as_u32 =
567         address->as_u32 ^ clib_host_to_net_u32(1);
568
569       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
570     }
571
572   /* remove glean route for prefix */
573   pfx_special.fp_addr.ip4 = *address;
574   pfx_special.fp_len = address_length;
575   fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
576
577   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
578   pool_put (lm->if_prefix_pool, if_prefix);
579 }
580
581 static void
582 ip4_del_interface_routes (u32 sw_if_index,
583                           ip4_main_t * im,
584                           u32 fib_index,
585                           ip4_address_t * address, u32 address_length)
586 {
587   fib_prefix_t pfx = {
588     .fp_len = 32,
589     .fp_proto = FIB_PROTOCOL_IP4,
590     .fp_addr.ip4 = *address,
591   };
592
593   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
594
595   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
596                                    address, address_length);
597 }
598
599 #ifndef CLIB_MARCH_VARIANT
600 void
601 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
602 {
603   ip4_main_t *im = &ip4_main;
604   vnet_main_t *vnm = vnet_get_main ();
605   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
606
607   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
608
609   /*
610    * enable/disable only on the 1<->0 transition
611    */
612   if (is_enable)
613     {
614       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
615         return;
616     }
617   else
618     {
619       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
620       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
621         return;
622     }
623   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
624                                !is_enable, 0, 0);
625
626
627   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
628                                sw_if_index, !is_enable, 0, 0);
629
630   if (is_enable)
631     hi->l3_if_count++;
632   else if (hi->l3_if_count)
633     hi->l3_if_count--;
634
635   {
636     ip4_enable_disable_interface_callback_t *cb;
637     vec_foreach (cb, im->enable_disable_interface_callbacks)
638       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
639   }
640 }
641
642 static clib_error_t *
643 ip4_add_del_interface_address_internal (vlib_main_t * vm,
644                                         u32 sw_if_index,
645                                         ip4_address_t * address,
646                                         u32 address_length, u32 is_del)
647 {
648   vnet_main_t *vnm = vnet_get_main ();
649   ip4_main_t *im = &ip4_main;
650   ip_lookup_main_t *lm = &im->lookup_main;
651   clib_error_t *error = 0;
652   u32 if_address_index;
653   ip4_address_fib_t ip4_af, *addr_fib = 0;
654
655   /* local0 interface doesn't support IP addressing  */
656   if (sw_if_index == 0)
657     {
658       return
659        clib_error_create ("local0 interface doesn't support IP addressing");
660     }
661
662   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
663   ip4_addr_fib_init (&ip4_af, address,
664                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
665   vec_add1 (addr_fib, ip4_af);
666
667   /*
668    * there is no support for adj-fib handling in the presence of overlapping
669    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
670    * most routers do.
671    */
672   /* *INDENT-OFF* */
673   if (!is_del)
674     {
675       /* When adding an address check that it does not conflict
676          with an existing address on any interface in this table. */
677       ip_interface_address_t *ia;
678       vnet_sw_interface_t *sif;
679
680       pool_foreach (sif, vnm->interface_main.sw_interfaces)
681        {
682           if (im->fib_index_by_sw_if_index[sw_if_index] ==
683               im->fib_index_by_sw_if_index[sif->sw_if_index])
684             {
685               foreach_ip_interface_address
686                 (&im->lookup_main, ia, sif->sw_if_index,
687                  0 /* honor unnumbered */ ,
688                  ({
689                    ip4_address_t * x =
690                      ip_interface_address_get_address
691                      (&im->lookup_main, ia);
692
693                    if (ip4_destination_matches_route
694                        (im, address, x, ia->address_length) ||
695                        ip4_destination_matches_route (im,
696                                                       x,
697                                                       address,
698                                                       address_length))
699                      {
700                        /* an intf may have >1 addr from the same prefix */
701                        if ((sw_if_index == sif->sw_if_index) &&
702                            (ia->address_length == address_length) &&
703                            (x->as_u32 != address->as_u32))
704                          continue;
705
706                        if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
707                          /* if the address we're comparing against is stale
708                           * then the CP has not added this one back yet, maybe
709                           * it never will, so we have to assume it won't and
710                           * ignore it. if it does add it back, then it will fail
711                           * because this one is now present */
712                          continue;
713
714                        /* error if the length or intf was different */
715                        vnm->api_errno = VNET_API_ERROR_ADDRESS_IN_USE;
716
717                        error = clib_error_create
718                          ("failed to add %U on %U which conflicts with %U for interface %U",
719                           format_ip4_address_and_length, address,
720                           address_length,
721                           format_vnet_sw_if_index_name, vnm,
722                           sw_if_index,
723                           format_ip4_address_and_length, x,
724                           ia->address_length,
725                           format_vnet_sw_if_index_name, vnm,
726                           sif->sw_if_index);
727                        goto done;
728                      }
729                  }));
730             }
731       }
732     }
733   /* *INDENT-ON* */
734
735   if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
736
737   if (is_del)
738     {
739       if (~0 == if_address_index)
740         {
741           vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
742           error = clib_error_create ("%U not found for interface %U",
743                                      lm->format_address_and_length,
744                                      addr_fib, address_length,
745                                      format_vnet_sw_if_index_name, vnm,
746                                      sw_if_index);
747           goto done;
748         }
749
750       error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib,
751                                         address_length, sw_if_index);
752       if (error)
753         goto done;
754     }
755   else
756     {
757       if (~0 != if_address_index)
758         {
759           ip_interface_address_t *ia;
760
761           ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
762
763           if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
764             {
765               if (ia->sw_if_index == sw_if_index)
766                 {
767                   /* re-adding an address during the replace action.
768                    * consdier this the update. clear the flag and
769                    * we're done */
770                   ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
771                   goto done;
772                 }
773               else
774                 {
775                   /* The prefix is moving from one interface to another.
776                    * delete the stale and add the new */
777                   ip4_add_del_interface_address_internal (vm,
778                                                           ia->sw_if_index,
779                                                           address,
780                                                           address_length, 1);
781                   ia = NULL;
782                   error = ip_interface_address_add (lm, sw_if_index,
783                                                     addr_fib, address_length,
784                                                     &if_address_index);
785                 }
786             }
787           else
788             {
789               vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
790               error = clib_error_create
791                 ("Prefix %U already found on interface %U",
792                  lm->format_address_and_length, addr_fib, address_length,
793                  format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
794             }
795         }
796       else
797         error = ip_interface_address_add (lm, sw_if_index,
798                                           addr_fib, address_length,
799                                           &if_address_index);
800     }
801
802   if (error)
803     goto done;
804
805   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
806   ip4_mfib_interface_enable_disable (sw_if_index, !is_del);
807
808   /* intf addr routes are added/deleted on admin up/down */
809   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
810     {
811       if (is_del)
812         ip4_del_interface_routes (sw_if_index,
813                                   im, ip4_af.fib_index, address,
814                                   address_length);
815       else
816         ip4_add_interface_routes (sw_if_index,
817                                   im, ip4_af.fib_index,
818                                   pool_elt_at_index
819                                   (lm->if_address_pool, if_address_index));
820     }
821
822   ip4_add_del_interface_address_callback_t *cb;
823   vec_foreach (cb, im->add_del_interface_address_callbacks)
824     cb->function (im, cb->function_opaque, sw_if_index,
825                   address, address_length, if_address_index, is_del);
826
827 done:
828   vec_free (addr_fib);
829   return error;
830 }
831
832 clib_error_t *
833 ip4_add_del_interface_address (vlib_main_t * vm,
834                                u32 sw_if_index,
835                                ip4_address_t * address,
836                                u32 address_length, u32 is_del)
837 {
838   return ip4_add_del_interface_address_internal
839     (vm, sw_if_index, address, address_length, is_del);
840 }
841
842 void
843 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
844 {
845   ip_interface_address_t *ia;
846   ip4_main_t *im;
847
848   im = &ip4_main;
849
850   /*
851    * when directed broadcast is enabled, the subnet braodcast route will forward
852    * packets using an adjacency with a broadcast MAC. otherwise it drops
853    */
854   /* *INDENT-OFF* */
855   foreach_ip_interface_address(&im->lookup_main, ia,
856                                sw_if_index, 0,
857      ({
858        if (ia->address_length <= 30)
859          {
860            ip4_address_t *ipa;
861
862            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
863
864            fib_prefix_t pfx = {
865              .fp_len = 32,
866              .fp_proto = FIB_PROTOCOL_IP4,
867              .fp_addr = {
868                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
869              },
870            };
871
872            ip4_add_subnet_bcast_route
873              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
874                                                   sw_if_index),
875               &pfx, sw_if_index);
876          }
877      }));
878   /* *INDENT-ON* */
879 }
880 #endif
881
882 static clib_error_t *
883 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
884 {
885   ip4_main_t *im = &ip4_main;
886   ip_interface_address_t *ia;
887   ip4_address_t *a;
888   u32 is_admin_up, fib_index;
889
890   /* Fill in lookup tables with default table (0). */
891   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
892
893   vec_validate_init_empty (im->
894                            lookup_main.if_address_pool_index_by_sw_if_index,
895                            sw_if_index, ~0);
896
897   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
898
899   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
900
901   /* *INDENT-OFF* */
902   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
903                                 0 /* honor unnumbered */,
904   ({
905     a = ip_interface_address_get_address (&im->lookup_main, ia);
906     if (is_admin_up)
907       ip4_add_interface_routes (sw_if_index,
908                                 im, fib_index,
909                                 ia);
910     else
911       ip4_del_interface_routes (sw_if_index,
912                                 im, fib_index,
913                                 a, ia->address_length);
914   }));
915   /* *INDENT-ON* */
916
917   return 0;
918 }
919
920 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
921
922 /* Built-in ip4 unicast rx feature path definition */
923 /* *INDENT-OFF* */
/*
 * The "ip4-unicast" arc. It is entered from ip4-input and
 * ip4-input-no-checksum and its last node is ip4-lookup.
 * arc_index_ptr points at ip4_main.lookup_main.ucast_feature_arc_index
 * (presumably where the feature framework stores the arc's index).
 *
 * Ordering declared by the features below:
 *   ip4-flow-classify -> ip4-inacl -> ip4-policer-classify
 *     -> ipsec4-input-feature -> vpath-input-ip4 -> ip4-vxlan-bypass
 *     -> ip4-lookup
 *   ip4-source-and-port-range-check-rx -> ip4-policer-classify
 *   ip4-not-enabled -> ip4-lookup
 */
VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
{
  .arc_name = "ip4-unicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .last_in_arc = "ip4-lookup",
  .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
};

/* ip4-flow-classify: ordered before ip4-inacl */
VNET_FEATURE_INIT (ip4_flow_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-flow-classify",
  .runs_before = VNET_FEATURES ("ip4-inacl"),
};

/* ip4-inacl: ordered before ip4-policer-classify */
VNET_FEATURE_INIT (ip4_inacl, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-inacl",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

/* ip4-source-and-port-range-check-rx: ordered before ip4-policer-classify */
VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-and-port-range-check-rx",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

/* ip4-policer-classify: ordered before ipsec4-input-feature */
VNET_FEATURE_INIT (ip4_policer_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-policer-classify",
  .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
};

/* ipsec4-input-feature: ordered before vpath-input-ip4 */
VNET_FEATURE_INIT (ip4_ipsec, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ipsec4-input-feature",
  .runs_before = VNET_FEATURES ("vpath-input-ip4"),
};

/* vpath-input-ip4: ordered before ip4-vxlan-bypass */
VNET_FEATURE_INIT (ip4_vpath, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
};

/* ip4-vxlan-bypass: ordered before ip4-lookup */
VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-vxlan-bypass",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

/*
 * ip4-not-enabled: ordered before ip4-lookup. ip4_sw_interface_add_del()
 * switches this feature on when an interface is added and off when it is
 * deleted.
 */
VNET_FEATURE_INIT (ip4_not_enabled, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

/* ip4-lookup: the arc's last node, so it has no runs_before list */
VNET_FEATURE_INIT (ip4_lookup, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-lookup",
  .runs_before = 0,     /* not before any other features */
};
994
/*
 * Built-in ip4 multicast rx feature path definition.
 *
 * The "ip4-multicast" arc is entered from ip4-input and
 * ip4-input-no-checksum and its last node is ip4-mfib-forward-lookup.
 * arc_index_ptr points at ip4_main.lookup_main.mcast_feature_arc_index
 * (presumably where the feature framework stores the arc's index).
 */
VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
{
  .arc_name = "ip4-multicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .last_in_arc = "ip4-mfib-forward-lookup",
  .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
};

/* vpath-input-ip4: ordered before ip4-mfib-forward-lookup */
VNET_FEATURE_INIT (ip4_vpath_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

/*
 * ip4-not-enabled: ordered before ip4-mfib-forward-lookup.
 * ip4_sw_interface_add_del() switches this feature on when an interface
 * is added and off when it is deleted.
 */
VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

/* ip4-mfib-forward-lookup: the arc's last node */
VNET_FEATURE_INIT (ip4_lookup_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-mfib-forward-lookup",
  .runs_before = 0,     /* last feature */
};
1024
/*
 * Built-in ip4 tx feature path definition.
 *
 * The "ip4-output" arc is entered from ip4-rewrite, ip4-midchain and
 * ip4-dvr-dpo and its last node is interface-output.
 * arc_index_ptr points at ip4_main.lookup_main.output_feature_arc_index
 * (presumably where the feature framework stores the arc's index).
 *
 * Ordering declared by the features below:
 *   ip4-source-and-port-range-check-tx -> ip4-outacl
 *     -> ipsec4-output-feature -> interface-output
 */
VNET_FEATURE_ARC_INIT (ip4_output, static) =
{
  .arc_name = "ip4-output",
  .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
  .last_in_arc = "interface-output",
  .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
};

/* Source and port-range check on tx: ordered before ip4-outacl */
VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-source-and-port-range-check-tx",
  .runs_before = VNET_FEATURES ("ip4-outacl"),
};

/* ip4-outacl: ordered before ipsec4-output-feature */
VNET_FEATURE_INIT (ip4_outacl, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-outacl",
  .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
};

/* ipsec4-output-feature: ordered before interface-output */
VNET_FEATURE_INIT (ip4_ipsec_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ipsec4-output-feature",
  .runs_before = VNET_FEATURES ("interface-output"),
};

/* interface-output: the arc's last node, so it has no runs_before list */
VNET_FEATURE_INIT (ip4_interface_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "interface-output",
  .runs_before = 0,     /* not before any other features */
};
1062 /* *INDENT-ON* */
1063
1064 static clib_error_t *
1065 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1066 {
1067   ip4_main_t *im = &ip4_main;
1068
1069   /* Fill in lookup tables with default table (0). */
1070   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1071   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1072
1073   if (!is_add)
1074     {
1075       ip4_main_t *im4 = &ip4_main;
1076       ip_lookup_main_t *lm4 = &im4->lookup_main;
1077       ip_interface_address_t *ia = 0;
1078       ip4_address_t *address;
1079       vlib_main_t *vm = vlib_get_main ();
1080
1081       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1082       /* *INDENT-OFF* */
1083       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1084       ({
1085         address = ip_interface_address_get_address (lm4, ia);
1086         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1087       }));
1088       /* *INDENT-ON* */
1089       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1090     }
1091
1092   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1093                                is_add, 0, 0);
1094
1095   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1096                                sw_if_index, is_add, 0, 0);
1097
1098   return /* no error */ 0;
1099 }
1100
1101 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1102
1103 /* Global IP4 main. */
1104 #ifndef CLIB_MARCH_VARIANT
1105 ip4_main_t ip4_main;
1106 #endif /* CLIB_MARCH_VARIANT */
1107
1108 static clib_error_t *
1109 ip4_lookup_init (vlib_main_t * vm)
1110 {
1111   ip4_main_t *im = &ip4_main;
1112   clib_error_t *error;
1113   uword i;
1114
1115   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1116     return error;
1117   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1118     return (error);
1119   if ((error = vlib_call_init_function (vm, fib_module_init)))
1120     return error;
1121   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1122     return error;
1123
1124   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1125     {
1126       u32 m;
1127
1128       if (i < 32)
1129         m = pow2_mask (i) << (32 - i);
1130       else
1131         m = ~0;
1132       im->fib_masks[i] = clib_host_to_net_u32 (m);
1133     }
1134
1135   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1136
1137   /* Create FIB with index 0 and table id of 0. */
1138   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1139                                      FIB_SOURCE_DEFAULT_ROUTE);
1140   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1141                                       MFIB_SOURCE_DEFAULT_ROUTE);
1142
1143   {
1144     pg_node_t *pn;
1145     pn = pg_get_node (ip4_lookup_node.index);
1146     pn->unformat_edit = unformat_pg_ip4_header;
1147   }
1148
1149   {
1150     ethernet_arp_header_t h;
1151
1152     clib_memset (&h, 0, sizeof (h));
1153
1154 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1155 #define _8(f,v) h.f = v;
1156     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1157     _16 (l3_type, ETHERNET_TYPE_IP4);
1158     _8 (n_l2_address_bytes, 6);
1159     _8 (n_l3_address_bytes, 4);
1160     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1161 #undef _16
1162 #undef _8
1163
1164     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1165                                /* data */ &h,
1166                                sizeof (h),
1167                                /* alloc chunk size */ 8,
1168                                "ip4 arp");
1169   }
1170
1171   return error;
1172 }
1173
1174 VLIB_INIT_FUNCTION (ip4_lookup_init);
1175
/*
 * Trace record written by ip4_forward_next_trace() and read by the
 * format_ip4_*_trace() functions in this file.
 */
typedef struct
{
  /* Adjacency taken: the buffer's ip.adj_index[] entry selected by the
   * caller of ip4_forward_next_trace(). */
  u32 dpo_index;
  /* The buffer's ip.flow_hash. */
  u32 flow_hash;
  /* The buffer's sw_if_index[VLIB_TX] when that is not ~0, otherwise the
   * FIB index bound to the buffer's RX interface. */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. */
  u8 packet_data[64 - 1 * sizeof (u32)];
}
ip4_forward_next_trace_t;
1187
1188 #ifndef CLIB_MARCH_VARIANT
1189 u8 *
1190 format_ip4_forward_next_trace (u8 * s, va_list * args)
1191 {
1192   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1193   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1194   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1195   u32 indent = format_get_indent (s);
1196   s = format (s, "%U%U",
1197               format_white_space, indent,
1198               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1199   return s;
1200 }
1201 #endif
1202
1203 static u8 *
1204 format_ip4_lookup_trace (u8 * s, va_list * args)
1205 {
1206   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1207   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1208   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1209   u32 indent = format_get_indent (s);
1210
1211   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1212               t->fib_index, t->dpo_index, t->flow_hash);
1213   s = format (s, "\n%U%U",
1214               format_white_space, indent,
1215               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1216   return s;
1217 }
1218
1219 static u8 *
1220 format_ip4_rewrite_trace (u8 * s, va_list * args)
1221 {
1222   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1223   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1224   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1225   u32 indent = format_get_indent (s);
1226
1227   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1228               t->fib_index, t->dpo_index, format_ip_adjacency,
1229               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1230   s = format (s, "\n%U%U",
1231               format_white_space, indent,
1232               format_ip_adjacency_packet_data,
1233               t->packet_data, sizeof (t->packet_data));
1234   return s;
1235 }
1236
1237 #ifndef CLIB_MARCH_VARIANT
1238 /* Common trace function for all ip4-forward next nodes. */
1239 void
1240 ip4_forward_next_trace (vlib_main_t * vm,
1241                         vlib_node_runtime_t * node,
1242                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1243 {
1244   u32 *from, n_left;
1245   ip4_main_t *im = &ip4_main;
1246
1247   n_left = frame->n_vectors;
1248   from = vlib_frame_vector_args (frame);
1249
1250   while (n_left >= 4)
1251     {
1252       u32 bi0, bi1;
1253       vlib_buffer_t *b0, *b1;
1254       ip4_forward_next_trace_t *t0, *t1;
1255
1256       /* Prefetch next iteration. */
1257       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1258       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1259
1260       bi0 = from[0];
1261       bi1 = from[1];
1262
1263       b0 = vlib_get_buffer (vm, bi0);
1264       b1 = vlib_get_buffer (vm, bi1);
1265
1266       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1267         {
1268           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1269           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1270           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1271           t0->fib_index =
1272             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1273              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1274             vec_elt (im->fib_index_by_sw_if_index,
1275                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1276
1277           clib_memcpy_fast (t0->packet_data,
1278                             vlib_buffer_get_current (b0),
1279                             sizeof (t0->packet_data));
1280         }
1281       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1282         {
1283           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1284           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1285           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1286           t1->fib_index =
1287             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1288              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1289             vec_elt (im->fib_index_by_sw_if_index,
1290                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1291           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1292                             sizeof (t1->packet_data));
1293         }
1294       from += 2;
1295       n_left -= 2;
1296     }
1297
1298   while (n_left >= 1)
1299     {
1300       u32 bi0;
1301       vlib_buffer_t *b0;
1302       ip4_forward_next_trace_t *t0;
1303
1304       bi0 = from[0];
1305
1306       b0 = vlib_get_buffer (vm, bi0);
1307
1308       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1309         {
1310           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1311           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1312           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1313           t0->fib_index =
1314             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1315              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1316             vec_elt (im->fib_index_by_sw_if_index,
1317                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1318           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1319                             sizeof (t0->packet_data));
1320         }
1321       from += 1;
1322       n_left -= 1;
1323     }
1324 }
1325
1326 /* Compute TCP/UDP/ICMP4 checksum in software. */
1327 u16
1328 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1329                               ip4_header_t * ip0)
1330 {
1331   ip_csum_t sum0;
1332   u32 ip_header_length, payload_length_host_byte_order;
1333
1334   /* Initialize checksum with ip header. */
1335   ip_header_length = ip4_header_bytes (ip0);
1336   payload_length_host_byte_order =
1337     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1338   sum0 =
1339     clib_host_to_net_u32 (payload_length_host_byte_order +
1340                           (ip0->protocol << 16));
1341
1342   if (BITS (uword) == 32)
1343     {
1344       sum0 =
1345         ip_csum_with_carry (sum0,
1346                             clib_mem_unaligned (&ip0->src_address, u32));
1347       sum0 =
1348         ip_csum_with_carry (sum0,
1349                             clib_mem_unaligned (&ip0->dst_address, u32));
1350     }
1351   else
1352     sum0 =
1353       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1354
1355   return ip_calculate_l4_checksum (vm, p0, sum0,
1356                                    payload_length_host_byte_order, (u8 *) ip0,
1357                                    ip_header_length, NULL);
1358 }
1359
1360 u32
1361 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1362 {
1363   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1364   udp_header_t *udp0;
1365   u16 sum16;
1366
1367   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1368           || ip0->protocol == IP_PROTOCOL_UDP);
1369
1370   udp0 = (void *) (ip0 + 1);
1371   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1372     {
1373       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1374                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1375       return p0->flags;
1376     }
1377
1378   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1379
1380   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1381                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1382
1383   return p0->flags;
1384 }
1385 #endif
1386
1387 /* *INDENT-OFF* */
/*
 * The "ip4-local" arc: entered from the ip4-local node, last node
 * ip4-local-end-of-arc. No arc_index_ptr is given here;
 * ip4_local_set_next_and_error() reads the arc index from
 * vnet_feat_arc_ip4_local.feature_arc_index.
 */
VNET_FEATURE_ARC_INIT (ip4_local) =
{
  .arc_name  = "ip4-local",
  .start_nodes = VNET_FEATURES ("ip4-local"),
  .last_in_arc = "ip4-local-end-of-arc",
};
1394 /* *INDENT-ON* */
1395
1396 static inline void
1397 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1398                             ip4_header_t * ip, u8 is_udp, u8 * error,
1399                             u8 * good_tcp_udp)
1400 {
1401   u32 flags0;
1402   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1403   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1404   if (is_udp)
1405     {
1406       udp_header_t *udp;
1407       u32 ip_len, udp_len;
1408       i32 len_diff;
1409       udp = ip4_next_header (ip);
1410       /* Verify UDP length. */
1411       ip_len = clib_net_to_host_u16 (ip->length);
1412       udp_len = clib_net_to_host_u16 (udp->length);
1413
1414       len_diff = ip_len - udp_len;
1415       *good_tcp_udp &= len_diff >= 0;
1416       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1417     }
1418 }
1419
/*
 * Checksum-state predicates for a buffer pointer _b.
 *
 * Every macro argument and every expansion is fully parenthesised, so the
 * macros can be given arbitrary pointer expressions and used inside larger
 * expressions.
 */

/* True when the buffer has the OFFLOAD flag and a TCP or UDP checksum
 * offload bit set in its oflags. */
#define ip4_local_csum_is_offloaded(_b)                                       \
  (((_b)->flags & VNET_BUFFER_F_OFFLOAD) &&                                   \
   (vnet_buffer2 (_b)->oflags &                                               \
    (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)))

/* True when a TCP/UDP packet still needs a software checksum check: the
 * checksum is neither already computed nor offloaded. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                             \
  ((is_tcp_udp) &&                                                            \
   !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) ||                    \
     ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                           \
  ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) ||                      \
    ip4_local_csum_is_offloaded (_b)) != 0)
1432
1433 static inline void
1434 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1435                          ip4_header_t * ih, u8 * error)
1436 {
1437   u8 is_udp, is_tcp_udp, good_tcp_udp;
1438
1439   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1440   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1441
1442   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1443     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1444   else
1445     good_tcp_udp = ip4_local_csum_is_valid (b);
1446
1447   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1448   *error = (is_tcp_udp && !good_tcp_udp
1449             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1450 }
1451
1452 static inline void
1453 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1454                             ip4_header_t ** ih, u8 * error)
1455 {
1456   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1457
1458   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1459   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1460
1461   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1462   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1463
1464   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1465   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1466
1467   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1468                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1469     {
1470       if (is_tcp_udp[0])
1471         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1472                                     &good_tcp_udp[0]);
1473       if (is_tcp_udp[1])
1474         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1475                                     &good_tcp_udp[1]);
1476     }
1477
1478   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1479               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1480   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1481               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1482 }
1483
1484 static inline void
1485 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1486                               vlib_buffer_t * b, u16 * next, u8 error,
1487                               u8 head_of_feature_arc)
1488 {
1489   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1490   u32 next_index;
1491
1492   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1493   b->error = error ? error_node->errors[error] : 0;
1494   if (head_of_feature_arc)
1495     {
1496       next_index = *next;
1497       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1498         {
1499           vnet_feature_arc_start (arc_index,
1500                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1501                                   &next_index, b);
1502           *next = next_index;
1503         }
1504     }
1505 }
1506
/*
 * Result of the most recent source-address check, carried from packet to
 * packet so that consecutive packets from the same source reuse it
 * instead of doing another lookup.
 */
typedef struct
{
  /* source address the cached result belongs to */
  ip4_address_t src;
  /* load-balance index the lookup returned for src */
  u32 lbi;
  /* error code the check produced for src */
  u8 error;
  /* non-zero until the first lookup has been done; forces a lookup even
   * when a packet's source equals the initial value of src */
  u8 first;
} ip4_local_last_check_t;
1514
1515 static inline void
1516 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1517                      ip4_local_last_check_t * last_check, u8 * error0)
1518 {
1519   ip4_fib_mtrie_leaf_t leaf0;
1520   ip4_fib_mtrie_t *mtrie0;
1521   const dpo_id_t *dpo0;
1522   load_balance_t *lb0;
1523   u32 lbi0;
1524
1525   vnet_buffer (b)->ip.fib_index =
1526     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1527     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1528
1529   /*
1530    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1531    *  adjacency for the destination address (the local interface address).
1532    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1533    *  adjacency for the source address (the remote sender's address)
1534    */
1535   if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
1536       last_check->first)
1537     {
1538       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1539       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1540       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1541       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1542       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1543
1544       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1545         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1546       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1547
1548       lb0 = load_balance_get (lbi0);
1549       dpo0 = load_balance_get_bucket_i (lb0, 0);
1550
1551       /*
1552        * Must have a route to source otherwise we drop the packet.
1553        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1554        *
1555        * The checks are:
1556        *  - the source is a recieve => it's from us => bogus, do this
1557        *    first since it sets a different error code.
1558        *  - uRPF check for any route to source - accept if passes.
1559        *  - allow packets destined to the broadcast address from unknown sources
1560        */
1561
1562       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1563                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1564                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1565       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1566                   && !fib_urpf_check_size (lb0->lb_urpf)
1567                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1568                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1569
1570       last_check->src.as_u32 = ip0->src_address.as_u32;
1571       last_check->lbi = lbi0;
1572       last_check->error = *error0;
1573       last_check->first = 0;
1574     }
1575   else
1576     {
1577       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1578         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1579       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1580       *error0 = last_check->error;
1581     }
1582 }
1583
/**
 * Source-address check for two locally received packets at once.
 *
 * If either source address differs from the cached one (or nothing has
 * been cached yet), both sources are looked up in their packets' FIBs and
 * the result for the second packet becomes the new cached result.
 * Otherwise both packets reuse the cached load-balance index and error.
 *
 * @param b          the two buffers
 * @param ip         the two packets' ip4 headers
 * @param last_check cached result of the previous check; updated on a miss
 * @param error      in/out error codes, one per packet
 */
static inline void
ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
                        ip4_local_last_check_t * last_check, u8 * error)
{
  ip4_fib_mtrie_leaf_t leaf[2];
  ip4_fib_mtrie_t *mtrie[2];
  const dpo_id_t *dpo[2];
  load_balance_t *lb[2];
  u32 not_last_hit;
  u32 lbi[2];

  /* non-zero when the cache is still empty or either source differs from
   * the cached source (XOR is non-zero on any difference) */
  not_last_hit = last_check->first;
  not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
  not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;

  /* a TX interface value set on the buffer replaces the FIB index */
  vnet_buffer (b[0])->ip.fib_index =
    vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
    vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
    vnet_buffer (b[0])->ip.fib_index;

  vnet_buffer (b[1])->ip.fib_index =
    vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
    vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
    vnet_buffer (b[1])->ip.fib_index;

  /*
   * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
   *  adjacency for the destination address (the local interface address).
   * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
   *  adjacency for the source address (the remote sender's address)
   */
  if (PREDICT_TRUE (not_last_hit))
    {
      /* the two mtrie lookups are written step by step, side by side */
      mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
      mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;

      leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
      leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);

      leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
                                           &ip[0]->src_address, 2);
      leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
                                           &ip[1]->src_address, 2);

      leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
                                           &ip[0]->src_address, 3);
      leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
                                           &ip[1]->src_address, 3);

      lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
      lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);

      /* keep the destination adjacency in RX, put the source's in TX */
      vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
        vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];

      vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
        vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];

      lb[0] = load_balance_get (lbi[0]);
      lb[1] = load_balance_get (lbi[1]);

      dpo[0] = load_balance_get_bucket_i (lb[0], 0);
      dpo[1] = load_balance_get_bucket_i (lb[1], 0);

      /*
       * Same two checks as in ip4_local_check_src(), applied only while
       * the error is still IP4_ERROR_UNKNOWN_PROTOCOL: a source that
       * resolves to DPO_RECEIVE is flagged as spoofed; a source with an
       * empty uRPF list is a lookup miss unless the destination is
       * 255.255.255.255.
       */
      error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   dpo[0]->dpoi_type == DPO_RECEIVE) ?
                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
      error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   !fib_urpf_check_size (lb[0]->lb_urpf) &&
                   ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
                  ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);

      error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   dpo[1]->dpoi_type == DPO_RECEIVE) ?
                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
      error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   !fib_urpf_check_size (lb[1]->lb_urpf) &&
                   ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
                  ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);

      /* the second packet's result becomes the cached one */
      last_check->src.as_u32 = ip[1]->src_address.as_u32;
      last_check->lbi = lbi[1];
      last_check->error = error[1];
      last_check->first = 0;
    }
  else
    {
      /* both sources equal the cached source: reuse the cached result */
      vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
        vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;

      vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
        vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;

      error[0] = last_check->error;
      error[1] = last_check->error;
    }
}
1685
/* Packet classes returned by ip4_local_classify(). */
enum ip_local_packet_type_e
{
  /* neither a fragment nor flagged VNET_BUFFER_F_IS_NATED */
  IP_LOCAL_PACKET_TYPE_L4,
  /* buffer carries VNET_BUFFER_F_IS_NATED */
  IP_LOCAL_PACKET_TYPE_NAT,
  /* ip4_is_fragment() is true for the header */
  IP_LOCAL_PACKET_TYPE_FRAG,
};
1692
1693 /**
1694  * Determine packet type and next node.
1695  *
1696  * The expectation is that all packets that are not L4 will skip
1697  * checksums and source checks.
1698  */
1699 always_inline u8
1700 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1701 {
1702   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1703
1704   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1705     {
1706       *next = IP_LOCAL_NEXT_REASSEMBLY;
1707       return IP_LOCAL_PACKET_TYPE_FRAG;
1708     }
1709   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1710     {
1711       *next = lm->local_next_by_ip_protocol[ip->protocol];
1712       return IP_LOCAL_PACKET_TYPE_NAT;
1713     }
1714
1715   *next = lm->local_next_by_ip_protocol[ip->protocol];
1716   return IP_LOCAL_PACKET_TYPE_L4;
1717 }
1718
1719 static inline uword
1720 ip4_local_inline (vlib_main_t * vm,
1721                   vlib_node_runtime_t * node,
1722                   vlib_frame_t * frame, int head_of_feature_arc)
1723 {
1724   u32 *from, n_left_from;
1725   vlib_node_runtime_t *error_node =
1726     vlib_node_get_runtime (vm, ip4_local_node.index);
1727   u16 nexts[VLIB_FRAME_SIZE], *next;
1728   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1729   ip4_header_t *ip[2];
1730   u8 error[2], pt[2];
1731
1732   ip4_local_last_check_t last_check = {
1733     /*
1734      * 0.0.0.0 can appear as the source address of an IP packet,
1735      * as can any other address, hence the need to use the 'first'
1736      * member to make sure the .lbi is initialised for the first
1737      * packet.
1738      */
1739     .src = {.as_u32 = 0},
1740     .lbi = ~0,
1741     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1742     .first = 1,
1743   };
1744
1745   from = vlib_frame_vector_args (frame);
1746   n_left_from = frame->n_vectors;
1747
1748   if (node->flags & VLIB_NODE_FLAG_TRACE)
1749     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1750
1751   vlib_get_buffers (vm, from, bufs, n_left_from);
1752   b = bufs;
1753   next = nexts;
1754
1755   while (n_left_from >= 6)
1756     {
1757       u8 not_batch = 0;
1758
1759       /* Prefetch next iteration. */
1760       {
1761         vlib_prefetch_buffer_header (b[4], LOAD);
1762         vlib_prefetch_buffer_header (b[5], LOAD);
1763
1764         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1765         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1766       }
1767
1768       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1769
1770       ip[0] = vlib_buffer_get_current (b[0]);
1771       ip[1] = vlib_buffer_get_current (b[1]);
1772
1773       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1774       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1775
1776       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1777       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1778
1779       not_batch = pt[0] ^ pt[1];
1780
1781       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1782         goto skip_checks;
1783
1784       if (PREDICT_TRUE (not_batch == 0))
1785         {
1786           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1787           ip4_local_check_src_x2 (b, ip, &last_check, error);
1788         }
1789       else
1790         {
1791           if (!pt[0])
1792             {
1793               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1794               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1795             }
1796           if (!pt[1])
1797             {
1798               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1799               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1800             }
1801         }
1802
1803     skip_checks:
1804
1805       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1806                                     head_of_feature_arc);
1807       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1808                                     head_of_feature_arc);
1809
1810       b += 2;
1811       next += 2;
1812       n_left_from -= 2;
1813     }
1814
1815   while (n_left_from > 0)
1816     {
1817       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1818
1819       ip[0] = vlib_buffer_get_current (b[0]);
1820       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1821       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1822
1823       if (head_of_feature_arc == 0 || pt[0])
1824         goto skip_check;
1825
1826       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1827       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1828
1829     skip_check:
1830
1831       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1832                                     head_of_feature_arc);
1833
1834       b += 1;
1835       next += 1;
1836       n_left_from -= 1;
1837     }
1838
1839   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1840   return frame->n_vectors;
1841 }
1842
1843 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1844                                vlib_frame_t * frame)
1845 {
1846   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1847 }
1848
1849 /* *INDENT-OFF* */
1850 VLIB_REGISTER_NODE (ip4_local_node) =
1851 {
1852   .name = "ip4-local",
1853   .vector_size = sizeof (u32),
1854   .format_trace = format_ip4_forward_next_trace,
1855   .n_errors = IP4_N_ERROR,
1856   .error_strings = ip4_error_strings,
1857   .n_next_nodes = IP_LOCAL_N_NEXT,
1858   .next_nodes =
1859   {
1860     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1861     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1862     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1863     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1864     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1865   },
1866 };
1867 /* *INDENT-ON* */
1868
1869
1870 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1871                                           vlib_node_runtime_t * node,
1872                                           vlib_frame_t * frame)
1873 {
1874   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1875 }
1876
1877 /* *INDENT-OFF* */
1878 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1879   .name = "ip4-local-end-of-arc",
1880   .vector_size = sizeof (u32),
1881
1882   .format_trace = format_ip4_forward_next_trace,
1883   .sibling_of = "ip4-local",
1884 };
1885
1886 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1887   .arc_name = "ip4-local",
1888   .node_name = "ip4-local-end-of-arc",
1889   .runs_before = 0, /* not before any other features */
1890 };
1891 /* *INDENT-ON* */
1892
1893 #ifndef CLIB_MARCH_VARIANT
1894 void
1895 ip4_register_protocol (u32 protocol, u32 node_index)
1896 {
1897   vlib_main_t *vm = vlib_get_main ();
1898   ip4_main_t *im = &ip4_main;
1899   ip_lookup_main_t *lm = &im->lookup_main;
1900
1901   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1902   lm->local_next_by_ip_protocol[protocol] =
1903     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1904 }
1905
1906 void
1907 ip4_unregister_protocol (u32 protocol)
1908 {
1909   ip4_main_t *im = &ip4_main;
1910   ip_lookup_main_t *lm = &im->lookup_main;
1911
1912   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1913   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1914 }
1915 #endif
1916
1917 static clib_error_t *
1918 show_ip_local_command_fn (vlib_main_t * vm,
1919                           unformat_input_t * input, vlib_cli_command_t * cmd)
1920 {
1921   ip4_main_t *im = &ip4_main;
1922   ip_lookup_main_t *lm = &im->lookup_main;
1923   int i;
1924
1925   vlib_cli_output (vm, "Protocols handled by ip4_local");
1926   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1927     {
1928       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1929         {
1930           u32 node_index = vlib_get_node (vm,
1931                                           ip4_local_node.index)->
1932             next_nodes[lm->local_next_by_ip_protocol[i]];
1933           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1934                            format_vlib_node_name, vm, node_index);
1935         }
1936     }
1937   return 0;
1938 }
1939
1940
1941
1942 /*?
1943  * Display the set of protocols handled by the local IPv4 stack.
1944  *
1945  * @cliexpar
1946  * Example of how to display local protocol table:
1947  * @cliexstart{show ip local}
1948  * Protocols handled by ip4_local
1949  * 1
1950  * 17
1951  * 47
1952  * @cliexend
1953 ?*/
1954 /* *INDENT-OFF* */
1955 VLIB_CLI_COMMAND (show_ip_local, static) =
1956 {
1957   .path = "show ip local",
1958   .function = show_ip_local_command_fn,
1959   .short_help = "show ip local",
1960 };
1961 /* *INDENT-ON* */
1962
/**
 * Statically known next nodes of the IPv4 rewrite path.
 *
 * IP4_REWRITE_NEXT_DROP is the value every next index is pre-set to
 * before a frame is processed.
 */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  IP4_REWRITE_N_NEXT		/* Last: number of entries above */
} ip4_rewrite_next_t;
1970
/**
 * The bits of an IPv4 address that are masked in to construct a
 * multicast MAC address.  The constant is applied to the address as it
 * sits in memory, hence the two byte-order variants.
 */
#if !(CLIB_ARCH_IS_BIG_ENDIAN)
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
1980
1981 always_inline void
1982 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
1983                u16 adj_packet_bytes, bool df, u16 * next,
1984                u8 is_midchain, u32 * error)
1985 {
1986   if (packet_len > adj_packet_bytes)
1987     {
1988       *error = IP4_ERROR_MTU_EXCEEDED;
1989       if (df)
1990         {
1991           icmp4_error_set_vnet_buffer
1992             (b, ICMP4_destination_unreachable,
1993              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
1994              adj_packet_bytes);
1995           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
1996         }
1997       else
1998         {
1999           /* IP fragmentation */
2000           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2001                                    (is_midchain ?
2002                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
2003                                     IP_FRAG_NEXT_IP_REWRITE), 0);
2004           *next = IP4_REWRITE_NEXT_FRAGMENT;
2005         }
2006     }
2007 }
2008
2009 /* increment TTL & update checksum.
2010    Works either endian, so no need for byte swap. */
2011 static_always_inline void
2012 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
2013 {
2014   i32 ttl;
2015   u32 checksum;
2016   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2017     return;
2018
2019   ttl = ip->ttl;
2020
2021   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
2022   checksum += checksum >= 0xffff;
2023
2024   ip->checksum = checksum;
2025   ttl += 1;
2026   ip->ttl = ttl;
2027
2028   ASSERT (ip4_header_checksum_is_valid (ip));
2029 }
2030
2031 /* Decrement TTL & update checksum.
2032    Works either endian, so no need for byte swap. */
2033 static_always_inline void
2034 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2035                             u32 * error)
2036 {
2037   i32 ttl;
2038   u32 checksum;
2039   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2040     return;
2041
2042   ttl = ip->ttl;
2043
2044   /* Input node should have reject packets with ttl 0. */
2045   ASSERT (ip->ttl > 0);
2046
2047   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2048   checksum += checksum >= 0xffff;
2049
2050   ip->checksum = checksum;
2051   ttl -= 1;
2052   ip->ttl = ttl;
2053
2054   /*
2055    * If the ttl drops below 1 when forwarding, generate
2056    * an ICMP response.
2057    */
2058   if (PREDICT_FALSE (ttl <= 0))
2059     {
2060       *error = IP4_ERROR_TIME_EXPIRED;
2061       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2062       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2063                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2064                                    0);
2065       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2066     }
2067
2068   /* Verify checksum. */
2069   ASSERT (ip4_header_checksum_is_valid (ip) ||
2070           (vnet_buffer2 (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM));
2071 }
2072
2073
2074 always_inline uword
2075 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2076                              vlib_node_runtime_t * node,
2077                              vlib_frame_t * frame,
2078                              int do_counters, int is_midchain, int is_mcast)
2079 {
2080   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2081   u32 *from = vlib_frame_vector_args (frame);
2082   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2083   u16 nexts[VLIB_FRAME_SIZE], *next;
2084   u32 n_left_from;
2085   vlib_node_runtime_t *error_node =
2086     vlib_node_get_runtime (vm, ip4_input_node.index);
2087
2088   n_left_from = frame->n_vectors;
2089   u32 thread_index = vm->thread_index;
2090
2091   vlib_get_buffers (vm, from, bufs, n_left_from);
2092   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2093
2094 #if (CLIB_N_PREFETCHES >= 8)
2095   if (n_left_from >= 6)
2096     {
2097       int i;
2098       for (i = 2; i < 6; i++)
2099         vlib_prefetch_buffer_header (bufs[i], LOAD);
2100     }
2101
2102   next = nexts;
2103   b = bufs;
2104   while (n_left_from >= 8)
2105     {
2106       const ip_adjacency_t *adj0, *adj1;
2107       ip4_header_t *ip0, *ip1;
2108       u32 rw_len0, error0, adj_index0;
2109       u32 rw_len1, error1, adj_index1;
2110       u32 tx_sw_if_index0, tx_sw_if_index1;
2111       u8 *p;
2112
2113       if (is_midchain)
2114         {
2115           vlib_prefetch_buffer_header (b[6], LOAD);
2116           vlib_prefetch_buffer_header (b[7], LOAD);
2117         }
2118
2119       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2120       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2121
2122       /*
2123        * pre-fetch the per-adjacency counters
2124        */
2125       if (do_counters)
2126         {
2127           vlib_prefetch_combined_counter (&adjacency_counters,
2128                                           thread_index, adj_index0);
2129           vlib_prefetch_combined_counter (&adjacency_counters,
2130                                           thread_index, adj_index1);
2131         }
2132
2133       ip0 = vlib_buffer_get_current (b[0]);
2134       ip1 = vlib_buffer_get_current (b[1]);
2135
2136       error0 = error1 = IP4_ERROR_NONE;
2137
2138       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2139       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2140
2141       /* Rewrite packet header and updates lengths. */
2142       adj0 = adj_get (adj_index0);
2143       adj1 = adj_get (adj_index1);
2144
2145       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2146       rw_len0 = adj0[0].rewrite_header.data_bytes;
2147       rw_len1 = adj1[0].rewrite_header.data_bytes;
2148       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2149       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2150
2151       p = vlib_buffer_get_current (b[2]);
2152       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2153       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2154
2155       p = vlib_buffer_get_current (b[3]);
2156       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2157       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2158
2159       /* Check MTU of outgoing interface. */
2160       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2161       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2162
2163       if (b[0]->flags & VNET_BUFFER_F_GSO)
2164         ip0_len = gso_mtu_sz (b[0]);
2165       if (b[1]->flags & VNET_BUFFER_F_GSO)
2166         ip1_len = gso_mtu_sz (b[1]);
2167
2168       ip4_mtu_check (b[0], ip0_len,
2169                      adj0[0].rewrite_header.max_l3_packet_bytes,
2170                      ip0->flags_and_fragment_offset &
2171                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2172                      next + 0, is_midchain, &error0);
2173       ip4_mtu_check (b[1], ip1_len,
2174                      adj1[0].rewrite_header.max_l3_packet_bytes,
2175                      ip1->flags_and_fragment_offset &
2176                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2177                      next + 1, is_midchain, &error1);
2178
2179       if (is_mcast)
2180         {
2181           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2182                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2183                     IP4_ERROR_SAME_INTERFACE : error0);
2184           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2185                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2186                     IP4_ERROR_SAME_INTERFACE : error1);
2187         }
2188
2189       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2190        * to see the IP header */
2191       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2192         {
2193           u32 next_index = adj0[0].rewrite_header.next_index;
2194           vlib_buffer_advance (b[0], -(word) rw_len0);
2195
2196           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2197           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2198
2199           if (PREDICT_FALSE
2200               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2201             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2202                                                 tx_sw_if_index0,
2203                                                 &next_index, b[0],
2204                                                 adj0->ia_cfg_index);
2205
2206           next[0] = next_index;
2207           if (is_midchain)
2208             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2209                                         0 /* is_ip6 */ );
2210         }
2211       else
2212         {
2213           b[0]->error = error_node->errors[error0];
2214           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2215             ip4_ttl_inc (b[0], ip0);
2216         }
2217       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2218         {
2219           u32 next_index = adj1[0].rewrite_header.next_index;
2220           vlib_buffer_advance (b[1], -(word) rw_len1);
2221
2222           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2223           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2224
2225           if (PREDICT_FALSE
2226               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2227             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2228                                                 tx_sw_if_index1,
2229                                                 &next_index, b[1],
2230                                                 adj1->ia_cfg_index);
2231           next[1] = next_index;
2232           if (is_midchain)
2233             vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ ,
2234                                         0 /* is_ip6 */ );
2235         }
2236       else
2237         {
2238           b[1]->error = error_node->errors[error1];
2239           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2240             ip4_ttl_inc (b[1], ip1);
2241         }
2242
2243       if (is_midchain)
2244         /* Guess we are only writing on ipv4 header. */
2245         vnet_rewrite_two_headers (adj0[0], adj1[0],
2246                                   ip0, ip1, sizeof (ip4_header_t));
2247       else
2248         /* Guess we are only writing on simple Ethernet header. */
2249         vnet_rewrite_two_headers (adj0[0], adj1[0],
2250                                   ip0, ip1, sizeof (ethernet_header_t));
2251
2252       if (do_counters)
2253         {
2254           if (error0 == IP4_ERROR_NONE)
2255             vlib_increment_combined_counter
2256               (&adjacency_counters,
2257                thread_index,
2258                adj_index0, 1,
2259                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2260
2261           if (error1 == IP4_ERROR_NONE)
2262             vlib_increment_combined_counter
2263               (&adjacency_counters,
2264                thread_index,
2265                adj_index1, 1,
2266                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2267         }
2268
2269       if (is_midchain)
2270         {
2271           if (error0 == IP4_ERROR_NONE)
2272             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2273           if (error1 == IP4_ERROR_NONE)
2274             adj_midchain_fixup (vm, adj1, b[1], VNET_LINK_IP4);
2275         }
2276
2277       if (is_mcast)
2278         {
2279           /* copy bytes from the IP address into the MAC rewrite */
2280           if (error0 == IP4_ERROR_NONE)
2281             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2282                                         adj0->rewrite_header.dst_mcast_offset,
2283                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2284           if (error1 == IP4_ERROR_NONE)
2285             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2286                                         adj1->rewrite_header.dst_mcast_offset,
2287                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2288         }
2289
2290       next += 2;
2291       b += 2;
2292       n_left_from -= 2;
2293     }
2294 #elif (CLIB_N_PREFETCHES >= 4)
2295   next = nexts;
2296   b = bufs;
2297   while (n_left_from >= 1)
2298     {
2299       ip_adjacency_t *adj0;
2300       ip4_header_t *ip0;
2301       u32 rw_len0, error0, adj_index0;
2302       u32 tx_sw_if_index0;
2303       u8 *p;
2304
2305       /* Prefetch next iteration */
2306       if (PREDICT_TRUE (n_left_from >= 4))
2307         {
2308           ip_adjacency_t *adj2;
2309           u32 adj_index2;
2310
2311           vlib_prefetch_buffer_header (b[3], LOAD);
2312           vlib_prefetch_buffer_data (b[2], LOAD);
2313
2314           /* Prefetch adj->rewrite_header */
2315           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2316           adj2 = adj_get (adj_index2);
2317           p = (u8 *) adj2;
2318           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2319                          LOAD);
2320         }
2321
2322       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2323
2324       /*
2325        * Prefetch the per-adjacency counters
2326        */
2327       if (do_counters)
2328         {
2329           vlib_prefetch_combined_counter (&adjacency_counters,
2330                                           thread_index, adj_index0);
2331         }
2332
2333       ip0 = vlib_buffer_get_current (b[0]);
2334
2335       error0 = IP4_ERROR_NONE;
2336
2337       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2338
2339       /* Rewrite packet header and updates lengths. */
2340       adj0 = adj_get (adj_index0);
2341
2342       /* Rewrite header was prefetched. */
2343       rw_len0 = adj0[0].rewrite_header.data_bytes;
2344       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2345
2346       /* Check MTU of outgoing interface. */
2347       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2348
2349       if (b[0]->flags & VNET_BUFFER_F_GSO)
2350         ip0_len = gso_mtu_sz (b[0]);
2351
2352       ip4_mtu_check (b[0], ip0_len,
2353                      adj0[0].rewrite_header.max_l3_packet_bytes,
2354                      ip0->flags_and_fragment_offset &
2355                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2356                      next + 0, is_midchain, &error0);
2357
2358       if (is_mcast)
2359         {
2360           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2361                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2362                     IP4_ERROR_SAME_INTERFACE : error0);
2363         }
2364
2365       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2366        * to see the IP header */
2367       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2368         {
2369           u32 next_index = adj0[0].rewrite_header.next_index;
2370           vlib_buffer_advance (b[0], -(word) rw_len0);
2371           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2372           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2373
2374           if (PREDICT_FALSE
2375               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2376             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2377                                                 tx_sw_if_index0,
2378                                                 &next_index, b[0],
2379                                                 adj0->ia_cfg_index);
2380           next[0] = next_index;
2381
2382           if (is_midchain)
2383             {
2384               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2385                                           0 /* is_ip6 */ );
2386
2387               /* Guess we are only writing on ipv4 header. */
2388               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2389             }
2390           else
2391             /* Guess we are only writing on simple Ethernet header. */
2392             vnet_rewrite_one_header (adj0[0], ip0,
2393                                      sizeof (ethernet_header_t));
2394
2395           /*
2396            * Bump the per-adjacency counters
2397            */
2398           if (do_counters)
2399             vlib_increment_combined_counter
2400               (&adjacency_counters,
2401                thread_index,
2402                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2403                                                            b[0]) + rw_len0);
2404
2405           if (is_midchain)
2406             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2407
2408           if (is_mcast)
2409             /* copy bytes from the IP address into the MAC rewrite */
2410             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2411                                         adj0->rewrite_header.dst_mcast_offset,
2412                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2413         }
2414       else
2415         {
2416           b[0]->error = error_node->errors[error0];
2417           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2418             ip4_ttl_inc (b[0], ip0);
2419         }
2420
2421       next += 1;
2422       b += 1;
2423       n_left_from -= 1;
2424     }
2425 #endif
2426
2427   while (n_left_from > 0)
2428     {
2429       ip_adjacency_t *adj0;
2430       ip4_header_t *ip0;
2431       u32 rw_len0, adj_index0, error0;
2432       u32 tx_sw_if_index0;
2433
2434       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2435
2436       adj0 = adj_get (adj_index0);
2437
2438       if (do_counters)
2439         vlib_prefetch_combined_counter (&adjacency_counters,
2440                                         thread_index, adj_index0);
2441
2442       ip0 = vlib_buffer_get_current (b[0]);
2443
2444       error0 = IP4_ERROR_NONE;
2445
2446       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2447
2448
2449       /* Update packet buffer attributes/set output interface. */
2450       rw_len0 = adj0[0].rewrite_header.data_bytes;
2451       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2452
2453       /* Check MTU of outgoing interface. */
2454       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2455       if (b[0]->flags & VNET_BUFFER_F_GSO)
2456         ip0_len = gso_mtu_sz (b[0]);
2457
2458       ip4_mtu_check (b[0], ip0_len,
2459                      adj0[0].rewrite_header.max_l3_packet_bytes,
2460                      ip0->flags_and_fragment_offset &
2461                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2462                      next + 0, is_midchain, &error0);
2463
2464       if (is_mcast)
2465         {
2466           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2467                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2468                     IP4_ERROR_SAME_INTERFACE : error0);
2469         }
2470
2471       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2472        * to see the IP header */
2473       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2474         {
2475           u32 next_index = adj0[0].rewrite_header.next_index;
2476           vlib_buffer_advance (b[0], -(word) rw_len0);
2477           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2478           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2479
2480           if (PREDICT_FALSE
2481               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2482             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2483                                                 tx_sw_if_index0,
2484                                                 &next_index, b[0],
2485                                                 adj0->ia_cfg_index);
2486           next[0] = next_index;
2487
2488           if (is_midchain)
2489             {
2490               /* this acts on the packet that is about to be encapped */
2491               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2492                                           0 /* is_ip6 */ );
2493
2494               /* Guess we are only writing on ipv4 header. */
2495               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2496             }
2497           else
2498             /* Guess we are only writing on simple Ethernet header. */
2499             vnet_rewrite_one_header (adj0[0], ip0,
2500                                      sizeof (ethernet_header_t));
2501
2502           if (do_counters)
2503             vlib_increment_combined_counter
2504               (&adjacency_counters,
2505                thread_index, adj_index0, 1,
2506                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2507
2508           if (is_midchain)
2509             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2510
2511           if (is_mcast)
2512             /* copy bytes from the IP address into the MAC rewrite */
2513             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2514                                         adj0->rewrite_header.dst_mcast_offset,
2515                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2516         }
2517       else
2518         {
2519           b[0]->error = error_node->errors[error0];
2520           /* undo the TTL decrement - we'll be back to do it again */
2521           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2522             ip4_ttl_inc (b[0], ip0);
2523         }
2524
2525       next += 1;
2526       b += 1;
2527       n_left_from -= 1;
2528     }
2529
2530
2531   /* Need to do trace after rewrites to pick up new packet data. */
2532   if (node->flags & VLIB_NODE_FLAG_TRACE)
2533     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2534
2535   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2536   return frame->n_vectors;
2537 }
2538
2539 always_inline uword
2540 ip4_rewrite_inline (vlib_main_t * vm,
2541                     vlib_node_runtime_t * node,
2542                     vlib_frame_t * frame,
2543                     int do_counters, int is_midchain, int is_mcast)
2544 {
2545   return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2546                                       is_midchain, is_mcast);
2547 }
2548
2549
2550 /** @brief IPv4 rewrite node.
2551     @node ip4-rewrite
2552
2553     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2554     header checksum, fetch the ip adjacency, check the outbound mtu,
2555     apply the adjacency rewrite, and send pkts to the adjacency
2556     rewrite header's rewrite_next_index.
2557
2558     @param vm vlib_main_t corresponding to the current thread
2559     @param node vlib_node_runtime_t
2560     @param frame vlib_frame_t whose contents should be dispatched
2561
2562     @par Graph mechanics: buffer metadata, next index usage
2563
2564     @em Uses:
2565     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2566         - the rewrite adjacency index
2567     - <code>adj->lookup_next_index</code>
2568         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2569           the packet will be dropped.
2570     - <code>adj->rewrite_header</code>
2571         - Rewrite string length, rewrite string, next_index
2572
2573     @em Sets:
2574     - <code>b->current_data, b->current_length</code>
2575         - Updated net of applying the rewrite string
2576
2577     <em>Next Indices:</em>
2578     - <code> adj->rewrite_header.next_index </code>
2579       or @c ip4-drop
2580 */
2581
2582 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2583                                  vlib_frame_t * frame)
2584 {
2585   if (adj_are_counters_enabled ())
2586     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2587   else
2588     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2589 }
2590
2591 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2592                                        vlib_node_runtime_t * node,
2593                                        vlib_frame_t * frame)
2594 {
2595   if (adj_are_counters_enabled ())
2596     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2597   else
2598     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2599 }
2600
2601 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2602                                   vlib_node_runtime_t * node,
2603                                   vlib_frame_t * frame)
2604 {
2605   if (adj_are_counters_enabled ())
2606     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2607   else
2608     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2609 }
2610
2611 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2612                                        vlib_node_runtime_t * node,
2613                                        vlib_frame_t * frame)
2614 {
2615   if (adj_are_counters_enabled ())
2616     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2617   else
2618     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2619 }
2620
2621 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2622                                         vlib_node_runtime_t * node,
2623                                         vlib_frame_t * frame)
2624 {
2625   if (adj_are_counters_enabled ())
2626     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2627   else
2628     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2629 }
2630
2631 /* *INDENT-OFF* */
2632 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2633   .name = "ip4-rewrite",
2634   .vector_size = sizeof (u32),
2635
2636   .format_trace = format_ip4_rewrite_trace,
2637
2638   .n_next_nodes = IP4_REWRITE_N_NEXT,
2639   .next_nodes = {
2640     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2641     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2642     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2643   },
2644 };
2645
2646 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2647   .name = "ip4-rewrite-bcast",
2648   .vector_size = sizeof (u32),
2649
2650   .format_trace = format_ip4_rewrite_trace,
2651   .sibling_of = "ip4-rewrite",
2652 };
2653
2654 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2655   .name = "ip4-rewrite-mcast",
2656   .vector_size = sizeof (u32),
2657
2658   .format_trace = format_ip4_rewrite_trace,
2659   .sibling_of = "ip4-rewrite",
2660 };
2661
2662 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2663   .name = "ip4-mcast-midchain",
2664   .vector_size = sizeof (u32),
2665
2666   .format_trace = format_ip4_rewrite_trace,
2667   .sibling_of = "ip4-rewrite",
2668 };
2669
2670 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2671   .name = "ip4-midchain",
2672   .vector_size = sizeof (u32),
2673   .format_trace = format_ip4_rewrite_trace,
2674   .sibling_of = "ip4-rewrite",
2675 };
2676 /* *INDENT-ON* */
2677
2678 static int
2679 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2680 {
2681   ip4_fib_mtrie_t *mtrie0;
2682   ip4_fib_mtrie_leaf_t leaf0;
2683   u32 lbi0;
2684
2685   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2686
2687   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2688   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2689   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2690
2691   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2692
2693   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2694 }
2695
2696 static clib_error_t *
2697 test_lookup_command_fn (vlib_main_t * vm,
2698                         unformat_input_t * input, vlib_cli_command_t * cmd)
2699 {
2700   ip4_fib_t *fib;
2701   u32 table_id = 0;
2702   f64 count = 1;
2703   u32 n;
2704   int i;
2705   ip4_address_t ip4_base_address;
2706   u64 errors = 0;
2707
2708   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2709     {
2710       if (unformat (input, "table %d", &table_id))
2711         {
2712           /* Make sure the entry exists. */
2713           fib = ip4_fib_get (table_id);
2714           if ((fib) && (fib->index != table_id))
2715             return clib_error_return (0, "<fib-index> %d does not exist",
2716                                       table_id);
2717         }
2718       else if (unformat (input, "count %f", &count))
2719         ;
2720
2721       else if (unformat (input, "%U",
2722                          unformat_ip4_address, &ip4_base_address))
2723         ;
2724       else
2725         return clib_error_return (0, "unknown input `%U'",
2726                                   format_unformat_error, input);
2727     }
2728
2729   n = count;
2730
2731   for (i = 0; i < n; i++)
2732     {
2733       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2734         errors++;
2735
2736       ip4_base_address.as_u32 =
2737         clib_host_to_net_u32 (1 +
2738                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2739     }
2740
2741   if (errors)
2742     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2743   else
2744     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2745
2746   return 0;
2747 }
2748
2749 /*?
2750  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2751  * given FIB table to determine if there is a conflict with the
2752  * adjacency table. The fib-id can be determined by using the
2753  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2754  * of 0 is used.
2755  *
2756  * @todo This command uses fib-id, other commands use table-id (not
2757  * just a name, they are different indexes). Would like to change this
2758  * to table-id for consistency.
2759  *
2760  * @cliexpar
2761  * Example of how to run the test lookup command:
2762  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2763  * No errors in 2 lookups
2764  * @cliexend
2765 ?*/
2766 /* *INDENT-OFF* */
2767 VLIB_CLI_COMMAND (lookup_test_command, static) =
2768 {
2769   .path = "test lookup",
2770   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2771   .function = test_lookup_command_fn,
2772 };
2773 /* *INDENT-ON* */
2774
2775 static clib_error_t *
2776 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2777                              unformat_input_t * input,
2778                              vlib_cli_command_t * cmd)
2779 {
2780   int matched = 0;
2781   u32 table_id = 0;
2782   u32 flow_hash_config = 0;
2783   int rv;
2784
2785   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2786     {
2787       if (unformat (input, "table %d", &table_id))
2788         matched = 1;
2789 #define _(a, b, v)                                                            \
2790   else if (unformat (input, #a))                                              \
2791   {                                                                           \
2792     flow_hash_config |= v;                                                    \
2793     matched = 1;                                                              \
2794   }
2795       foreach_flow_hash_bit
2796 #undef _
2797         else
2798         break;
2799     }
2800
2801   if (matched == 0)
2802     return clib_error_return (0, "unknown input `%U'",
2803                               format_unformat_error, input);
2804
2805   rv = ip_flow_hash_set (AF_IP4, table_id, flow_hash_config);
2806   switch (rv)
2807     {
2808     case 0:
2809       break;
2810
2811     case VNET_API_ERROR_NO_SUCH_FIB:
2812       return clib_error_return (0, "no such FIB table %d", table_id);
2813
2814     default:
2815       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2816       break;
2817     }
2818
2819   return 0;
2820 }
2821
2822 /*?
2823  * Configure the set of IPv4 fields used by the flow hash.
2824  *
2825  * @cliexpar
2826  * Example of how to set the flow hash on a given table:
2827  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2828  * Example of how to display the configured flow hash:
2829  * @cliexstart{show ip fib}
2830  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2831  * 0.0.0.0/0
2832  *   unicast-ip4-chain
2833  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2834  *     [0] [@0]: dpo-drop ip6
2835  * 0.0.0.0/32
2836  *   unicast-ip4-chain
2837  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2838  *     [0] [@0]: dpo-drop ip6
2839  * 224.0.0.0/8
2840  *   unicast-ip4-chain
2841  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2842  *     [0] [@0]: dpo-drop ip6
2843  * 6.0.1.2/32
2844  *   unicast-ip4-chain
2845  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2846  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2847  * 7.0.0.1/32
2848  *   unicast-ip4-chain
2849  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2850  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2851  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2852  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2853  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2854  * 240.0.0.0/8
2855  *   unicast-ip4-chain
2856  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2857  *     [0] [@0]: dpo-drop ip6
2858  * 255.255.255.255/32
2859  *   unicast-ip4-chain
2860  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2861  *     [0] [@0]: dpo-drop ip6
2862  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2863  * 0.0.0.0/0
2864  *   unicast-ip4-chain
2865  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2866  *     [0] [@0]: dpo-drop ip6
2867  * 0.0.0.0/32
2868  *   unicast-ip4-chain
2869  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2870  *     [0] [@0]: dpo-drop ip6
2871  * 172.16.1.0/24
2872  *   unicast-ip4-chain
2873  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2874  *     [0] [@4]: ipv4-glean: af_packet0
2875  * 172.16.1.1/32
2876  *   unicast-ip4-chain
2877  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2878  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2879  * 172.16.1.2/32
2880  *   unicast-ip4-chain
2881  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2882  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2883  * 172.16.2.0/24
2884  *   unicast-ip4-chain
2885  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2886  *     [0] [@4]: ipv4-glean: af_packet1
2887  * 172.16.2.1/32
2888  *   unicast-ip4-chain
2889  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2890  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2891  * 224.0.0.0/8
2892  *   unicast-ip4-chain
2893  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2894  *     [0] [@0]: dpo-drop ip6
2895  * 240.0.0.0/8
2896  *   unicast-ip4-chain
2897  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2898  *     [0] [@0]: dpo-drop ip6
2899  * 255.255.255.255/32
2900  *   unicast-ip4-chain
2901  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2902  *     [0] [@0]: dpo-drop ip6
2903  * @cliexend
2904 ?*/
2905 /* *INDENT-OFF* */
2906 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2907 {
2908   .path = "set ip flow-hash",
2909   .short_help =
2910   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2911   .function = set_ip_flow_hash_command_fn,
2912 };
2913 /* *INDENT-ON* */
2914
2915 #ifndef CLIB_MARCH_VARIANT
2916 int
2917 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2918                              u32 table_index)
2919 {
2920   vnet_main_t *vnm = vnet_get_main ();
2921   vnet_interface_main_t *im = &vnm->interface_main;
2922   ip4_main_t *ipm = &ip4_main;
2923   ip_lookup_main_t *lm = &ipm->lookup_main;
2924   vnet_classify_main_t *cm = &vnet_classify_main;
2925   ip4_address_t *if_addr;
2926
2927   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2928     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2929
2930   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2931     return VNET_API_ERROR_NO_SUCH_ENTRY;
2932
2933   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2934   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2935
2936   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2937
2938   if (NULL != if_addr)
2939     {
2940       fib_prefix_t pfx = {
2941         .fp_len = 32,
2942         .fp_proto = FIB_PROTOCOL_IP4,
2943         .fp_addr.ip4 = *if_addr,
2944       };
2945       u32 fib_index;
2946
2947       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2948                                                        sw_if_index);
2949
2950
2951       if (table_index != (u32) ~ 0)
2952         {
2953           dpo_id_t dpo = DPO_INVALID;
2954
2955           dpo_set (&dpo,
2956                    DPO_CLASSIFY,
2957                    DPO_PROTO_IP4,
2958                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2959
2960           fib_table_entry_special_dpo_add (fib_index,
2961                                            &pfx,
2962                                            FIB_SOURCE_CLASSIFY,
2963                                            FIB_ENTRY_FLAG_NONE, &dpo);
2964           dpo_reset (&dpo);
2965         }
2966       else
2967         {
2968           fib_table_entry_special_remove (fib_index,
2969                                           &pfx, FIB_SOURCE_CLASSIFY);
2970         }
2971     }
2972
2973   return 0;
2974 }
2975 #endif
2976
2977 static clib_error_t *
2978 set_ip_classify_command_fn (vlib_main_t * vm,
2979                             unformat_input_t * input,
2980                             vlib_cli_command_t * cmd)
2981 {
2982   u32 table_index = ~0;
2983   int table_index_set = 0;
2984   u32 sw_if_index = ~0;
2985   int rv;
2986
2987   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2988     {
2989       if (unformat (input, "table-index %d", &table_index))
2990         table_index_set = 1;
2991       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2992                          vnet_get_main (), &sw_if_index))
2993         ;
2994       else
2995         break;
2996     }
2997
2998   if (table_index_set == 0)
2999     return clib_error_return (0, "classify table-index must be specified");
3000
3001   if (sw_if_index == ~0)
3002     return clib_error_return (0, "interface / subif must be specified");
3003
3004   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3005
3006   switch (rv)
3007     {
3008     case 0:
3009       break;
3010
3011     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3012       return clib_error_return (0, "No such interface");
3013
3014     case VNET_API_ERROR_NO_SUCH_ENTRY:
3015       return clib_error_return (0, "No such classifier table");
3016     }
3017   return 0;
3018 }
3019
3020 /*?
3021  * Assign a classification table to an interface. The classification
3022  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3023  * commands. Once the table is created, use this command to filter packets
3024  * on an interface.
3025  *
3026  * @cliexpar
3027  * Example of how to assign a classification table to an interface:
3028  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3029 ?*/
3030 /* *INDENT-OFF* */
3031 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3032 {
3033     .path = "set ip classify",
3034     .short_help =
3035     "set ip classify intfc <interface> table-index <classify-idx>",
3036     .function = set_ip_classify_command_fn,
3037 };
3038 /* *INDENT-ON* */
3039
3040 /*
3041  * fd.io coding-style-patch-verification: ON
3042  *
3043  * Local Variables:
3044  * eval: (c-set-style "gnu")
3045  * End:
3046  */