fib: Source Address Selection
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/classify_dpo.h>
56 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
57 #include <vnet/adj/adj_dp.h>
58
59 #include <vnet/ip/ip4_forward.h>
60 #include <vnet/interface_output.h>
61 #include <vnet/classify/vnet_classify.h>
62
/** @brief IPv4 lookup node.
    @node ip4-lookup

    This is the main IPv4 lookup dispatch node. The node function itself
    contains no logic of its own: it hands the frame straight to
    ip4_lookup_inline() and returns that function's result.

    @param vm vlib_main_t corresponding to the current thread
    @param node vlib_node_runtime_t
    @param frame vlib_frame_t whose contents should be dispatched
    @return the value returned by ip4_lookup_inline() for this frame

    @par Graph mechanics: buffer metadata, next index usage

    @em Uses:
    - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
        - Indicates the @c sw_if_index value of the interface that the
          packet was received on.
    - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
        - When the value is @c ~0 then the node performs a longest prefix
          match (LPM) for the packet destination address in the FIB attached
          to the receive interface.
        - Otherwise perform LPM for the packet destination address in the
          indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
          value (0, 1, ...) and not a VRF id.

    @em Sets:
    - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
        - The lookup result adjacency index.

    <em>Next Index:</em>
    - Dispatches the packet to the node index found in
      ip_adjacency_t @c adj->lookup_next_index
      (where @c adj is the lookup result adjacency).
*/
VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
                                vlib_frame_t * frame)
{
  /* all of the per-packet work lives in the shared inline implementation */
  return ip4_lookup_inline (vm, node, frame);
}
100
/*
 * Forward declaration of the trace formatter: the node registrations in
 * this file reference it before its definition is reached.
 */
static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);

/*
 * Graph registration for the "ip4-lookup" node.
 * Frame elements are u32 sized, traces are rendered with
 * format_ip4_lookup_trace, and the set of next nodes is taken from
 * IP_LOOKUP_N_NEXT / IP4_LOOKUP_NEXT_NODES.
 */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_lookup_node) =
{
  .name = "ip4-lookup",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_lookup_trace,
  .n_next_nodes = IP_LOOKUP_N_NEXT,
  .next_nodes = IP4_LOOKUP_NEXT_NODES,
};
/* *INDENT-ON* */
113
/**
 * @brief IPv4 load-balance node ("ip4-load-balance").
 *
 * For every buffer in the frame this node:
 *  - reads the load-balance index stored in
 *    vnet_buffer(b)->ip.adj_index[VLIB_TX];
 *  - selects one bucket of that load-balance object (bucket 0 when there is
 *    a single bucket, otherwise a bucket chosen from the flow hash);
 *  - overwrites adj_index[VLIB_TX] with the bucket's DPO index and uses the
 *    bucket's DPO next node as the buffer's next node;
 *  - increments the load-balance "via" combined counter for the original
 *    load-balance index by one packet and the buffer chain length.
 *
 * @param vm    vlib_main_t of the current thread
 * @param node  runtime data of this node
 * @param frame frame of buffer indices to process
 * @return the number of buffers in the frame
 */
VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
                                      vlib_node_runtime_t * node,
                                      vlib_frame_t * frame)
{
  vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
  u32 n_left, *from;
  u32 thread_index = vm->thread_index;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
  u16 nexts[VLIB_FRAME_SIZE], *next;

  from = vlib_frame_vector_args (frame);
  n_left = frame->n_vectors;
  next = nexts;

  /* translate the whole frame of buffer indices to pointers up front */
  vlib_get_buffers (vm, from, bufs, n_left);

  /*
   * Dual loop: two buffers are handled per iteration. The bound is 4, not
   * 2, because each iteration also prefetches b[2] and b[3], the pair that
   * the next iteration will process.
   */
  while (n_left >= 4)
    {
      const load_balance_t *lb0, *lb1;
      const ip4_header_t *ip0, *ip1;
      u32 lbi0, hc0, lbi1, hc1;
      const dpo_id_t *dpo0, *dpo1;

      /* Prefetch next iteration. */
      {
        vlib_prefetch_buffer_header (b[2], LOAD);
        vlib_prefetch_buffer_header (b[3], LOAD);

        CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
        CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
      }

      ip0 = vlib_buffer_get_current (b[0]);
      ip1 = vlib_buffer_get_current (b[1]);
      lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
      lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];

      lb0 = load_balance_get (lbi0);
      lb1 = load_balance_get (lbi1);

      /*
       * This node is used for via-FIBs, so a flow hash already stored in
       * the buffer is re-used when present (a stored value of zero is
       * treated as "not present" and a fresh hash is computed instead).
       * A re-used hash is shifted right by one bit before use: we don't
       * want to use the same hash value at each level in the recursion
       * graph as that would lead to polarisation.
       */
      hc0 = hc1 = 0;

      if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
        {
          if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
            {
              hc0 = vnet_buffer (b[0])->ip.flow_hash =
                vnet_buffer (b[0])->ip.flow_hash >> 1;
            }
          else
            {
              hc0 = vnet_buffer (b[0])->ip.flow_hash =
                ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
            }
          /* the hash is masked with (n_buckets - 1) to pick the bucket */
          dpo0 = load_balance_get_fwd_bucket
            (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
        }
      else
        {
          /* single bucket: nothing to choose, no hash needed */
          dpo0 = load_balance_get_bucket_i (lb0, 0);
        }
      if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
        {
          if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
            {
              hc1 = vnet_buffer (b[1])->ip.flow_hash =
                vnet_buffer (b[1])->ip.flow_hash >> 1;
            }
          else
            {
              hc1 = vnet_buffer (b[1])->ip.flow_hash =
                ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
            }
          dpo1 = load_balance_get_fwd_bucket
            (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
        }
      else
        {
          dpo1 = load_balance_get_bucket_i (lb1, 0);
        }

      next[0] = dpo0->dpoi_next_node;
      next[1] = dpo1->dpoi_next_node;

      /* hand the chosen bucket's DPO index to the next node */
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;

      /* counters are indexed by the load-balance the packet arrived with */
      vlib_increment_combined_counter
        (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
      vlib_increment_combined_counter
        (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));

      b += 2;
      next += 2;
      n_left -= 2;
    }

  /* Single loop: same processing, one buffer at a time, for the remainder */
  while (n_left > 0)
    {
      const load_balance_t *lb0;
      const ip4_header_t *ip0;
      const dpo_id_t *dpo0;
      u32 lbi0, hc0;

      ip0 = vlib_buffer_get_current (b[0]);
      lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];

      lb0 = load_balance_get (lbi0);

      hc0 = 0;
      if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
        {
          if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
            {
              hc0 = vnet_buffer (b[0])->ip.flow_hash =
                vnet_buffer (b[0])->ip.flow_hash >> 1;
            }
          else
            {
              hc0 = vnet_buffer (b[0])->ip.flow_hash =
                ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
            }
          dpo0 = load_balance_get_fwd_bucket
            (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
        }
      else
        {
          dpo0 = load_balance_get_bucket_i (lb0, 0);
        }

      next[0] = dpo0->dpoi_next_node;
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;

      vlib_increment_combined_counter
        (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));

      b += 1;
      next += 1;
      n_left -= 1;
    }

  /* enqueue every buffer to the next node recorded for it in nexts[] */
  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
  if (node->flags & VLIB_NODE_FLAG_TRACE)
    ip4_forward_next_trace (vm, node, frame, VLIB_TX);

  return frame->n_vectors;
}
267
/*
 * Graph registration for the "ip4-load-balance" node.
 * It is registered as a sibling of "ip4-lookup" (no next nodes are listed
 * here) and its traces are rendered with format_ip4_lookup_trace.
 */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_load_balance_node) =
{
  .name = "ip4-load-balance",
  .vector_size = sizeof (u32),
  .sibling_of = "ip4-lookup",
  .format_trace = format_ip4_lookup_trace,
};
/* *INDENT-ON* */
277
278 #ifndef CLIB_MARCH_VARIANT
279 /* get first interface address */
280 ip4_address_t *
281 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
282                              ip_interface_address_t ** result_ia)
283 {
284   ip_lookup_main_t *lm = &im->lookup_main;
285   ip_interface_address_t *ia = 0;
286   ip4_address_t *result = 0;
287
288   /* *INDENT-OFF* */
289   foreach_ip_interface_address
290     (lm, ia, sw_if_index,
291      1 /* honor unnumbered */ ,
292      ({
293        ip4_address_t * a =
294          ip_interface_address_get_address (lm, ia);
295        result = a;
296        break;
297      }));
298   /* *INDENT-OFF* */
299   if (result_ia)
300     *result_ia = result ? ia : 0;
301   return result;
302 }
303 #endif
304
305 static void
306 ip4_add_subnet_bcast_route (u32 fib_index,
307                             fib_prefix_t *pfx,
308                             u32 sw_if_index)
309 {
310   vnet_sw_interface_flags_t iflags;
311
312   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
313
314   fib_table_entry_special_remove(fib_index,
315                                  pfx,
316                                  FIB_SOURCE_INTERFACE);
317
318   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
319     {
320       fib_table_entry_update_one_path (fib_index, pfx,
321                                        FIB_SOURCE_INTERFACE,
322                                        FIB_ENTRY_FLAG_NONE,
323                                        DPO_PROTO_IP4,
324                                        /* No next-hop address */
325                                        &ADJ_BCAST_ADDR,
326                                        sw_if_index,
327                                        // invalid FIB index
328                                        ~0,
329                                        1,
330                                        // no out-label stack
331                                        NULL,
332                                        FIB_ROUTE_PATH_FLAG_NONE);
333     }
334   else
335     {
336         fib_table_entry_special_add(fib_index,
337                                     pfx,
338                                     FIB_SOURCE_INTERFACE,
339                                     (FIB_ENTRY_FLAG_DROP |
340                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
341     }
342 }
343
344 static void
345 ip4_add_interface_prefix_routes (ip4_main_t *im,
346                                  u32 sw_if_index,
347                                  u32 fib_index,
348                                  ip_interface_address_t * a)
349 {
350   ip_lookup_main_t *lm = &im->lookup_main;
351   ip_interface_prefix_t *if_prefix;
352   ip4_address_t *address = ip_interface_address_get_address (lm, a);
353
354   ip_interface_prefix_key_t key = {
355     .prefix = {
356       .fp_len = a->address_length,
357       .fp_proto = FIB_PROTOCOL_IP4,
358       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
359     },
360     .sw_if_index = sw_if_index,
361   };
362
363   fib_prefix_t pfx_special = {
364     .fp_proto = FIB_PROTOCOL_IP4,
365   };
366
367   /* If prefix already set on interface, just increment ref count & return */
368   if_prefix = ip_get_interface_prefix (lm, &key);
369   if (if_prefix)
370     {
371       if_prefix->ref_count += 1;
372       return;
373     }
374
375   /* New prefix - allocate a pool entry, initialize it, add to the hash */
376   pool_get (lm->if_prefix_pool, if_prefix);
377   if_prefix->ref_count = 1;
378   if_prefix->src_ia_index = a - lm->if_address_pool;
379   clib_memcpy (&if_prefix->key, &key, sizeof (key));
380   mhash_set (&lm->prefix_to_if_prefix_index, &key,
381              if_prefix - lm->if_prefix_pool, 0 /* old value */);
382
383   pfx_special.fp_len = a->address_length;
384   pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
385
386   /* set the glean route for the prefix */
387   fib_table_entry_update_one_path (fib_index, &pfx_special,
388                                    FIB_SOURCE_INTERFACE,
389                                    (FIB_ENTRY_FLAG_CONNECTED |
390                                     FIB_ENTRY_FLAG_ATTACHED),
391                                    DPO_PROTO_IP4,
392                                    /* No next-hop address */
393                                    NULL,
394                                    sw_if_index,
395                                    /* invalid FIB index */
396                                    ~0,
397                                    1,
398                                    /* no out-label stack */
399                                    NULL,
400                                    FIB_ROUTE_PATH_FLAG_NONE);
401
402   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
403   if (a->address_length <= 30)
404     {
405       /* set a drop route for the base address of the prefix */
406       pfx_special.fp_len = 32;
407       pfx_special.fp_addr.ip4.as_u32 =
408         address->as_u32 & im->fib_masks[a->address_length];
409
410       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
411         fib_table_entry_special_add (fib_index, &pfx_special,
412                                      FIB_SOURCE_INTERFACE,
413                                      (FIB_ENTRY_FLAG_DROP |
414                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
415
416       /* set a route for the broadcast address of the prefix */
417       pfx_special.fp_len = 32;
418       pfx_special.fp_addr.ip4.as_u32 =
419         address->as_u32 | ~im->fib_masks[a->address_length];
420       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
421         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
422
423
424     }
425   /* length == 31 - add an attached route for the other address */
426   else if (a->address_length == 31)
427     {
428       pfx_special.fp_len = 32;
429       pfx_special.fp_addr.ip4.as_u32 =
430         address->as_u32 ^ clib_host_to_net_u32(1);
431
432       fib_table_entry_update_one_path (fib_index, &pfx_special,
433                                        FIB_SOURCE_INTERFACE,
434                                        (FIB_ENTRY_FLAG_ATTACHED),
435                                        DPO_PROTO_IP4,
436                                        &pfx_special.fp_addr,
437                                        sw_if_index,
438                                        /* invalid FIB index */
439                                        ~0,
440                                        1,
441                                        NULL,
442                                        FIB_ROUTE_PATH_FLAG_NONE);
443     }
444 }
445
446 static void
447 ip4_add_interface_routes (u32 sw_if_index,
448                           ip4_main_t * im, u32 fib_index,
449                           ip_interface_address_t * a)
450 {
451   ip_lookup_main_t *lm = &im->lookup_main;
452   ip4_address_t *address = ip_interface_address_get_address (lm, a);
453   fib_prefix_t pfx = {
454     .fp_len = 32,
455     .fp_proto = FIB_PROTOCOL_IP4,
456     .fp_addr.ip4 = *address,
457   };
458
459   /* set special routes for the prefix if needed */
460   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
461
462   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
463     {
464       u32 classify_table_index =
465         lm->classify_table_index_by_sw_if_index[sw_if_index];
466       if (classify_table_index != (u32) ~ 0)
467         {
468           dpo_id_t dpo = DPO_INVALID;
469
470           dpo_set (&dpo,
471                    DPO_CLASSIFY,
472                    DPO_PROTO_IP4,
473                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
474
475           fib_table_entry_special_dpo_add (fib_index,
476                                            &pfx,
477                                            FIB_SOURCE_CLASSIFY,
478                                            FIB_ENTRY_FLAG_NONE, &dpo);
479           dpo_reset (&dpo);
480         }
481     }
482
483   fib_table_entry_update_one_path (fib_index, &pfx,
484                                    FIB_SOURCE_INTERFACE,
485                                    (FIB_ENTRY_FLAG_CONNECTED |
486                                     FIB_ENTRY_FLAG_LOCAL),
487                                    DPO_PROTO_IP4,
488                                    &pfx.fp_addr,
489                                    sw_if_index,
490                                    // invalid FIB index
491                                    ~0,
492                                    1, NULL,
493                                    FIB_ROUTE_PATH_FLAG_NONE);
494 }
495
496 static void
497 ip4_del_interface_prefix_routes (ip4_main_t * im,
498                                  u32 sw_if_index,
499                                  u32 fib_index,
500                                  ip4_address_t * address,
501                                  u32 address_length)
502 {
503   ip_lookup_main_t *lm = &im->lookup_main;
504   ip_interface_prefix_t *if_prefix;
505
506   ip_interface_prefix_key_t key = {
507     .prefix = {
508       .fp_len = address_length,
509       .fp_proto = FIB_PROTOCOL_IP4,
510       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
511     },
512     .sw_if_index = sw_if_index,
513   };
514
515   fib_prefix_t pfx_special = {
516     .fp_len = 32,
517     .fp_proto = FIB_PROTOCOL_IP4,
518   };
519
520   if_prefix = ip_get_interface_prefix (lm, &key);
521   if (!if_prefix)
522     {
523       clib_warning ("Prefix not found while deleting %U",
524                     format_ip4_address_and_length, address, address_length);
525       return;
526     }
527
528   if_prefix->ref_count -= 1;
529
530   /*
531    * Routes need to be adjusted if deleting last intf addr in prefix
532    *
533    * We're done now otherwise
534    */
535   if (if_prefix->ref_count > 0)
536     return;
537
538   /* length <= 30, delete glean route, first address, last address */
539   if (address_length <= 30)
540     {
541       /* Less work to do in FIB if we remove the covered /32s first */
542
543       /* first address in prefix */
544       pfx_special.fp_addr.ip4.as_u32 =
545         address->as_u32 & im->fib_masks[address_length];
546       pfx_special.fp_len = 32;
547
548       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
549         fib_table_entry_special_remove (fib_index,
550                                         &pfx_special,
551                                         FIB_SOURCE_INTERFACE);
552
553       /* prefix broadcast address */
554       pfx_special.fp_addr.ip4.as_u32 =
555         address->as_u32 | ~im->fib_masks[address_length];
556       pfx_special.fp_len = 32;
557
558       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
559         fib_table_entry_special_remove (fib_index,
560                                         &pfx_special,
561                                         FIB_SOURCE_INTERFACE);
562     }
563   else if (address_length == 31)
564     {
565       /* length == 31, delete attached route for the other address */
566       pfx_special.fp_addr.ip4.as_u32 =
567         address->as_u32 ^ clib_host_to_net_u32(1);
568
569       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
570     }
571
572   /* remove glean route for prefix */
573   pfx_special.fp_addr.ip4 = *address;
574   pfx_special.fp_len = address_length;
575   fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
576
577   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
578   pool_put (lm->if_prefix_pool, if_prefix);
579 }
580
581 static void
582 ip4_del_interface_routes (u32 sw_if_index,
583                           ip4_main_t * im,
584                           u32 fib_index,
585                           ip4_address_t * address, u32 address_length)
586 {
587   fib_prefix_t pfx = {
588     .fp_len = 32,
589     .fp_proto = FIB_PROTOCOL_IP4,
590     .fp_addr.ip4 = *address,
591   };
592
593   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
594
595   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
596                                    address, address_length);
597 }
598
599 #ifndef CLIB_MARCH_VARIANT
600 void
601 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
602 {
603   ip4_main_t *im = &ip4_main;
604   vnet_main_t *vnm = vnet_get_main ();
605   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
606
607   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
608
609   /*
610    * enable/disable only on the 1<->0 transition
611    */
612   if (is_enable)
613     {
614       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
615         return;
616     }
617   else
618     {
619       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
620       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
621         return;
622     }
623   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
624                                !is_enable, 0, 0);
625
626
627   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
628                                sw_if_index, !is_enable, 0, 0);
629
630   if (is_enable)
631     hi->l3_if_count++;
632   else if (hi->l3_if_count)
633     hi->l3_if_count--;
634
635   {
636     ip4_enable_disable_interface_callback_t *cb;
637     vec_foreach (cb, im->enable_disable_interface_callbacks)
638       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
639   }
640 }
641
/**
 * Add or delete an IPv4 address on a software interface.
 *
 * Outline:
 *  1. sw_if_index 0 (local0) is rejected outright.
 *  2. On add, every interface in the same FIB table is walked and the
 *     request is refused if the new prefix overlaps an existing one, except
 *     for a different address of the same length on the same interface and
 *     for addresses flagged stale.
 *  3. The address is looked up; delete requires it to exist, add requires
 *     it not to exist unless the existing entry is flagged stale.
 *  4. On success IPv4 and the IPv4 mfib are enabled/disabled on the
 *     interface, the interface routes are added/removed if the interface
 *     is admin up, and the add/del address callbacks are invoked.
 *
 * @param vm              vlib main (only passed on to the recursive call)
 * @param sw_if_index     interface to configure
 * @param address         the address to add or delete
 * @param address_length  its prefix length
 * @param is_del          non-zero to delete, zero to add
 * @return 0 on success, otherwise a clib_error_t describing the failure;
 *         several failure paths also set vnm->api_errno
 */
static clib_error_t *
ip4_add_del_interface_address_internal (vlib_main_t * vm,
                                        u32 sw_if_index,
                                        ip4_address_t * address,
                                        u32 address_length, u32 is_del)
{
  vnet_main_t *vnm = vnet_get_main ();
  ip4_main_t *im = &ip4_main;
  ip_lookup_main_t *lm = &im->lookup_main;
  clib_error_t *error = 0;
  u32 if_address_index;
  ip4_address_fib_t ip4_af, *addr_fib = 0;

  /* local0 interface doesn't support IP addressing  */
  if (sw_if_index == 0)
    {
      /* direct return is fine here: addr_fib has not been allocated yet */
      return
       clib_error_create ("local0 interface doesn't support IP addressing");
    }

  /* build the (address, fib index) key used by the address lookup below */
  vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
  ip4_addr_fib_init (&ip4_af, address,
                     vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
  vec_add1 (addr_fib, ip4_af);

  /*
   * there is no support for adj-fib handling in the presence of overlapping
   * subnets on interfaces. Easy fix - disallow overlapping subnets, like
   * most routers do.
   */
  /* *INDENT-OFF* */
  if (!is_del)
    {
      /* When adding an address check that it does not conflict
         with an existing address on any interface in this table. */
      ip_interface_address_t *ia;
      vnet_sw_interface_t *sif;

      pool_foreach(sif, vnm->interface_main.sw_interfaces,
      ({
          /* only interfaces bound to the same FIB table can conflict */
          if (im->fib_index_by_sw_if_index[sw_if_index] ==
              im->fib_index_by_sw_if_index[sif->sw_if_index])
            {
              foreach_ip_interface_address
                (&im->lookup_main, ia, sif->sw_if_index,
                 0 /* honor unnumbered */ ,
                 ({
                   ip4_address_t * x =
                     ip_interface_address_get_address
                     (&im->lookup_main, ia);

                   /* overlap in either direction: new within existing
                    * prefix, or existing within new prefix */
                   if (ip4_destination_matches_route
                       (im, address, x, ia->address_length) ||
                       ip4_destination_matches_route (im,
                                                      x,
                                                      address,
                                                      address_length))
                     {
                       /* an intf may have >1 addr from the same prefix */
                       if ((sw_if_index == sif->sw_if_index) &&
                           (ia->address_length == address_length) &&
                           (x->as_u32 != address->as_u32))
                         continue;

                       if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
                         /* if the address we're comparing against is stale
                          * then the CP has not added this one back yet, maybe
                          * it never will, so we have to assume it won't and
                          * ignore it. if it does add it back, then it will fail
                          * because this one is now present */
                         continue;

                       /* error if the length or intf was different */
                       vnm->api_errno = VNET_API_ERROR_ADDRESS_IN_USE;

                       error = clib_error_create
                         ("failed to add %U on %U which conflicts with %U for interface %U",
                          format_ip4_address_and_length, address,
                          address_length,
                          format_vnet_sw_if_index_name, vnm,
                          sw_if_index,
                          format_ip4_address_and_length, x,
                          ia->address_length,
                          format_vnet_sw_if_index_name, vnm,
                          sif->sw_if_index);
                       goto done;
                     }
                 }));
            }
      }));
    }
  /* *INDENT-ON* */

  /* ~0 means the address is not currently configured */
  if_address_index = ip_interface_address_find (lm, addr_fib, address_length);

  if (is_del)
    {
      if (~0 == if_address_index)
        {
          vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
          error = clib_error_create ("%U not found for interface %U",
                                     lm->format_address_and_length,
                                     addr_fib, address_length,
                                     format_vnet_sw_if_index_name, vnm,
                                     sw_if_index);
          goto done;
        }

      error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib,
                                        address_length, sw_if_index);
      if (error)
        goto done;
    }
  else
    {
      if (~0 != if_address_index)
        {
          ip_interface_address_t *ia;

          ia = pool_elt_at_index (lm->if_address_pool, if_address_index);

          if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
            {
              if (ia->sw_if_index == sw_if_index)
                {
                  /* re-adding an address during the replace action.
                   * consider this the update. clear the flag and
                   * we're done */
                  ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
                  goto done;
                }
              else
                {
                  /* The prefix is moving from one interface to another.
                   * delete the stale and add the new */
                  /* NOTE(review): the clib_error_t returned by this
                   * recursive delete is not captured, so a failure here is
                   * neither reported nor freed - confirm this is intended */
                  ip4_add_del_interface_address_internal (vm,
                                                          ia->sw_if_index,
                                                          address,
                                                          address_length, 1);
                  /* ia referred to the entry just deleted; don't reuse it */
                  ia = NULL;
                  error = ip_interface_address_add (lm, sw_if_index,
                                                    addr_fib, address_length,
                                                    &if_address_index);
                }
            }
          else
            {
              vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
              error = clib_error_create
                ("Prefix %U already found on interface %U",
                 lm->format_address_and_length, addr_fib, address_length,
                 format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
            }
        }
      else
        error = ip_interface_address_add (lm, sw_if_index,
                                          addr_fib, address_length,
                                          &if_address_index);
    }

  if (error)
    goto done;

  /* each configured address holds one enable reference on the interface */
  ip4_sw_interface_enable_disable (sw_if_index, !is_del);
  ip4_mfib_interface_enable_disable (sw_if_index, !is_del);

  /* intf addr routes are added/deleted on admin up/down */
  if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
    {
      if (is_del)
        ip4_del_interface_routes (sw_if_index,
                                  im, ip4_af.fib_index, address,
                                  address_length);
      else
        ip4_add_interface_routes (sw_if_index,
                                  im, ip4_af.fib_index,
                                  pool_elt_at_index
                                  (lm->if_address_pool, if_address_index));
    }

  /* tell registered listeners about the address change */
  ip4_add_del_interface_address_callback_t *cb;
  vec_foreach (cb, im->add_del_interface_address_callbacks)
    cb->function (im, cb->function_opaque, sw_if_index,
                  address, address_length, if_address_index, is_del);

done:
  /* common exit: the lookup key vector is released on every path */
  vec_free (addr_fib);
  return error;
}
831
832 clib_error_t *
833 ip4_add_del_interface_address (vlib_main_t * vm,
834                                u32 sw_if_index,
835                                ip4_address_t * address,
836                                u32 address_length, u32 is_del)
837 {
838   return ip4_add_del_interface_address_internal
839     (vm, sw_if_index, address, address_length, is_del);
840 }
841
842 void
843 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
844 {
845   ip_interface_address_t *ia;
846   ip4_main_t *im;
847
848   im = &ip4_main;
849
850   /*
851    * when directed broadcast is enabled, the subnet braodcast route will forward
852    * packets using an adjacency with a broadcast MAC. otherwise it drops
853    */
854   /* *INDENT-OFF* */
855   foreach_ip_interface_address(&im->lookup_main, ia,
856                                sw_if_index, 0,
857      ({
858        if (ia->address_length <= 30)
859          {
860            ip4_address_t *ipa;
861
862            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
863
864            fib_prefix_t pfx = {
865              .fp_len = 32,
866              .fp_proto = FIB_PROTOCOL_IP4,
867              .fp_addr = {
868                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
869              },
870            };
871
872            ip4_add_subnet_bcast_route
873              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
874                                                   sw_if_index),
875               &pfx, sw_if_index);
876          }
877      }));
878   /* *INDENT-ON* */
879 }
880 #endif
881
882 static clib_error_t *
883 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
884 {
885   ip4_main_t *im = &ip4_main;
886   ip_interface_address_t *ia;
887   ip4_address_t *a;
888   u32 is_admin_up, fib_index;
889
890   /* Fill in lookup tables with default table (0). */
891   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
892
893   vec_validate_init_empty (im->
894                            lookup_main.if_address_pool_index_by_sw_if_index,
895                            sw_if_index, ~0);
896
897   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
898
899   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
900
901   /* *INDENT-OFF* */
902   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
903                                 0 /* honor unnumbered */,
904   ({
905     a = ip_interface_address_get_address (&im->lookup_main, ia);
906     if (is_admin_up)
907       ip4_add_interface_routes (sw_if_index,
908                                 im, fib_index,
909                                 ia);
910     else
911       ip4_del_interface_routes (sw_if_index,
912                                 im, fib_index,
913                                 a, ia->address_length);
914   }));
915   /* *INDENT-ON* */
916
917   return 0;
918 }
919
920 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
921
922 /* Built-in ip4 unicast rx feature path definition */
923 /* *INDENT-OFF* */
924 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
925 {
926   .arc_name = "ip4-unicast",
927   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
928   .last_in_arc = "ip4-lookup",
929   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
930 };
931
932 VNET_FEATURE_INIT (ip4_flow_classify, static) =
933 {
934   .arc_name = "ip4-unicast",
935   .node_name = "ip4-flow-classify",
936   .runs_before = VNET_FEATURES ("ip4-inacl"),
937 };
938
939 VNET_FEATURE_INIT (ip4_inacl, static) =
940 {
941   .arc_name = "ip4-unicast",
942   .node_name = "ip4-inacl",
943   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
944 };
945
946 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
947 {
948   .arc_name = "ip4-unicast",
949   .node_name = "ip4-source-and-port-range-check-rx",
950   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
951 };
952
953 VNET_FEATURE_INIT (ip4_policer_classify, static) =
954 {
955   .arc_name = "ip4-unicast",
956   .node_name = "ip4-policer-classify",
957   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
958 };
959
960 VNET_FEATURE_INIT (ip4_ipsec, static) =
961 {
962   .arc_name = "ip4-unicast",
963   .node_name = "ipsec4-input-feature",
964   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
965 };
966
967 VNET_FEATURE_INIT (ip4_vpath, static) =
968 {
969   .arc_name = "ip4-unicast",
970   .node_name = "vpath-input-ip4",
971   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
972 };
973
974 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
975 {
976   .arc_name = "ip4-unicast",
977   .node_name = "ip4-vxlan-bypass",
978   .runs_before = VNET_FEATURES ("ip4-lookup"),
979 };
980
981 VNET_FEATURE_INIT (ip4_not_enabled, static) =
982 {
983   .arc_name = "ip4-unicast",
984   .node_name = "ip4-not-enabled",
985   .runs_before = VNET_FEATURES ("ip4-lookup"),
986 };
987
988 VNET_FEATURE_INIT (ip4_lookup, static) =
989 {
990   .arc_name = "ip4-unicast",
991   .node_name = "ip4-lookup",
992   .runs_before = 0,     /* not before any other features */
993 };
994
995 /* Built-in ip4 multicast rx feature path definition */
996 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
997 {
998   .arc_name = "ip4-multicast",
999   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1000   .last_in_arc = "ip4-mfib-forward-lookup",
1001   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1002 };
1003
1004 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1005 {
1006   .arc_name = "ip4-multicast",
1007   .node_name = "vpath-input-ip4",
1008   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1009 };
1010
1011 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
1012 {
1013   .arc_name = "ip4-multicast",
1014   .node_name = "ip4-not-enabled",
1015   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1016 };
1017
1018 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1019 {
1020   .arc_name = "ip4-multicast",
1021   .node_name = "ip4-mfib-forward-lookup",
1022   .runs_before = 0,     /* last feature */
1023 };
1024
1025 /* Source and port-range check ip4 tx feature path definition */
1026 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1027 {
1028   .arc_name = "ip4-output",
1029   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1030   .last_in_arc = "interface-output",
1031   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1032 };
1033
1034 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1035 {
1036   .arc_name = "ip4-output",
1037   .node_name = "ip4-source-and-port-range-check-tx",
1038   .runs_before = VNET_FEATURES ("ip4-outacl"),
1039 };
1040
1041 VNET_FEATURE_INIT (ip4_outacl, static) =
1042 {
1043   .arc_name = "ip4-output",
1044   .node_name = "ip4-outacl",
1045   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1046 };
1047
1048 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1049 {
1050   .arc_name = "ip4-output",
1051   .node_name = "ipsec4-output-feature",
1052   .runs_before = VNET_FEATURES ("interface-output"),
1053 };
1054
1055 /* Built-in ip4 tx feature path definition */
1056 VNET_FEATURE_INIT (ip4_interface_output, static) =
1057 {
1058   .arc_name = "ip4-output",
1059   .node_name = "interface-output",
1060   .runs_before = 0,     /* not before any other features */
1061 };
1062 /* *INDENT-ON* */
1063
1064 static clib_error_t *
1065 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1066 {
1067   ip4_main_t *im = &ip4_main;
1068
1069   /* Fill in lookup tables with default table (0). */
1070   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1071   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1072
1073   if (!is_add)
1074     {
1075       ip4_main_t *im4 = &ip4_main;
1076       ip_lookup_main_t *lm4 = &im4->lookup_main;
1077       ip_interface_address_t *ia = 0;
1078       ip4_address_t *address;
1079       vlib_main_t *vm = vlib_get_main ();
1080
1081       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1082       /* *INDENT-OFF* */
1083       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1084       ({
1085         address = ip_interface_address_get_address (lm4, ia);
1086         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1087       }));
1088       /* *INDENT-ON* */
1089       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1090     }
1091
1092   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1093                                is_add, 0, 0);
1094
1095   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1096                                sw_if_index, is_add, 0, 0);
1097
1098   return /* no error */ 0;
1099 }
1100
1101 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1102
/*
 * Global IP4 main.
 * Defined only when CLIB_MARCH_VARIANT is not set, so the object is
 * emitted once rather than in every march-variant compilation of this file.
 */
#ifndef CLIB_MARCH_VARIANT
ip4_main_t ip4_main;
#endif /* CLIB_MARCH_VARIANT */
1107
1108 static clib_error_t *
1109 ip4_lookup_init (vlib_main_t * vm)
1110 {
1111   ip4_main_t *im = &ip4_main;
1112   clib_error_t *error;
1113   uword i;
1114
1115   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1116     return error;
1117   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1118     return (error);
1119   if ((error = vlib_call_init_function (vm, fib_module_init)))
1120     return error;
1121   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1122     return error;
1123
1124   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1125     {
1126       u32 m;
1127
1128       if (i < 32)
1129         m = pow2_mask (i) << (32 - i);
1130       else
1131         m = ~0;
1132       im->fib_masks[i] = clib_host_to_net_u32 (m);
1133     }
1134
1135   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1136
1137   /* Create FIB with index 0 and table id of 0. */
1138   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1139                                      FIB_SOURCE_DEFAULT_ROUTE);
1140   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1141                                       MFIB_SOURCE_DEFAULT_ROUTE);
1142
1143   {
1144     pg_node_t *pn;
1145     pn = pg_get_node (ip4_lookup_node.index);
1146     pn->unformat_edit = unformat_pg_ip4_header;
1147   }
1148
1149   {
1150     ethernet_arp_header_t h;
1151
1152     clib_memset (&h, 0, sizeof (h));
1153
1154 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1155 #define _8(f,v) h.f = v;
1156     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1157     _16 (l3_type, ETHERNET_TYPE_IP4);
1158     _8 (n_l2_address_bytes, 6);
1159     _8 (n_l3_address_bytes, 4);
1160     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1161 #undef _16
1162 #undef _8
1163
1164     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1165                                /* data */ &h,
1166                                sizeof (h),
1167                                /* alloc chunk size */ 8,
1168                                "ip4 arp");
1169   }
1170
1171   return error;
1172 }
1173
1174 VLIB_INIT_FUNCTION (ip4_lookup_init);
1175
1176 typedef struct
1177 {
1178   /* Adjacency taken. */
1179   u32 dpo_index;
1180   u32 flow_hash;
1181   u32 fib_index;
1182
1183   /* Packet data, possibly *after* rewrite. */
1184   u8 packet_data[64 - 1 * sizeof (u32)];
1185 }
1186 ip4_forward_next_trace_t;
1187
1188 #ifndef CLIB_MARCH_VARIANT
1189 u8 *
1190 format_ip4_forward_next_trace (u8 * s, va_list * args)
1191 {
1192   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1193   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1194   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1195   u32 indent = format_get_indent (s);
1196   s = format (s, "%U%U",
1197               format_white_space, indent,
1198               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1199   return s;
1200 }
1201 #endif
1202
1203 static u8 *
1204 format_ip4_lookup_trace (u8 * s, va_list * args)
1205 {
1206   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1207   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1208   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1209   u32 indent = format_get_indent (s);
1210
1211   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1212               t->fib_index, t->dpo_index, t->flow_hash);
1213   s = format (s, "\n%U%U",
1214               format_white_space, indent,
1215               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1216   return s;
1217 }
1218
1219 static u8 *
1220 format_ip4_rewrite_trace (u8 * s, va_list * args)
1221 {
1222   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1223   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1224   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1225   u32 indent = format_get_indent (s);
1226
1227   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1228               t->fib_index, t->dpo_index, format_ip_adjacency,
1229               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1230   s = format (s, "\n%U%U",
1231               format_white_space, indent,
1232               format_ip_adjacency_packet_data,
1233               t->packet_data, sizeof (t->packet_data));
1234   return s;
1235 }
1236
1237 #ifndef CLIB_MARCH_VARIANT
1238 /* Common trace function for all ip4-forward next nodes. */
1239 void
1240 ip4_forward_next_trace (vlib_main_t * vm,
1241                         vlib_node_runtime_t * node,
1242                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1243 {
1244   u32 *from, n_left;
1245   ip4_main_t *im = &ip4_main;
1246
1247   n_left = frame->n_vectors;
1248   from = vlib_frame_vector_args (frame);
1249
1250   while (n_left >= 4)
1251     {
1252       u32 bi0, bi1;
1253       vlib_buffer_t *b0, *b1;
1254       ip4_forward_next_trace_t *t0, *t1;
1255
1256       /* Prefetch next iteration. */
1257       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1258       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1259
1260       bi0 = from[0];
1261       bi1 = from[1];
1262
1263       b0 = vlib_get_buffer (vm, bi0);
1264       b1 = vlib_get_buffer (vm, bi1);
1265
1266       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1267         {
1268           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1269           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1270           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1271           t0->fib_index =
1272             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1273              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1274             vec_elt (im->fib_index_by_sw_if_index,
1275                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1276
1277           clib_memcpy_fast (t0->packet_data,
1278                             vlib_buffer_get_current (b0),
1279                             sizeof (t0->packet_data));
1280         }
1281       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1282         {
1283           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1284           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1285           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1286           t1->fib_index =
1287             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1288              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1289             vec_elt (im->fib_index_by_sw_if_index,
1290                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1291           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1292                             sizeof (t1->packet_data));
1293         }
1294       from += 2;
1295       n_left -= 2;
1296     }
1297
1298   while (n_left >= 1)
1299     {
1300       u32 bi0;
1301       vlib_buffer_t *b0;
1302       ip4_forward_next_trace_t *t0;
1303
1304       bi0 = from[0];
1305
1306       b0 = vlib_get_buffer (vm, bi0);
1307
1308       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1309         {
1310           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1311           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1312           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1313           t0->fib_index =
1314             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1315              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1316             vec_elt (im->fib_index_by_sw_if_index,
1317                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1318           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1319                             sizeof (t0->packet_data));
1320         }
1321       from += 1;
1322       n_left -= 1;
1323     }
1324 }
1325
1326 /* Compute TCP/UDP/ICMP4 checksum in software. */
1327 u16
1328 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1329                               ip4_header_t * ip0)
1330 {
1331   ip_csum_t sum0;
1332   u32 ip_header_length, payload_length_host_byte_order;
1333
1334   /* Initialize checksum with ip header. */
1335   ip_header_length = ip4_header_bytes (ip0);
1336   payload_length_host_byte_order =
1337     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1338   sum0 =
1339     clib_host_to_net_u32 (payload_length_host_byte_order +
1340                           (ip0->protocol << 16));
1341
1342   if (BITS (uword) == 32)
1343     {
1344       sum0 =
1345         ip_csum_with_carry (sum0,
1346                             clib_mem_unaligned (&ip0->src_address, u32));
1347       sum0 =
1348         ip_csum_with_carry (sum0,
1349                             clib_mem_unaligned (&ip0->dst_address, u32));
1350     }
1351   else
1352     sum0 =
1353       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1354
1355   return ip_calculate_l4_checksum (vm, p0, sum0,
1356                                    payload_length_host_byte_order, (u8 *) ip0,
1357                                    ip_header_length, NULL);
1358 }
1359
1360 u32
1361 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1362 {
1363   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1364   udp_header_t *udp0;
1365   u16 sum16;
1366
1367   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1368           || ip0->protocol == IP_PROTOCOL_UDP);
1369
1370   udp0 = (void *) (ip0 + 1);
1371   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1372     {
1373       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1374                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1375       return p0->flags;
1376     }
1377
1378   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1379
1380   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1381                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1382
1383   return p0->flags;
1384 }
1385 #endif
1386
1387 /* *INDENT-OFF* */
1388 VNET_FEATURE_ARC_INIT (ip4_local) =
1389 {
1390   .arc_name  = "ip4-local",
1391   .start_nodes = VNET_FEATURES ("ip4-local"),
1392   .last_in_arc = "ip4-local-end-of-arc",
1393 };
1394 /* *INDENT-ON* */
1395
1396 static inline void
1397 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1398                             ip4_header_t * ip, u8 is_udp, u8 * error,
1399                             u8 * good_tcp_udp)
1400 {
1401   u32 flags0;
1402   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1403   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1404   if (is_udp)
1405     {
1406       udp_header_t *udp;
1407       u32 ip_len, udp_len;
1408       i32 len_diff;
1409       udp = ip4_next_header (ip);
1410       /* Verify UDP length. */
1411       ip_len = clib_net_to_host_u16 (ip->length);
1412       udp_len = clib_net_to_host_u16 (udp->length);
1413
1414       len_diff = ip_len - udp_len;
1415       *good_tcp_udp &= len_diff >= 0;
1416       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1417     }
1418 }
1419
/*
 * Checksum-state predicates on a buffer pointer _b.
 *
 * Every argument and every expansion is fully parenthesised so the macros
 * compose safely with surrounding operators (e.g. a leading '!') and accept
 * arbitrary pointer expressions (e.g. &buf).
 */

/* True when TCP or UDP checksum offload is flagged on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
     || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/*
 * True when a TCP/UDP packet still needs its checksum verified in
 * software: it has been neither computed already nor offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)            \
          || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
      || ip4_local_csum_is_offloaded (_b)) != 0)
1431
1432 static inline void
1433 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1434                          ip4_header_t * ih, u8 * error)
1435 {
1436   u8 is_udp, is_tcp_udp, good_tcp_udp;
1437
1438   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1439   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1440
1441   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1442     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1443   else
1444     good_tcp_udp = ip4_local_csum_is_valid (b);
1445
1446   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1447   *error = (is_tcp_udp && !good_tcp_udp
1448             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1449 }
1450
1451 static inline void
1452 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1453                             ip4_header_t ** ih, u8 * error)
1454 {
1455   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1456
1457   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1458   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1459
1460   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1461   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1462
1463   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1464   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1465
1466   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1467                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1468     {
1469       if (is_tcp_udp[0])
1470         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1471                                     &good_tcp_udp[0]);
1472       if (is_tcp_udp[1])
1473         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1474                                     &good_tcp_udp[1]);
1475     }
1476
1477   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1478               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1479   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1480               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1481 }
1482
1483 static inline void
1484 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1485                               vlib_buffer_t * b, u16 * next, u8 error,
1486                               u8 head_of_feature_arc)
1487 {
1488   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1489   u32 next_index;
1490
1491   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1492   b->error = error ? error_node->errors[error] : 0;
1493   if (head_of_feature_arc)
1494     {
1495       next_index = *next;
1496       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1497         {
1498           vnet_feature_arc_start (arc_index,
1499                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1500                                   &next_index, b);
1501           *next = next_index;
1502         }
1503     }
1504 }
1505
/*
 * Result of the most recent source-address check, kept so that packets
 * with the same source address as the previously checked one can reuse it
 * instead of repeating the FIB lookup.
 */
typedef struct
{
  /* source address of the last packet that was looked up */
  ip4_address_t src;
  /* load-balance index that lookup produced */
  u32 lbi;
  /* error code that check produced */
  u8 error;
  /* non-zero until the first lookup has been done; forces a lookup */
  u8 first;
} ip4_local_last_check_t;
1513
1514 static inline void
1515 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1516                      ip4_local_last_check_t * last_check, u8 * error0)
1517 {
1518   ip4_fib_mtrie_leaf_t leaf0;
1519   ip4_fib_mtrie_t *mtrie0;
1520   const dpo_id_t *dpo0;
1521   load_balance_t *lb0;
1522   u32 lbi0;
1523
1524   vnet_buffer (b)->ip.fib_index =
1525     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1526     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1527
1528   /*
1529    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1530    *  adjacency for the destination address (the local interface address).
1531    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1532    *  adjacency for the source address (the remote sender's address)
1533    */
1534   if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
1535       last_check->first)
1536     {
1537       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1538       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1539       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1540       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1541       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1542
1543       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1544         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1545       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1546
1547       lb0 = load_balance_get (lbi0);
1548       dpo0 = load_balance_get_bucket_i (lb0, 0);
1549
1550       /*
1551        * Must have a route to source otherwise we drop the packet.
1552        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1553        *
1554        * The checks are:
1555        *  - the source is a recieve => it's from us => bogus, do this
1556        *    first since it sets a different error code.
1557        *  - uRPF check for any route to source - accept if passes.
1558        *  - allow packets destined to the broadcast address from unknown sources
1559        */
1560
1561       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1562                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1563                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1564       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1565                   && !fib_urpf_check_size (lb0->lb_urpf)
1566                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1567                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1568
1569       last_check->src.as_u32 = ip0->src_address.as_u32;
1570       last_check->lbi = lbi0;
1571       last_check->error = *error0;
1572       last_check->first = 0;
1573     }
1574   else
1575     {
1576       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1577         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1578       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1579       *error0 = last_check->error;
1580     }
1581 }
1582
1583 static inline void
1584 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1585                         ip4_local_last_check_t * last_check, u8 * error)
1586 {
1587   ip4_fib_mtrie_leaf_t leaf[2];
1588   ip4_fib_mtrie_t *mtrie[2];
1589   const dpo_id_t *dpo[2];
1590   load_balance_t *lb[2];
1591   u32 not_last_hit;
1592   u32 lbi[2];
1593
1594   not_last_hit = last_check->first;
1595   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1596   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1597
1598   vnet_buffer (b[0])->ip.fib_index =
1599     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1600     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1601     vnet_buffer (b[0])->ip.fib_index;
1602
1603   vnet_buffer (b[1])->ip.fib_index =
1604     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1605     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1606     vnet_buffer (b[1])->ip.fib_index;
1607
1608   /*
1609    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1610    *  adjacency for the destination address (the local interface address).
1611    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1612    *  adjacency for the source address (the remote sender's address)
1613    */
1614   if (PREDICT_TRUE (not_last_hit))
1615     {
1616       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1617       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1618
1619       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1620       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1621
1622       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1623                                            &ip[0]->src_address, 2);
1624       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1625                                            &ip[1]->src_address, 2);
1626
1627       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1628                                            &ip[0]->src_address, 3);
1629       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1630                                            &ip[1]->src_address, 3);
1631
1632       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1633       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1634
1635       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1636         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1637       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1638
1639       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1640         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1641       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1642
1643       lb[0] = load_balance_get (lbi[0]);
1644       lb[1] = load_balance_get (lbi[1]);
1645
1646       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1647       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1648
1649       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1650                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1651                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1652       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1653                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1654                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1655                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1656
1657       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1658                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1659                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1660       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1661                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1662                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1663                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1664
1665       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1666       last_check->lbi = lbi[1];
1667       last_check->error = error[1];
1668       last_check->first = 0;
1669     }
1670   else
1671     {
1672       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1673         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1674       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1675
1676       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1677         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1678       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1679
1680       error[0] = last_check->error;
1681       error[1] = last_check->error;
1682     }
1683 }
1684
/* Packet classes returned by ip4_local_classify(). */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4 = 0,    /* neither a fragment nor NATed */
  IP_LOCAL_PACKET_TYPE_NAT = 1,   /* buffer has VNET_BUFFER_F_IS_NATED set */
  IP_LOCAL_PACKET_TYPE_FRAG = 2,  /* IPv4 fragment */
};
1691
1692 /**
1693  * Determine packet type and next node.
1694  *
1695  * The expectation is that all packets that are not L4 will skip
1696  * checksums and source checks.
1697  */
1698 always_inline u8
1699 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1700 {
1701   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1702
1703   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1704     {
1705       *next = IP_LOCAL_NEXT_REASSEMBLY;
1706       return IP_LOCAL_PACKET_TYPE_FRAG;
1707     }
1708   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1709     {
1710       *next = lm->local_next_by_ip_protocol[ip->protocol];
1711       return IP_LOCAL_PACKET_TYPE_NAT;
1712     }
1713
1714   *next = lm->local_next_by_ip_protocol[ip->protocol];
1715   return IP_LOCAL_PACKET_TYPE_L4;
1716 }
1717
1718 static inline uword
1719 ip4_local_inline (vlib_main_t * vm,
1720                   vlib_node_runtime_t * node,
1721                   vlib_frame_t * frame, int head_of_feature_arc)
1722 {
1723   u32 *from, n_left_from;
1724   vlib_node_runtime_t *error_node =
1725     vlib_node_get_runtime (vm, ip4_local_node.index);
1726   u16 nexts[VLIB_FRAME_SIZE], *next;
1727   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1728   ip4_header_t *ip[2];
1729   u8 error[2], pt[2];
1730
1731   ip4_local_last_check_t last_check = {
1732     /*
1733      * 0.0.0.0 can appear as the source address of an IP packet,
1734      * as can any other address, hence the need to use the 'first'
1735      * member to make sure the .lbi is initialised for the first
1736      * packet.
1737      */
1738     .src = {.as_u32 = 0},
1739     .lbi = ~0,
1740     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1741     .first = 1,
1742   };
1743
1744   from = vlib_frame_vector_args (frame);
1745   n_left_from = frame->n_vectors;
1746
1747   if (node->flags & VLIB_NODE_FLAG_TRACE)
1748     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1749
1750   vlib_get_buffers (vm, from, bufs, n_left_from);
1751   b = bufs;
1752   next = nexts;
1753
1754   while (n_left_from >= 6)
1755     {
1756       u8 not_batch = 0;
1757
1758       /* Prefetch next iteration. */
1759       {
1760         vlib_prefetch_buffer_header (b[4], LOAD);
1761         vlib_prefetch_buffer_header (b[5], LOAD);
1762
1763         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1764         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1765       }
1766
1767       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1768
1769       ip[0] = vlib_buffer_get_current (b[0]);
1770       ip[1] = vlib_buffer_get_current (b[1]);
1771
1772       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1773       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1774
1775       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1776       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1777
1778       not_batch = pt[0] ^ pt[1];
1779
1780       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1781         goto skip_checks;
1782
1783       if (PREDICT_TRUE (not_batch == 0))
1784         {
1785           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1786           ip4_local_check_src_x2 (b, ip, &last_check, error);
1787         }
1788       else
1789         {
1790           if (!pt[0])
1791             {
1792               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1793               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1794             }
1795           if (!pt[1])
1796             {
1797               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1798               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1799             }
1800         }
1801
1802     skip_checks:
1803
1804       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1805                                     head_of_feature_arc);
1806       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1807                                     head_of_feature_arc);
1808
1809       b += 2;
1810       next += 2;
1811       n_left_from -= 2;
1812     }
1813
1814   while (n_left_from > 0)
1815     {
1816       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1817
1818       ip[0] = vlib_buffer_get_current (b[0]);
1819       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1820       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1821
1822       if (head_of_feature_arc == 0 || pt[0])
1823         goto skip_check;
1824
1825       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1826       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1827
1828     skip_check:
1829
1830       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1831                                     head_of_feature_arc);
1832
1833       b += 1;
1834       next += 1;
1835       n_left_from -= 1;
1836     }
1837
1838   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1839   return frame->n_vectors;
1840 }
1841
1842 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1843                                vlib_frame_t * frame)
1844 {
1845   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1846 }
1847
1848 /* *INDENT-OFF* */
1849 VLIB_REGISTER_NODE (ip4_local_node) =
1850 {
1851   .name = "ip4-local",
1852   .vector_size = sizeof (u32),
1853   .format_trace = format_ip4_forward_next_trace,
1854   .n_errors = IP4_N_ERROR,
1855   .error_strings = ip4_error_strings,
1856   .n_next_nodes = IP_LOCAL_N_NEXT,
1857   .next_nodes =
1858   {
1859     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1860     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1861     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1862     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1863     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1864   },
1865 };
1866 /* *INDENT-ON* */
1867
1868
1869 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1870                                           vlib_node_runtime_t * node,
1871                                           vlib_frame_t * frame)
1872 {
1873   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1874 }
1875
1876 /* *INDENT-OFF* */
1877 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1878   .name = "ip4-local-end-of-arc",
1879   .vector_size = sizeof (u32),
1880
1881   .format_trace = format_ip4_forward_next_trace,
1882   .sibling_of = "ip4-local",
1883 };
1884
1885 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1886   .arc_name = "ip4-local",
1887   .node_name = "ip4-local-end-of-arc",
1888   .runs_before = 0, /* not before any other features */
1889 };
1890 /* *INDENT-ON* */
1891
1892 #ifndef CLIB_MARCH_VARIANT
1893 void
1894 ip4_register_protocol (u32 protocol, u32 node_index)
1895 {
1896   vlib_main_t *vm = vlib_get_main ();
1897   ip4_main_t *im = &ip4_main;
1898   ip_lookup_main_t *lm = &im->lookup_main;
1899
1900   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1901   lm->local_next_by_ip_protocol[protocol] =
1902     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1903 }
1904
1905 void
1906 ip4_unregister_protocol (u32 protocol)
1907 {
1908   ip4_main_t *im = &ip4_main;
1909   ip_lookup_main_t *lm = &im->lookup_main;
1910
1911   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1912   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1913 }
1914 #endif
1915
1916 static clib_error_t *
1917 show_ip_local_command_fn (vlib_main_t * vm,
1918                           unformat_input_t * input, vlib_cli_command_t * cmd)
1919 {
1920   ip4_main_t *im = &ip4_main;
1921   ip_lookup_main_t *lm = &im->lookup_main;
1922   int i;
1923
1924   vlib_cli_output (vm, "Protocols handled by ip4_local");
1925   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1926     {
1927       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1928         {
1929           u32 node_index = vlib_get_node (vm,
1930                                           ip4_local_node.index)->
1931             next_nodes[lm->local_next_by_ip_protocol[i]];
1932           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1933                            format_vlib_node_name, vm, node_index);
1934         }
1935     }
1936   return 0;
1937 }
1938
1939
1940
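/*?
 * Display the set of protocols handled by the local IPv4 stack.
 *
 * After the heading, each output line has the form
 * "<protocol>: <node-name>", where <node-name> is the graph node that
 * receives locally-destined packets of that protocol. Protocols that are
 * still mapped to the punt next are not listed.
 *
 * @cliexpar
 * Example of how to display local protocol table:
 * @cliexstart{show ip local}
 * Protocols handled by ip4_local
 * <protocol>: <node-name>
 * <protocol>: <node-name>
 * <protocol>: <node-name>
 * @cliexend
?*/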
1953 /* *INDENT-OFF* */
1954 VLIB_CLI_COMMAND (show_ip_local, static) =
1955 {
1956   .path = "show ip local",
1957   .function = show_ip_local_command_fn,
1958   .short_help = "show ip local",
1959 };
1960 /* *INDENT-ON* */
1961
/** Fixed next-node slots used by the ip4 rewrite worker. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  IP4_REWRITE_N_NEXT,           /* Last */
} ip4_rewrite_next_t;
1969
/**
 * The bits of an IPv4 address to mask in order to construct a multicast
 * MAC address
 */
1974 #if CLIB_ARCH_IS_BIG_ENDIAN
1975 #define IP4_MCAST_ADDR_MASK 0x007fffff
1976 #else
1977 #define IP4_MCAST_ADDR_MASK 0xffff7f00
1978 #endif
1979
1980 always_inline void
1981 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
1982                u16 adj_packet_bytes, bool df, u16 * next,
1983                u8 is_midchain, u32 * error)
1984 {
1985   if (packet_len > adj_packet_bytes)
1986     {
1987       *error = IP4_ERROR_MTU_EXCEEDED;
1988       if (df)
1989         {
1990           icmp4_error_set_vnet_buffer
1991             (b, ICMP4_destination_unreachable,
1992              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
1993              adj_packet_bytes);
1994           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
1995         }
1996       else
1997         {
1998           /* IP fragmentation */
1999           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2000                                    (is_midchain ?
2001                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
2002                                     IP_FRAG_NEXT_IP_REWRITE), 0);
2003           *next = IP4_REWRITE_NEXT_FRAGMENT;
2004         }
2005     }
2006 }
2007
2008 /* increment TTL & update checksum.
2009    Works either endian, so no need for byte swap. */
2010 static_always_inline void
2011 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
2012 {
2013   i32 ttl;
2014   u32 checksum;
2015   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2016     return;
2017
2018   ttl = ip->ttl;
2019
2020   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
2021   checksum += checksum >= 0xffff;
2022
2023   ip->checksum = checksum;
2024   ttl += 1;
2025   ip->ttl = ttl;
2026
2027   ASSERT (ip4_header_checksum_is_valid (ip));
2028 }
2029
2030 /* Decrement TTL & update checksum.
2031    Works either endian, so no need for byte swap. */
2032 static_always_inline void
2033 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2034                             u32 * error)
2035 {
2036   i32 ttl;
2037   u32 checksum;
2038   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2039     return;
2040
2041   ttl = ip->ttl;
2042
2043   /* Input node should have reject packets with ttl 0. */
2044   ASSERT (ip->ttl > 0);
2045
2046   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2047   checksum += checksum >= 0xffff;
2048
2049   ip->checksum = checksum;
2050   ttl -= 1;
2051   ip->ttl = ttl;
2052
2053   /*
2054    * If the ttl drops below 1 when forwarding, generate
2055    * an ICMP response.
2056    */
2057   if (PREDICT_FALSE (ttl <= 0))
2058     {
2059       *error = IP4_ERROR_TIME_EXPIRED;
2060       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2061       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2062                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2063                                    0);
2064       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2065     }
2066
2067   /* Verify checksum. */
2068   ASSERT (ip4_header_checksum_is_valid (ip) ||
2069           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2070 }
2071
2072
2073 always_inline uword
2074 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2075                              vlib_node_runtime_t * node,
2076                              vlib_frame_t * frame,
2077                              int do_counters, int is_midchain, int is_mcast)
2078 {
2079   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2080   u32 *from = vlib_frame_vector_args (frame);
2081   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2082   u16 nexts[VLIB_FRAME_SIZE], *next;
2083   u32 n_left_from;
2084   vlib_node_runtime_t *error_node =
2085     vlib_node_get_runtime (vm, ip4_input_node.index);
2086
2087   n_left_from = frame->n_vectors;
2088   u32 thread_index = vm->thread_index;
2089
2090   vlib_get_buffers (vm, from, bufs, n_left_from);
2091   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2092
2093 #if (CLIB_N_PREFETCHES >= 8)
2094   if (n_left_from >= 6)
2095     {
2096       int i;
2097       for (i = 2; i < 6; i++)
2098         vlib_prefetch_buffer_header (bufs[i], LOAD);
2099     }
2100
2101   next = nexts;
2102   b = bufs;
2103   while (n_left_from >= 8)
2104     {
2105       const ip_adjacency_t *adj0, *adj1;
2106       ip4_header_t *ip0, *ip1;
2107       u32 rw_len0, error0, adj_index0;
2108       u32 rw_len1, error1, adj_index1;
2109       u32 tx_sw_if_index0, tx_sw_if_index1;
2110       u8 *p;
2111
2112       vlib_prefetch_buffer_header (b[6], LOAD);
2113       vlib_prefetch_buffer_header (b[7], LOAD);
2114
2115       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2116       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2117
2118       /*
2119        * pre-fetch the per-adjacency counters
2120        */
2121       if (do_counters)
2122         {
2123           vlib_prefetch_combined_counter (&adjacency_counters,
2124                                           thread_index, adj_index0);
2125           vlib_prefetch_combined_counter (&adjacency_counters,
2126                                           thread_index, adj_index1);
2127         }
2128
2129       ip0 = vlib_buffer_get_current (b[0]);
2130       ip1 = vlib_buffer_get_current (b[1]);
2131
2132       error0 = error1 = IP4_ERROR_NONE;
2133
2134       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2135       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2136
2137       /* Rewrite packet header and updates lengths. */
2138       adj0 = adj_get (adj_index0);
2139       adj1 = adj_get (adj_index1);
2140
2141       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2142       rw_len0 = adj0[0].rewrite_header.data_bytes;
2143       rw_len1 = adj1[0].rewrite_header.data_bytes;
2144       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2145       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2146
2147       p = vlib_buffer_get_current (b[2]);
2148       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2149       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2150
2151       p = vlib_buffer_get_current (b[3]);
2152       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2153       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2154
2155       /* Check MTU of outgoing interface. */
2156       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2157       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2158
2159       if (b[0]->flags & VNET_BUFFER_F_GSO)
2160         ip0_len = gso_mtu_sz (b[0]);
2161       if (b[1]->flags & VNET_BUFFER_F_GSO)
2162         ip1_len = gso_mtu_sz (b[1]);
2163
2164       ip4_mtu_check (b[0], ip0_len,
2165                      adj0[0].rewrite_header.max_l3_packet_bytes,
2166                      ip0->flags_and_fragment_offset &
2167                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2168                      next + 0, is_midchain, &error0);
2169       ip4_mtu_check (b[1], ip1_len,
2170                      adj1[0].rewrite_header.max_l3_packet_bytes,
2171                      ip1->flags_and_fragment_offset &
2172                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2173                      next + 1, is_midchain, &error1);
2174
2175       if (is_mcast)
2176         {
2177           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2178                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2179                     IP4_ERROR_SAME_INTERFACE : error0);
2180           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2181                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2182                     IP4_ERROR_SAME_INTERFACE : error1);
2183         }
2184
2185       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2186        * to see the IP header */
2187       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2188         {
2189           u32 next_index = adj0[0].rewrite_header.next_index;
2190           vlib_buffer_advance (b[0], -(word) rw_len0);
2191
2192           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2193           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2194
2195           if (PREDICT_FALSE
2196               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2197             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2198                                                 tx_sw_if_index0,
2199                                                 &next_index, b[0],
2200                                                 adj0->ia_cfg_index);
2201
2202           next[0] = next_index;
2203           if (is_midchain)
2204             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2205                                         0 /* is_ip6 */ );
2206         }
2207       else
2208         {
2209           b[0]->error = error_node->errors[error0];
2210           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2211             ip4_ttl_inc (b[0], ip0);
2212         }
2213       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2214         {
2215           u32 next_index = adj1[0].rewrite_header.next_index;
2216           vlib_buffer_advance (b[1], -(word) rw_len1);
2217
2218           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2219           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2220
2221           if (PREDICT_FALSE
2222               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2223             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2224                                                 tx_sw_if_index1,
2225                                                 &next_index, b[1],
2226                                                 adj1->ia_cfg_index);
2227           next[1] = next_index;
2228           if (is_midchain)
2229             vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ ,
2230                                         0 /* is_ip6 */ );
2231         }
2232       else
2233         {
2234           b[1]->error = error_node->errors[error1];
2235           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2236             ip4_ttl_inc (b[1], ip1);
2237         }
2238
2239       if (is_midchain)
2240         /* Guess we are only writing on ipv4 header. */
2241         vnet_rewrite_two_headers (adj0[0], adj1[0],
2242                                   ip0, ip1, sizeof (ip4_header_t));
2243       else
2244         /* Guess we are only writing on simple Ethernet header. */
2245         vnet_rewrite_two_headers (adj0[0], adj1[0],
2246                                   ip0, ip1, sizeof (ethernet_header_t));
2247
2248       if (do_counters)
2249         {
2250           if (error0 == IP4_ERROR_NONE)
2251             vlib_increment_combined_counter
2252               (&adjacency_counters,
2253                thread_index,
2254                adj_index0, 1,
2255                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2256
2257           if (error1 == IP4_ERROR_NONE)
2258             vlib_increment_combined_counter
2259               (&adjacency_counters,
2260                thread_index,
2261                adj_index1, 1,
2262                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2263         }
2264
2265       if (is_midchain)
2266         {
2267           if (error0 == IP4_ERROR_NONE)
2268             adj_midchain_fixup (vm, adj0, b[0]);
2269           if (error1 == IP4_ERROR_NONE)
2270             adj_midchain_fixup (vm, adj1, b[1]);
2271         }
2272
2273       if (is_mcast)
2274         {
2275           /* copy bytes from the IP address into the MAC rewrite */
2276           if (error0 == IP4_ERROR_NONE)
2277             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2278                                         adj0->rewrite_header.dst_mcast_offset,
2279                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2280           if (error1 == IP4_ERROR_NONE)
2281             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2282                                         adj1->rewrite_header.dst_mcast_offset,
2283                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2284         }
2285
2286       next += 2;
2287       b += 2;
2288       n_left_from -= 2;
2289     }
2290 #elif (CLIB_N_PREFETCHES >= 4)
2291   next = nexts;
2292   b = bufs;
2293   while (n_left_from >= 1)
2294     {
2295       ip_adjacency_t *adj0;
2296       ip4_header_t *ip0;
2297       u32 rw_len0, error0, adj_index0;
2298       u32 tx_sw_if_index0;
2299       u8 *p;
2300
2301       /* Prefetch next iteration */
2302       if (PREDICT_TRUE (n_left_from >= 4))
2303         {
2304           ip_adjacency_t *adj2;
2305           u32 adj_index2;
2306
2307           vlib_prefetch_buffer_header (b[3], LOAD);
2308           vlib_prefetch_buffer_data (b[2], LOAD);
2309
2310           /* Prefetch adj->rewrite_header */
2311           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2312           adj2 = adj_get (adj_index2);
2313           p = (u8 *) adj2;
2314           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2315                          LOAD);
2316         }
2317
2318       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2319
2320       /*
2321        * Prefetch the per-adjacency counters
2322        */
2323       if (do_counters)
2324         {
2325           vlib_prefetch_combined_counter (&adjacency_counters,
2326                                           thread_index, adj_index0);
2327         }
2328
2329       ip0 = vlib_buffer_get_current (b[0]);
2330
2331       error0 = IP4_ERROR_NONE;
2332
2333       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2334
2335       /* Rewrite packet header and updates lengths. */
2336       adj0 = adj_get (adj_index0);
2337
2338       /* Rewrite header was prefetched. */
2339       rw_len0 = adj0[0].rewrite_header.data_bytes;
2340       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2341
2342       /* Check MTU of outgoing interface. */
2343       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2344
2345       if (b[0]->flags & VNET_BUFFER_F_GSO)
2346         ip0_len = gso_mtu_sz (b[0]);
2347
2348       ip4_mtu_check (b[0], ip0_len,
2349                      adj0[0].rewrite_header.max_l3_packet_bytes,
2350                      ip0->flags_and_fragment_offset &
2351                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2352                      next + 0, is_midchain, &error0);
2353
2354       if (is_mcast)
2355         {
2356           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2357                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2358                     IP4_ERROR_SAME_INTERFACE : error0);
2359         }
2360
2361       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2362        * to see the IP header */
2363       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2364         {
2365           u32 next_index = adj0[0].rewrite_header.next_index;
2366           vlib_buffer_advance (b[0], -(word) rw_len0);
2367           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2368           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2369
2370           if (PREDICT_FALSE
2371               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2372             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2373                                                 tx_sw_if_index0,
2374                                                 &next_index, b[0],
2375                                                 adj0->ia_cfg_index);
2376           next[0] = next_index;
2377
2378           if (is_midchain)
2379             {
2380               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2381                                           0 /* is_ip6 */ );
2382
2383               /* Guess we are only writing on ipv4 header. */
2384               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2385             }
2386           else
2387             /* Guess we are only writing on simple Ethernet header. */
2388             vnet_rewrite_one_header (adj0[0], ip0,
2389                                      sizeof (ethernet_header_t));
2390
2391           /*
2392            * Bump the per-adjacency counters
2393            */
2394           if (do_counters)
2395             vlib_increment_combined_counter
2396               (&adjacency_counters,
2397                thread_index,
2398                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2399                                                            b[0]) + rw_len0);
2400
2401           if (is_midchain)
2402             adj_midchain_fixup (vm, adj0, b[0]);
2403
2404           if (is_mcast)
2405             /* copy bytes from the IP address into the MAC rewrite */
2406             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2407                                         adj0->rewrite_header.dst_mcast_offset,
2408                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2409         }
2410       else
2411         {
2412           b[0]->error = error_node->errors[error0];
2413           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2414             ip4_ttl_inc (b[0], ip0);
2415         }
2416
2417       next += 1;
2418       b += 1;
2419       n_left_from -= 1;
2420     }
2421 #endif
2422
2423   while (n_left_from > 0)
2424     {
2425       ip_adjacency_t *adj0;
2426       ip4_header_t *ip0;
2427       u32 rw_len0, adj_index0, error0;
2428       u32 tx_sw_if_index0;
2429
2430       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2431
2432       adj0 = adj_get (adj_index0);
2433
2434       if (do_counters)
2435         vlib_prefetch_combined_counter (&adjacency_counters,
2436                                         thread_index, adj_index0);
2437
2438       ip0 = vlib_buffer_get_current (b[0]);
2439
2440       error0 = IP4_ERROR_NONE;
2441
2442       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2443
2444
2445       /* Update packet buffer attributes/set output interface. */
2446       rw_len0 = adj0[0].rewrite_header.data_bytes;
2447       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2448
2449       /* Check MTU of outgoing interface. */
2450       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2451       if (b[0]->flags & VNET_BUFFER_F_GSO)
2452         ip0_len = gso_mtu_sz (b[0]);
2453
2454       ip4_mtu_check (b[0], ip0_len,
2455                      adj0[0].rewrite_header.max_l3_packet_bytes,
2456                      ip0->flags_and_fragment_offset &
2457                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2458                      next + 0, is_midchain, &error0);
2459
2460       if (is_mcast)
2461         {
2462           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2463                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2464                     IP4_ERROR_SAME_INTERFACE : error0);
2465         }
2466
2467       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2468        * to see the IP header */
2469       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2470         {
2471           u32 next_index = adj0[0].rewrite_header.next_index;
2472           vlib_buffer_advance (b[0], -(word) rw_len0);
2473           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2474           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2475
2476           if (PREDICT_FALSE
2477               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2478             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2479                                                 tx_sw_if_index0,
2480                                                 &next_index, b[0],
2481                                                 adj0->ia_cfg_index);
2482           next[0] = next_index;
2483
2484           if (is_midchain)
2485             {
2486               /* this acts on the packet that is about to be encapped */
2487               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2488                                           0 /* is_ip6 */ );
2489
2490               /* Guess we are only writing on ipv4 header. */
2491               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2492             }
2493           else
2494             /* Guess we are only writing on simple Ethernet header. */
2495             vnet_rewrite_one_header (adj0[0], ip0,
2496                                      sizeof (ethernet_header_t));
2497
2498           if (do_counters)
2499             vlib_increment_combined_counter
2500               (&adjacency_counters,
2501                thread_index, adj_index0, 1,
2502                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2503
2504           if (is_midchain)
2505             adj_midchain_fixup (vm, adj0, b[0]);
2506
2507           if (is_mcast)
2508             /* copy bytes from the IP address into the MAC rewrite */
2509             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2510                                         adj0->rewrite_header.dst_mcast_offset,
2511                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2512         }
2513       else
2514         {
2515           b[0]->error = error_node->errors[error0];
2516           /* undo the TTL decrement - we'll be back to do it again */
2517           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2518             ip4_ttl_inc (b[0], ip0);
2519         }
2520
2521       next += 1;
2522       b += 1;
2523       n_left_from -= 1;
2524     }
2525
2526
2527   /* Need to do trace after rewrites to pick up new packet data. */
2528   if (node->flags & VLIB_NODE_FLAG_TRACE)
2529     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2530
2531   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2532   return frame->n_vectors;
2533 }
2534
2535 always_inline uword
2536 ip4_rewrite_inline (vlib_main_t * vm,
2537                     vlib_node_runtime_t * node,
2538                     vlib_frame_t * frame,
2539                     int do_counters, int is_midchain, int is_mcast)
2540 {
2541   return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2542                                       is_midchain, is_mcast);
2543 }
2544
2545
2546 /** @brief IPv4 rewrite node.
2547     @node ip4-rewrite
2548
2549     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2550     header checksum, fetch the ip adjacency, check the outbound mtu,
2551     apply the adjacency rewrite, and send pkts to the adjacency
2552     rewrite header's rewrite_next_index.
2553
2554     @param vm vlib_main_t corresponding to the current thread
2555     @param node vlib_node_runtime_t
2556     @param frame vlib_frame_t whose contents should be dispatched
2557
2558     @par Graph mechanics: buffer metadata, next index usage
2559
2560     @em Uses:
2561     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2562         - the rewrite adjacency index
2563     - <code>adj->lookup_next_index</code>
2564         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2565           the packet will be dropped.
2566     - <code>adj->rewrite_header</code>
2567         - Rewrite string length, rewrite string, next_index
2568
2569     @em Sets:
2570     - <code>b->current_data, b->current_length</code>
2571         - Updated net of applying the rewrite string
2572
2573     <em>Next Indices:</em>
2574     - <code> adj->rewrite_header.next_index </code>
2575       or @c ip4-drop
2576 */
2577
2578 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2579                                  vlib_frame_t * frame)
2580 {
2581   if (adj_are_counters_enabled ())
2582     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2583   else
2584     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2585 }
2586
2587 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2588                                        vlib_node_runtime_t * node,
2589                                        vlib_frame_t * frame)
2590 {
2591   if (adj_are_counters_enabled ())
2592     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2593   else
2594     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2595 }
2596
2597 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2598                                   vlib_node_runtime_t * node,
2599                                   vlib_frame_t * frame)
2600 {
2601   if (adj_are_counters_enabled ())
2602     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2603   else
2604     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2605 }
2606
2607 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2608                                        vlib_node_runtime_t * node,
2609                                        vlib_frame_t * frame)
2610 {
2611   if (adj_are_counters_enabled ())
2612     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2613   else
2614     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2615 }
2616
2617 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2618                                         vlib_node_runtime_t * node,
2619                                         vlib_frame_t * frame)
2620 {
2621   if (adj_are_counters_enabled ())
2622     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2623   else
2624     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2625 }
2626
2627 /* *INDENT-OFF* */
2628 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2629   .name = "ip4-rewrite",
2630   .vector_size = sizeof (u32),
2631
2632   .format_trace = format_ip4_rewrite_trace,
2633
2634   .n_next_nodes = IP4_REWRITE_N_NEXT,
2635   .next_nodes = {
2636     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2637     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2638     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2639   },
2640 };
2641
2642 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2643   .name = "ip4-rewrite-bcast",
2644   .vector_size = sizeof (u32),
2645
2646   .format_trace = format_ip4_rewrite_trace,
2647   .sibling_of = "ip4-rewrite",
2648 };
2649
2650 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2651   .name = "ip4-rewrite-mcast",
2652   .vector_size = sizeof (u32),
2653
2654   .format_trace = format_ip4_rewrite_trace,
2655   .sibling_of = "ip4-rewrite",
2656 };
2657
2658 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2659   .name = "ip4-mcast-midchain",
2660   .vector_size = sizeof (u32),
2661
2662   .format_trace = format_ip4_rewrite_trace,
2663   .sibling_of = "ip4-rewrite",
2664 };
2665
2666 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2667   .name = "ip4-midchain",
2668   .vector_size = sizeof (u32),
2669   .format_trace = format_ip4_rewrite_trace,
2670   .sibling_of = "ip4-rewrite",
2671 };
2672 /* *INDENT-ON* */
2673
2674 static int
2675 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2676 {
2677   ip4_fib_mtrie_t *mtrie0;
2678   ip4_fib_mtrie_leaf_t leaf0;
2679   u32 lbi0;
2680
2681   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2682
2683   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2684   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2685   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2686
2687   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2688
2689   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2690 }
2691
2692 static clib_error_t *
2693 test_lookup_command_fn (vlib_main_t * vm,
2694                         unformat_input_t * input, vlib_cli_command_t * cmd)
2695 {
2696   ip4_fib_t *fib;
2697   u32 table_id = 0;
2698   f64 count = 1;
2699   u32 n;
2700   int i;
2701   ip4_address_t ip4_base_address;
2702   u64 errors = 0;
2703
2704   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2705     {
2706       if (unformat (input, "table %d", &table_id))
2707         {
2708           /* Make sure the entry exists. */
2709           fib = ip4_fib_get (table_id);
2710           if ((fib) && (fib->index != table_id))
2711             return clib_error_return (0, "<fib-index> %d does not exist",
2712                                       table_id);
2713         }
2714       else if (unformat (input, "count %f", &count))
2715         ;
2716
2717       else if (unformat (input, "%U",
2718                          unformat_ip4_address, &ip4_base_address))
2719         ;
2720       else
2721         return clib_error_return (0, "unknown input `%U'",
2722                                   format_unformat_error, input);
2723     }
2724
2725   n = count;
2726
2727   for (i = 0; i < n; i++)
2728     {
2729       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2730         errors++;
2731
2732       ip4_base_address.as_u32 =
2733         clib_host_to_net_u32 (1 +
2734                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2735     }
2736
2737   if (errors)
2738     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2739   else
2740     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2741
2742   return 0;
2743 }
2744
2745 /*?
2746  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2747  * given FIB table to determine if there is a conflict with the
2748  * adjacency table. The fib-id can be determined by using the
2749  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2750  * of 0 is used.
2751  *
2752  * @todo This command uses fib-id, other commands use table-id (not
2753  * just a name, they are different indexes). Would like to change this
2754  * to table-id for consistency.
2755  *
2756  * @cliexpar
2757  * Example of how to run the test lookup command:
2758  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2759  * No errors in 2 lookups
2760  * @cliexend
2761 ?*/
2762 /* *INDENT-OFF* */
2763 VLIB_CLI_COMMAND (lookup_test_command, static) =
2764 {
2765   .path = "test lookup",
2766   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2767   .function = test_lookup_command_fn,
2768 };
2769 /* *INDENT-ON* */
2770
2771 #ifndef CLIB_MARCH_VARIANT
2772 int
2773 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2774 {
2775   u32 fib_index;
2776
2777   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2778
2779   if (~0 == fib_index)
2780     return VNET_API_ERROR_NO_SUCH_FIB;
2781
2782   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2783                                   flow_hash_config);
2784
2785   return 0;
2786 }
2787 #endif
2788
2789 static clib_error_t *
2790 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2791                              unformat_input_t * input,
2792                              vlib_cli_command_t * cmd)
2793 {
2794   int matched = 0;
2795   u32 table_id = 0;
2796   u32 flow_hash_config = 0;
2797   int rv;
2798
2799   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2800     {
2801       if (unformat (input, "table %d", &table_id))
2802         matched = 1;
2803 #define _(a,v) \
2804     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2805       foreach_flow_hash_bit
2806 #undef _
2807         else
2808         break;
2809     }
2810
2811   if (matched == 0)
2812     return clib_error_return (0, "unknown input `%U'",
2813                               format_unformat_error, input);
2814
2815   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2816   switch (rv)
2817     {
2818     case 0:
2819       break;
2820
2821     case VNET_API_ERROR_NO_SUCH_FIB:
2822       return clib_error_return (0, "no such FIB table %d", table_id);
2823
2824     default:
2825       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2826       break;
2827     }
2828
2829   return 0;
2830 }
2831
2832 /*?
2833  * Configure the set of IPv4 fields used by the flow hash.
2834  *
2835  * @cliexpar
2836  * Example of how to set the flow hash on a given table:
2837  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2838  * Example of how to display the configured flow hash:
2839  * @cliexstart{show ip fib}
2840  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2841  * 0.0.0.0/0
2842  *   unicast-ip4-chain
2843  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2844  *     [0] [@0]: dpo-drop ip6
2845  * 0.0.0.0/32
2846  *   unicast-ip4-chain
2847  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2848  *     [0] [@0]: dpo-drop ip6
2849  * 224.0.0.0/8
2850  *   unicast-ip4-chain
2851  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2852  *     [0] [@0]: dpo-drop ip6
2853  * 6.0.1.2/32
2854  *   unicast-ip4-chain
2855  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2856  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2857  * 7.0.0.1/32
2858  *   unicast-ip4-chain
2859  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2860  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2861  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2862  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2863  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2864  * 240.0.0.0/8
2865  *   unicast-ip4-chain
2866  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2867  *     [0] [@0]: dpo-drop ip6
2868  * 255.255.255.255/32
2869  *   unicast-ip4-chain
2870  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2871  *     [0] [@0]: dpo-drop ip6
2872  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2873  * 0.0.0.0/0
2874  *   unicast-ip4-chain
2875  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2876  *     [0] [@0]: dpo-drop ip6
2877  * 0.0.0.0/32
2878  *   unicast-ip4-chain
2879  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2880  *     [0] [@0]: dpo-drop ip6
2881  * 172.16.1.0/24
2882  *   unicast-ip4-chain
2883  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2884  *     [0] [@4]: ipv4-glean: af_packet0
2885  * 172.16.1.1/32
2886  *   unicast-ip4-chain
2887  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2888  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2889  * 172.16.1.2/32
2890  *   unicast-ip4-chain
2891  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2892  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2893  * 172.16.2.0/24
2894  *   unicast-ip4-chain
2895  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2896  *     [0] [@4]: ipv4-glean: af_packet1
2897  * 172.16.2.1/32
2898  *   unicast-ip4-chain
2899  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2900  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2901  * 224.0.0.0/8
2902  *   unicast-ip4-chain
2903  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2904  *     [0] [@0]: dpo-drop ip6
2905  * 240.0.0.0/8
2906  *   unicast-ip4-chain
2907  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2908  *     [0] [@0]: dpo-drop ip6
2909  * 255.255.255.255/32
2910  *   unicast-ip4-chain
2911  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2912  *     [0] [@0]: dpo-drop ip6
2913  * @cliexend
2914 ?*/
2915 /* *INDENT-OFF* */
2916 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2917 {
2918   .path = "set ip flow-hash",
2919   .short_help =
2920   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2921   .function = set_ip_flow_hash_command_fn,
2922 };
2923 /* *INDENT-ON* */
2924
2925 #ifndef CLIB_MARCH_VARIANT
2926 int
2927 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2928                              u32 table_index)
2929 {
2930   vnet_main_t *vnm = vnet_get_main ();
2931   vnet_interface_main_t *im = &vnm->interface_main;
2932   ip4_main_t *ipm = &ip4_main;
2933   ip_lookup_main_t *lm = &ipm->lookup_main;
2934   vnet_classify_main_t *cm = &vnet_classify_main;
2935   ip4_address_t *if_addr;
2936
2937   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2938     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2939
2940   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2941     return VNET_API_ERROR_NO_SUCH_ENTRY;
2942
2943   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2944   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2945
2946   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2947
2948   if (NULL != if_addr)
2949     {
2950       fib_prefix_t pfx = {
2951         .fp_len = 32,
2952         .fp_proto = FIB_PROTOCOL_IP4,
2953         .fp_addr.ip4 = *if_addr,
2954       };
2955       u32 fib_index;
2956
2957       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2958                                                        sw_if_index);
2959
2960
2961       if (table_index != (u32) ~ 0)
2962         {
2963           dpo_id_t dpo = DPO_INVALID;
2964
2965           dpo_set (&dpo,
2966                    DPO_CLASSIFY,
2967                    DPO_PROTO_IP4,
2968                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2969
2970           fib_table_entry_special_dpo_add (fib_index,
2971                                            &pfx,
2972                                            FIB_SOURCE_CLASSIFY,
2973                                            FIB_ENTRY_FLAG_NONE, &dpo);
2974           dpo_reset (&dpo);
2975         }
2976       else
2977         {
2978           fib_table_entry_special_remove (fib_index,
2979                                           &pfx, FIB_SOURCE_CLASSIFY);
2980         }
2981     }
2982
2983   return 0;
2984 }
2985 #endif
2986
2987 static clib_error_t *
2988 set_ip_classify_command_fn (vlib_main_t * vm,
2989                             unformat_input_t * input,
2990                             vlib_cli_command_t * cmd)
2991 {
2992   u32 table_index = ~0;
2993   int table_index_set = 0;
2994   u32 sw_if_index = ~0;
2995   int rv;
2996
2997   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2998     {
2999       if (unformat (input, "table-index %d", &table_index))
3000         table_index_set = 1;
3001       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3002                          vnet_get_main (), &sw_if_index))
3003         ;
3004       else
3005         break;
3006     }
3007
3008   if (table_index_set == 0)
3009     return clib_error_return (0, "classify table-index must be specified");
3010
3011   if (sw_if_index == ~0)
3012     return clib_error_return (0, "interface / subif must be specified");
3013
3014   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3015
3016   switch (rv)
3017     {
3018     case 0:
3019       break;
3020
3021     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3022       return clib_error_return (0, "No such interface");
3023
3024     case VNET_API_ERROR_NO_SUCH_ENTRY:
3025       return clib_error_return (0, "No such classifier table");
3026     }
3027   return 0;
3028 }
3029
3030 /*?
3031  * Assign a classification table to an interface. The classification
3032  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3033  * commands. Once the table is created, use this command to filter packets
3034  * on an interface.
3035  *
3036  * @cliexpar
3037  * Example of how to assign a classification table to an interface:
3038  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3039 ?*/
3040 /* *INDENT-OFF* */
3041 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3042 {
3043     .path = "set ip classify",
3044     .short_help =
3045     "set ip classify intfc <interface> table-index <classify-idx>",
3046     .function = set_ip_classify_command_fn,
3047 };
3048 /* *INDENT-ON* */
3049
3050 /*
3051  * fd.io coding-style-patch-verification: ON
3052  *
3053  * Local Variables:
3054  * eval: (c-set-style "gnu")
3055  * End:
3056  */