ip4_load_balance: leverage vlib_get_buffers
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
59
60 /** @brief IPv4 lookup node.
61     @node ip4-lookup
62
63     This is the main IPv4 lookup dispatch node.
64
65     @param vm vlib_main_t corresponding to the current thread
66     @param node vlib_node_runtime_t
67     @param frame vlib_frame_t whose contents should be dispatched
68
69     @par Graph mechanics: buffer metadata, next index usage
70
71     @em Uses:
72     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73         - Indicates the @c sw_if_index value of the interface that the
74           packet was received on.
75     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76         - When the value is @c ~0 then the node performs a longest prefix
77           match (LPM) for the packet destination address in the FIB attached
78           to the receive interface.
79         - Otherwise perform LPM for the packet destination address in the
80           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81           value (0, 1, ...) and not a VRF id.
82
83     @em Sets:
84     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85         - The lookup result adjacency index.
86
87     <em>Next Index:</em>
88     - Dispatches the packet to the node index found in
89       ip_adjacency_t @c adj->lookup_next_index
90       (where @c adj is the lookup result adjacency).
91 */
92 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
93                                 vlib_frame_t * frame)
94 {
95   return ip4_lookup_inline (vm, node, frame,
96                             /* lookup_for_responses_to_locally_received_packets */
97                             0);
98
99 }
100
101 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
102
103 /* *INDENT-OFF* */
104 VLIB_REGISTER_NODE (ip4_lookup_node) =
105 {
106   .name = "ip4-lookup",
107   .vector_size = sizeof (u32),
108   .format_trace = format_ip4_lookup_trace,
109   .n_next_nodes = IP_LOOKUP_N_NEXT,
110   .next_nodes = IP4_LOOKUP_NEXT_NODES,
111 };
112 /* *INDENT-ON* */
113
114 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
115                                       vlib_node_runtime_t * node,
116                                       vlib_frame_t * frame)
117 {
118   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
119   u32 n_left_from, n_left_to_next, *from, *to_next;
120   ip_lookup_next_t next;
121   u32 thread_index = vm->thread_index;
122   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
123
124   from = vlib_frame_vector_args (frame);
125   n_left_from = frame->n_vectors;
126   next = node->cached_next_index;
127   vlib_get_buffers (vm, from, bufs, n_left_from);
128
129   while (n_left_from > 0)
130     {
131       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
132
133       while (n_left_from >= 4 && n_left_to_next >= 2)
134         {
135           ip_lookup_next_t next0, next1;
136           const load_balance_t *lb0, *lb1;
137           vlib_buffer_t *p0, *p1;
138           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
139           const ip4_header_t *ip0, *ip1;
140           const dpo_id_t *dpo0, *dpo1;
141
142           /* Prefetch next iteration. */
143           {
144             vlib_prefetch_buffer_header (b[2], STORE);
145             vlib_prefetch_buffer_header (b[3], STORE);
146
147             CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), STORE);
148             CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), STORE);
149           }
150
151           pi0 = to_next[0] = from[0];
152           pi1 = to_next[1] = from[1];
153
154           from += 2;
155           n_left_from -= 2;
156           to_next += 2;
157           n_left_to_next -= 2;
158
159           p0 = b[0];
160           p1 = b[1];
161           b += 2;
162
163           ip0 = vlib_buffer_get_current (p0);
164           ip1 = vlib_buffer_get_current (p1);
165           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
166           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
167
168           lb0 = load_balance_get (lbi0);
169           lb1 = load_balance_get (lbi1);
170
171           /*
172            * this node is for via FIBs we can re-use the hash value from the
173            * to node if present.
174            * We don't want to use the same hash value at each level in the recursion
175            * graph as that would lead to polarisation
176            */
177           hc0 = hc1 = 0;
178
179           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
180             {
181               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
182                 {
183                   hc0 = vnet_buffer (p0)->ip.flow_hash =
184                     vnet_buffer (p0)->ip.flow_hash >> 1;
185                 }
186               else
187                 {
188                   hc0 = vnet_buffer (p0)->ip.flow_hash =
189                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
190                 }
191               dpo0 = load_balance_get_fwd_bucket
192                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
193             }
194           else
195             {
196               dpo0 = load_balance_get_bucket_i (lb0, 0);
197             }
198           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
199             {
200               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
201                 {
202                   hc1 = vnet_buffer (p1)->ip.flow_hash =
203                     vnet_buffer (p1)->ip.flow_hash >> 1;
204                 }
205               else
206                 {
207                   hc1 = vnet_buffer (p1)->ip.flow_hash =
208                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
209                 }
210               dpo1 = load_balance_get_fwd_bucket
211                 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
212             }
213           else
214             {
215               dpo1 = load_balance_get_bucket_i (lb1, 0);
216             }
217
218           next0 = dpo0->dpoi_next_node;
219           next1 = dpo1->dpoi_next_node;
220
221           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
222           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
223
224           vlib_increment_combined_counter
225             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
226           vlib_increment_combined_counter
227             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
228
229           vlib_validate_buffer_enqueue_x2 (vm, node, next,
230                                            to_next, n_left_to_next,
231                                            pi0, pi1, next0, next1);
232         }
233
234       while (n_left_from > 0 && n_left_to_next > 0)
235         {
236           ip_lookup_next_t next0;
237           const load_balance_t *lb0;
238           vlib_buffer_t *p0;
239           u32 pi0, lbi0, hc0;
240           const ip4_header_t *ip0;
241           const dpo_id_t *dpo0;
242
243           pi0 = from[0];
244           to_next[0] = pi0;
245           from += 1;
246           to_next += 1;
247           n_left_to_next -= 1;
248           n_left_from -= 1;
249
250           p0 = b[0];
251           b += 1;
252
253           ip0 = vlib_buffer_get_current (p0);
254           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
255
256           lb0 = load_balance_get (lbi0);
257
258           hc0 = 0;
259           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
260             {
261               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
262                 {
263                   hc0 = vnet_buffer (p0)->ip.flow_hash =
264                     vnet_buffer (p0)->ip.flow_hash >> 1;
265                 }
266               else
267                 {
268                   hc0 = vnet_buffer (p0)->ip.flow_hash =
269                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
270                 }
271               dpo0 = load_balance_get_fwd_bucket
272                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
273             }
274           else
275             {
276               dpo0 = load_balance_get_bucket_i (lb0, 0);
277             }
278
279           next0 = dpo0->dpoi_next_node;
280           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
281
282           vlib_increment_combined_counter
283             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
284
285           vlib_validate_buffer_enqueue_x1 (vm, node, next,
286                                            to_next, n_left_to_next,
287                                            pi0, next0);
288         }
289
290       vlib_put_next_frame (vm, node, next, n_left_to_next);
291     }
292
293   if (node->flags & VLIB_NODE_FLAG_TRACE)
294     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
295
296   return frame->n_vectors;
297 }
298
299 /* *INDENT-OFF* */
300 VLIB_REGISTER_NODE (ip4_load_balance_node) =
301 {
302   .name = "ip4-load-balance",
303   .vector_size = sizeof (u32),
304   .sibling_of = "ip4-lookup",
305   .format_trace = format_ip4_lookup_trace,
306 };
307 /* *INDENT-ON* */
308
309 #ifndef CLIB_MARCH_VARIANT
310 /* get first interface address */
311 ip4_address_t *
312 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
313                              ip_interface_address_t ** result_ia)
314 {
315   ip_lookup_main_t *lm = &im->lookup_main;
316   ip_interface_address_t *ia = 0;
317   ip4_address_t *result = 0;
318
319   /* *INDENT-OFF* */
320   foreach_ip_interface_address
321     (lm, ia, sw_if_index,
322      1 /* honor unnumbered */ ,
323      ({
324        ip4_address_t * a =
325          ip_interface_address_get_address (lm, ia);
326        result = a;
327        break;
328      }));
329   /* *INDENT-OFF* */
330   if (result_ia)
331     *result_ia = result ? ia : 0;
332   return result;
333 }
334
335 static void
336 ip4_add_subnet_bcast_route (u32 fib_index,
337                             fib_prefix_t *pfx,
338                             u32 sw_if_index)
339 {
340   vnet_sw_interface_flags_t iflags;
341
342   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
343
344   fib_table_entry_special_remove(fib_index,
345                                  pfx,
346                                  FIB_SOURCE_INTERFACE);
347
348   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
349     {
350       fib_table_entry_update_one_path (fib_index, pfx,
351                                        FIB_SOURCE_INTERFACE,
352                                        FIB_ENTRY_FLAG_NONE,
353                                        DPO_PROTO_IP4,
354                                        /* No next-hop address */
355                                        &ADJ_BCAST_ADDR,
356                                        sw_if_index,
357                                        // invalid FIB index
358                                        ~0,
359                                        1,
360                                        // no out-label stack
361                                        NULL,
362                                        FIB_ROUTE_PATH_FLAG_NONE);
363     }
364   else
365     {
366         fib_table_entry_special_add(fib_index,
367                                     pfx,
368                                     FIB_SOURCE_INTERFACE,
369                                     (FIB_ENTRY_FLAG_DROP |
370                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
371     }
372 }
373
374 static void
375 ip4_add_interface_routes (u32 sw_if_index,
376                           ip4_main_t * im, u32 fib_index,
377                           ip_interface_address_t * a)
378 {
379   ip_lookup_main_t *lm = &im->lookup_main;
380   ip4_address_t *address = ip_interface_address_get_address (lm, a);
381   fib_prefix_t pfx = {
382     .fp_len = a->address_length,
383     .fp_proto = FIB_PROTOCOL_IP4,
384     .fp_addr.ip4 = *address,
385   };
386
387   if (pfx.fp_len <= 30)
388     {
389       /* a /30 or shorter - add a glean for the network address */
390       fib_table_entry_update_one_path (fib_index, &pfx,
391                                        FIB_SOURCE_INTERFACE,
392                                        (FIB_ENTRY_FLAG_CONNECTED |
393                                         FIB_ENTRY_FLAG_ATTACHED),
394                                        DPO_PROTO_IP4,
395                                        /* No next-hop address */
396                                        NULL,
397                                        sw_if_index,
398                                        // invalid FIB index
399                                        ~0,
400                                        1,
401                                        // no out-label stack
402                                        NULL,
403                                        FIB_ROUTE_PATH_FLAG_NONE);
404
405       /* Add the two broadcast addresses as drop */
406       fib_prefix_t net_pfx = {
407         .fp_len = 32,
408         .fp_proto = FIB_PROTOCOL_IP4,
409         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
410       };
411       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
412         fib_table_entry_special_add(fib_index,
413                                     &net_pfx,
414                                     FIB_SOURCE_INTERFACE,
415                                     (FIB_ENTRY_FLAG_DROP |
416                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
417       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
418       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
419         ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index);
420     }
421   else if (pfx.fp_len == 31)
422     {
423       u32 mask = clib_host_to_net_u32(1);
424       fib_prefix_t net_pfx = pfx;
425
426       net_pfx.fp_len = 32;
427       net_pfx.fp_addr.ip4.as_u32 ^= mask;
428
429       /* a /31 - add the other end as an attached host */
430       fib_table_entry_update_one_path (fib_index, &net_pfx,
431                                        FIB_SOURCE_INTERFACE,
432                                        (FIB_ENTRY_FLAG_ATTACHED),
433                                        DPO_PROTO_IP4,
434                                        &net_pfx.fp_addr,
435                                        sw_if_index,
436                                        // invalid FIB index
437                                        ~0,
438                                        1,
439                                        NULL,
440                                        FIB_ROUTE_PATH_FLAG_NONE);
441     }
442   pfx.fp_len = 32;
443
444   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
445     {
446       u32 classify_table_index =
447         lm->classify_table_index_by_sw_if_index[sw_if_index];
448       if (classify_table_index != (u32) ~ 0)
449         {
450           dpo_id_t dpo = DPO_INVALID;
451
452           dpo_set (&dpo,
453                    DPO_CLASSIFY,
454                    DPO_PROTO_IP4,
455                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
456
457           fib_table_entry_special_dpo_add (fib_index,
458                                            &pfx,
459                                            FIB_SOURCE_CLASSIFY,
460                                            FIB_ENTRY_FLAG_NONE, &dpo);
461           dpo_reset (&dpo);
462         }
463     }
464
465   fib_table_entry_update_one_path (fib_index, &pfx,
466                                    FIB_SOURCE_INTERFACE,
467                                    (FIB_ENTRY_FLAG_CONNECTED |
468                                     FIB_ENTRY_FLAG_LOCAL),
469                                    DPO_PROTO_IP4,
470                                    &pfx.fp_addr,
471                                    sw_if_index,
472                                    // invalid FIB index
473                                    ~0,
474                                    1, NULL,
475                                    FIB_ROUTE_PATH_FLAG_NONE);
476 }
477
478 static void
479 ip4_del_interface_routes (ip4_main_t * im,
480                           u32 fib_index,
481                           ip4_address_t * address, u32 address_length)
482 {
483   fib_prefix_t pfx = {
484     .fp_len = address_length,
485     .fp_proto = FIB_PROTOCOL_IP4,
486     .fp_addr.ip4 = *address,
487   };
488
489   if (pfx.fp_len <= 30)
490     {
491       fib_prefix_t net_pfx = {
492         .fp_len = 32,
493         .fp_proto = FIB_PROTOCOL_IP4,
494         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
495       };
496       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
497         fib_table_entry_special_remove(fib_index,
498                                        &net_pfx,
499                                        FIB_SOURCE_INTERFACE);
500       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
501       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
502         fib_table_entry_special_remove(fib_index,
503                                        &net_pfx,
504                                        FIB_SOURCE_INTERFACE);
505       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
506     }
507     else if (pfx.fp_len == 31)
508     {
509       u32 mask = clib_host_to_net_u32(1);
510       fib_prefix_t net_pfx = pfx;
511
512       net_pfx.fp_len = 32;
513       net_pfx.fp_addr.ip4.as_u32 ^= mask;
514
515       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
516     }
517
518   pfx.fp_len = 32;
519   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
520 }
521
522 void
523 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
524 {
525   ip4_main_t *im = &ip4_main;
526
527   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
528
529   /*
530    * enable/disable only on the 1<->0 transition
531    */
532   if (is_enable)
533     {
534       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
535         return;
536     }
537   else
538     {
539       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
540       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
541         return;
542     }
543   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
544                                !is_enable, 0, 0);
545
546
547   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
548                                sw_if_index, !is_enable, 0, 0);
549 }
550
551 static clib_error_t *
552 ip4_add_del_interface_address_internal (vlib_main_t * vm,
553                                         u32 sw_if_index,
554                                         ip4_address_t * address,
555                                         u32 address_length, u32 is_del)
556 {
557   vnet_main_t *vnm = vnet_get_main ();
558   ip4_main_t *im = &ip4_main;
559   ip_lookup_main_t *lm = &im->lookup_main;
560   clib_error_t *error = 0;
561   u32 if_address_index, elts_before;
562   ip4_address_fib_t ip4_af, *addr_fib = 0;
563
564   /* local0 interface doesn't support IP addressing  */
565   if (sw_if_index == 0)
566     {
567       return
568        clib_error_create ("local0 interface doesn't support IP addressing");
569     }
570
571   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
572   ip4_addr_fib_init (&ip4_af, address,
573                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
574   vec_add1 (addr_fib, ip4_af);
575
576   /*
577    * there is no support for adj-fib handling in the presence of overlapping
578    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
579    * most routers do.
580    */
581   /* *INDENT-OFF* */
582   if (!is_del)
583     {
584       /* When adding an address check that it does not conflict
585          with an existing address on any interface in this table. */
586       ip_interface_address_t *ia;
587       vnet_sw_interface_t *sif;
588
589       pool_foreach(sif, vnm->interface_main.sw_interfaces,
590       ({
591           if (im->fib_index_by_sw_if_index[sw_if_index] ==
592               im->fib_index_by_sw_if_index[sif->sw_if_index])
593             {
594               foreach_ip_interface_address
595                 (&im->lookup_main, ia, sif->sw_if_index,
596                  0 /* honor unnumbered */ ,
597                  ({
598                    ip4_address_t * x =
599                      ip_interface_address_get_address
600                      (&im->lookup_main, ia);
601                    if (ip4_destination_matches_route
602                        (im, address, x, ia->address_length) ||
603                        ip4_destination_matches_route (im,
604                                                       x,
605                                                       address,
606                                                       address_length))
607                      {
608                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
609
610                        return
611                          clib_error_create
612                          ("failed to add %U which conflicts with %U for interface %U",
613                           format_ip4_address_and_length, address,
614                           address_length,
615                           format_ip4_address_and_length, x,
616                           ia->address_length,
617                           format_vnet_sw_if_index_name, vnm,
618                           sif->sw_if_index);
619                      }
620                  }));
621             }
622       }));
623     }
624   /* *INDENT-ON* */
625
626   elts_before = pool_elts (lm->if_address_pool);
627
628   error = ip_interface_address_add_del
629     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
630   if (error)
631     goto done;
632
633   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
634
635   if (is_del)
636     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
637   else
638     ip4_add_interface_routes (sw_if_index,
639                               im, ip4_af.fib_index,
640                               pool_elt_at_index
641                               (lm->if_address_pool, if_address_index));
642
643   /* If pool did not grow/shrink: add duplicate address. */
644   if (elts_before != pool_elts (lm->if_address_pool))
645     {
646       ip4_add_del_interface_address_callback_t *cb;
647       vec_foreach (cb, im->add_del_interface_address_callbacks)
648         cb->function (im, cb->function_opaque, sw_if_index,
649                       address, address_length, if_address_index, is_del);
650     }
651
652 done:
653   vec_free (addr_fib);
654   return error;
655 }
656
657 clib_error_t *
658 ip4_add_del_interface_address (vlib_main_t * vm,
659                                u32 sw_if_index,
660                                ip4_address_t * address,
661                                u32 address_length, u32 is_del)
662 {
663   return ip4_add_del_interface_address_internal
664     (vm, sw_if_index, address, address_length, is_del);
665 }
666
667 void
668 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
669 {
670   ip_interface_address_t *ia;
671   ip4_main_t *im;
672
673   im = &ip4_main;
674
675   /*
676    * when directed broadcast is enabled, the subnet braodcast route will forward
677    * packets using an adjacency with a broadcast MAC. otherwise it drops
678    */
679   /* *INDENT-OFF* */
680   foreach_ip_interface_address(&im->lookup_main, ia,
681                                sw_if_index, 0,
682      ({
683        if (ia->address_length <= 30)
684          {
685            ip4_address_t *ipa;
686
687            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
688
689            fib_prefix_t pfx = {
690              .fp_len = 32,
691              .fp_proto = FIB_PROTOCOL_IP4,
692              .fp_addr = {
693                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
694              },
695            };
696
697            ip4_add_subnet_bcast_route
698              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
699                                                   sw_if_index),
700               &pfx, sw_if_index);
701          }
702      }));
703   /* *INDENT-ON* */
704 }
705 #endif
706
707 /* Built-in ip4 unicast rx feature path definition */
708 /* *INDENT-OFF* */
709 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
710 {
711   .arc_name = "ip4-unicast",
712   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
713   .last_in_arc = "ip4-lookup",
714   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
715 };
716
717 VNET_FEATURE_INIT (ip4_flow_classify, static) =
718 {
719   .arc_name = "ip4-unicast",
720   .node_name = "ip4-flow-classify",
721   .runs_before = VNET_FEATURES ("ip4-inacl"),
722 };
723
724 VNET_FEATURE_INIT (ip4_inacl, static) =
725 {
726   .arc_name = "ip4-unicast",
727   .node_name = "ip4-inacl",
728   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
729 };
730
731 VNET_FEATURE_INIT (ip4_source_check_1, static) =
732 {
733   .arc_name = "ip4-unicast",
734   .node_name = "ip4-source-check-via-rx",
735   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
736 };
737
738 VNET_FEATURE_INIT (ip4_source_check_2, static) =
739 {
740   .arc_name = "ip4-unicast",
741   .node_name = "ip4-source-check-via-any",
742   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
743 };
744
745 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
746 {
747   .arc_name = "ip4-unicast",
748   .node_name = "ip4-source-and-port-range-check-rx",
749   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
750 };
751
752 VNET_FEATURE_INIT (ip4_policer_classify, static) =
753 {
754   .arc_name = "ip4-unicast",
755   .node_name = "ip4-policer-classify",
756   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
757 };
758
759 VNET_FEATURE_INIT (ip4_ipsec, static) =
760 {
761   .arc_name = "ip4-unicast",
762   .node_name = "ipsec4-input-feature",
763   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
764 };
765
766 VNET_FEATURE_INIT (ip4_vpath, static) =
767 {
768   .arc_name = "ip4-unicast",
769   .node_name = "vpath-input-ip4",
770   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
771 };
772
773 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
774 {
775   .arc_name = "ip4-unicast",
776   .node_name = "ip4-vxlan-bypass",
777   .runs_before = VNET_FEATURES ("ip4-lookup"),
778 };
779
780 VNET_FEATURE_INIT (ip4_not_enabled, static) =
781 {
782   .arc_name = "ip4-unicast",
783   .node_name = "ip4-not-enabled",
784   .runs_before = VNET_FEATURES ("ip4-lookup"),
785 };
786
787 VNET_FEATURE_INIT (ip4_lookup, static) =
788 {
789   .arc_name = "ip4-unicast",
790   .node_name = "ip4-lookup",
791   .runs_before = 0,     /* not before any other features */
792 };
793
794 /* Built-in ip4 multicast rx feature path definition */
795 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
796 {
797   .arc_name = "ip4-multicast",
798   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
799   .last_in_arc = "ip4-mfib-forward-lookup",
800   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
801 };
802
803 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
804 {
805   .arc_name = "ip4-multicast",
806   .node_name = "vpath-input-ip4",
807   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
808 };
809
810 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
811 {
812   .arc_name = "ip4-multicast",
813   .node_name = "ip4-not-enabled",
814   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
815 };
816
817 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
818 {
819   .arc_name = "ip4-multicast",
820   .node_name = "ip4-mfib-forward-lookup",
821   .runs_before = 0,     /* last feature */
822 };
823
824 /* Source and port-range check ip4 tx feature path definition */
825 VNET_FEATURE_ARC_INIT (ip4_output, static) =
826 {
827   .arc_name = "ip4-output",
828   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
829   .last_in_arc = "interface-output",
830   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
831 };
832
833 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
834 {
835   .arc_name = "ip4-output",
836   .node_name = "ip4-source-and-port-range-check-tx",
837   .runs_before = VNET_FEATURES ("ip4-outacl"),
838 };
839
840 VNET_FEATURE_INIT (ip4_outacl, static) =
841 {
842   .arc_name = "ip4-output",
843   .node_name = "ip4-outacl",
844   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
845 };
846
847 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
848 {
849   .arc_name = "ip4-output",
850   .node_name = "ipsec4-output-feature",
851   .runs_before = VNET_FEATURES ("interface-output"),
852 };
853
854 /* Built-in ip4 tx feature path definition */
855 VNET_FEATURE_INIT (ip4_interface_output, static) =
856 {
857   .arc_name = "ip4-output",
858   .node_name = "interface-output",
859   .runs_before = 0,     /* not before any other features */
860 };
861 /* *INDENT-ON* */
862
863 static clib_error_t *
864 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
865 {
866   ip4_main_t *im = &ip4_main;
867
868   /* Fill in lookup tables with default table (0). */
869   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
870   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
871
872   if (!is_add)
873     {
874       ip4_main_t *im4 = &ip4_main;
875       ip_lookup_main_t *lm4 = &im4->lookup_main;
876       ip_interface_address_t *ia = 0;
877       ip4_address_t *address;
878       vlib_main_t *vm = vlib_get_main ();
879
880       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
881       /* *INDENT-OFF* */
882       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
883       ({
884         address = ip_interface_address_get_address (lm4, ia);
885         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
886       }));
887       /* *INDENT-ON* */
888     }
889
890   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
891                                is_add, 0, 0);
892
893   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
894                                sw_if_index, is_add, 0, 0);
895
896   return /* no error */ 0;
897 }
898
899 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
900
/*
 * The single global IPv4 state object (ip4_main_t) referenced throughout
 * this file.  The definition is compiled only when CLIB_MARCH_VARIANT is
 * not defined -- presumably so per-architecture variant builds of this
 * file do not emit a second definition; confirm against the build system.
 */
901 /* Global IP4 main. */
902 #ifndef CLIB_MARCH_VARIANT
903 ip4_main_t ip4_main;
904 #endif /* CLIB_MARCH_VARIANT */
905
906 static clib_error_t *
907 ip4_lookup_init (vlib_main_t * vm)
908 {
909   ip4_main_t *im = &ip4_main;
910   clib_error_t *error;
911   uword i;
912
913   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
914     return error;
915   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
916     return (error);
917   if ((error = vlib_call_init_function (vm, fib_module_init)))
918     return error;
919   if ((error = vlib_call_init_function (vm, mfib_module_init)))
920     return error;
921
922   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
923     {
924       u32 m;
925
926       if (i < 32)
927         m = pow2_mask (i) << (32 - i);
928       else
929         m = ~0;
930       im->fib_masks[i] = clib_host_to_net_u32 (m);
931     }
932
933   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
934
935   /* Create FIB with index 0 and table id of 0. */
936   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
937                                      FIB_SOURCE_DEFAULT_ROUTE);
938   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
939                                       MFIB_SOURCE_DEFAULT_ROUTE);
940
941   {
942     pg_node_t *pn;
943     pn = pg_get_node (ip4_lookup_node.index);
944     pn->unformat_edit = unformat_pg_ip4_header;
945   }
946
947   {
948     ethernet_arp_header_t h;
949
950     clib_memset (&h, 0, sizeof (h));
951
952 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
953 #define _8(f,v) h.f = v;
954     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
955     _16 (l3_type, ETHERNET_TYPE_IP4);
956     _8 (n_l2_address_bytes, 6);
957     _8 (n_l3_address_bytes, 4);
958     _16 (opcode, ETHERNET_ARP_OPCODE_request);
959 #undef _16
960 #undef _8
961
962     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
963                                /* data */ &h,
964                                sizeof (h),
965                                /* alloc chunk size */ 8,
966                                "ip4 arp");
967   }
968
969   return error;
970 }
971
972 VLIB_INIT_FUNCTION (ip4_lookup_init);
973
974 typedef struct
975 {
976   /* Adjacency taken. */
977   u32 dpo_index;
978   u32 flow_hash;
979   u32 fib_index;
980
981   /* Packet data, possibly *after* rewrite. */
982   u8 packet_data[64 - 1 * sizeof (u32)];
983 }
984 ip4_forward_next_trace_t;
985
986 #ifndef CLIB_MARCH_VARIANT
987 u8 *
988 format_ip4_forward_next_trace (u8 * s, va_list * args)
989 {
990   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
991   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
992   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
993   u32 indent = format_get_indent (s);
994   s = format (s, "%U%U",
995               format_white_space, indent,
996               format_ip4_header, t->packet_data, sizeof (t->packet_data));
997   return s;
998 }
999 #endif
1000
1001 static u8 *
1002 format_ip4_lookup_trace (u8 * s, va_list * args)
1003 {
1004   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1005   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1006   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1007   u32 indent = format_get_indent (s);
1008
1009   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1010               t->fib_index, t->dpo_index, t->flow_hash);
1011   s = format (s, "\n%U%U",
1012               format_white_space, indent,
1013               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1014   return s;
1015 }
1016
1017 static u8 *
1018 format_ip4_rewrite_trace (u8 * s, va_list * args)
1019 {
1020   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1021   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1022   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1023   u32 indent = format_get_indent (s);
1024
1025   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1026               t->fib_index, t->dpo_index, format_ip_adjacency,
1027               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1028   s = format (s, "\n%U%U",
1029               format_white_space, indent,
1030               format_ip_adjacency_packet_data,
1031               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1032   return s;
1033 }
1034
1035 #ifndef CLIB_MARCH_VARIANT
1036 /* Common trace function for all ip4-forward next nodes. */
1037 void
1038 ip4_forward_next_trace (vlib_main_t * vm,
1039                         vlib_node_runtime_t * node,
1040                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1041 {
1042   u32 *from, n_left;
1043   ip4_main_t *im = &ip4_main;
1044
1045   n_left = frame->n_vectors;
1046   from = vlib_frame_vector_args (frame);
1047
1048   while (n_left >= 4)
1049     {
1050       u32 bi0, bi1;
1051       vlib_buffer_t *b0, *b1;
1052       ip4_forward_next_trace_t *t0, *t1;
1053
1054       /* Prefetch next iteration. */
1055       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1056       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1057
1058       bi0 = from[0];
1059       bi1 = from[1];
1060
1061       b0 = vlib_get_buffer (vm, bi0);
1062       b1 = vlib_get_buffer (vm, bi1);
1063
1064       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1065         {
1066           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1067           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1068           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1069           t0->fib_index =
1070             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1071              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1072             vec_elt (im->fib_index_by_sw_if_index,
1073                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1074
1075           clib_memcpy_fast (t0->packet_data,
1076                             vlib_buffer_get_current (b0),
1077                             sizeof (t0->packet_data));
1078         }
1079       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1080         {
1081           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1082           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1083           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1084           t1->fib_index =
1085             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1086              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1087             vec_elt (im->fib_index_by_sw_if_index,
1088                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1089           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1090                             sizeof (t1->packet_data));
1091         }
1092       from += 2;
1093       n_left -= 2;
1094     }
1095
1096   while (n_left >= 1)
1097     {
1098       u32 bi0;
1099       vlib_buffer_t *b0;
1100       ip4_forward_next_trace_t *t0;
1101
1102       bi0 = from[0];
1103
1104       b0 = vlib_get_buffer (vm, bi0);
1105
1106       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1107         {
1108           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1109           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1110           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1111           t0->fib_index =
1112             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1113              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1114             vec_elt (im->fib_index_by_sw_if_index,
1115                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1116           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1117                             sizeof (t0->packet_data));
1118         }
1119       from += 1;
1120       n_left -= 1;
1121     }
1122 }
1123
1124 /* Compute TCP/UDP/ICMP4 checksum in software. */
1125 u16
1126 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1127                               ip4_header_t * ip0)
1128 {
1129   ip_csum_t sum0;
1130   u32 ip_header_length, payload_length_host_byte_order;
1131   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1132   u16 sum16;
1133   void *data_this_buffer;
1134
1135   /* Initialize checksum with ip header. */
1136   ip_header_length = ip4_header_bytes (ip0);
1137   payload_length_host_byte_order =
1138     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1139   sum0 =
1140     clib_host_to_net_u32 (payload_length_host_byte_order +
1141                           (ip0->protocol << 16));
1142
1143   if (BITS (uword) == 32)
1144     {
1145       sum0 =
1146         ip_csum_with_carry (sum0,
1147                             clib_mem_unaligned (&ip0->src_address, u32));
1148       sum0 =
1149         ip_csum_with_carry (sum0,
1150                             clib_mem_unaligned (&ip0->dst_address, u32));
1151     }
1152   else
1153     sum0 =
1154       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1155
1156   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1157   data_this_buffer = (void *) ip0 + ip_header_length;
1158   n_ip_bytes_this_buffer =
1159     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1160   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1161     {
1162       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1163         n_ip_bytes_this_buffer - ip_header_length : 0;
1164     }
1165   while (1)
1166     {
1167       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1168       n_bytes_left -= n_this_buffer;
1169       if (n_bytes_left == 0)
1170         break;
1171
1172       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1173       p0 = vlib_get_buffer (vm, p0->next_buffer);
1174       data_this_buffer = vlib_buffer_get_current (p0);
1175       n_this_buffer = clib_min (p0->current_length, n_bytes_left);
1176     }
1177
1178   sum16 = ~ip_csum_fold (sum0);
1179
1180   return sum16;
1181 }
1182
1183 u32
1184 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1185 {
1186   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1187   udp_header_t *udp0;
1188   u16 sum16;
1189
1190   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1191           || ip0->protocol == IP_PROTOCOL_UDP);
1192
1193   udp0 = (void *) (ip0 + 1);
1194   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1195     {
1196       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1197                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1198       return p0->flags;
1199     }
1200
1201   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1202
1203   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1204                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1205
1206   return p0->flags;
1207 }
1208 #endif
1209
1210 /* *INDENT-OFF* */
1211 VNET_FEATURE_ARC_INIT (ip4_local) =
1212 {
1213   .arc_name  = "ip4-local",
1214   .start_nodes = VNET_FEATURES ("ip4-local"),
1215   .last_in_arc = "ip4-local-end-of-arc",
1216 };
1217 /* *INDENT-ON* */
1218
1219 static inline void
1220 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1221                             ip4_header_t * ip, u8 is_udp, u8 * error,
1222                             u8 * good_tcp_udp)
1223 {
1224   u32 flags0;
1225   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1226   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1227   if (is_udp)
1228     {
1229       udp_header_t *udp;
1230       u32 ip_len, udp_len;
1231       i32 len_diff;
1232       udp = ip4_next_header (ip);
1233       /* Verify UDP length. */
1234       ip_len = clib_net_to_host_u16 (ip->length);
1235       udp_len = clib_net_to_host_u16 (udp->length);
1236
1237       len_diff = ip_len - udp_len;
1238       *good_tcp_udp &= len_diff >= 0;
1239       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1240     }
1241 }
1242
/*
 * Checksum-state predicates on a vlib_buffer_t pointer.  Every argument
 * and every expansion is fully parenthesised so the macros can be used
 * inside larger expressions (e.g. under '!') and with non-trivial
 * arguments without precedence surprises.
 */

/* True when either the TCP or the UDP checksum-offload flag is set. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    ((((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM) != 0)             \
        || (((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM) != 0))

/*
 * True when a TCP/UDP packet still needs a software checksum check, i.e.
 * the checksum has been neither computed already nor offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) != 0)     \
          || ip4_local_csum_is_offloaded (_b)))

/* True when the checksum is flagged correct or is offloaded. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0)           \
     || ip4_local_csum_is_offloaded (_b))
1254
1255 static inline void
1256 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1257                          ip4_header_t * ih, u8 * error)
1258 {
1259   u8 is_udp, is_tcp_udp, good_tcp_udp;
1260
1261   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1262   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1263
1264   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1265     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1266   else
1267     good_tcp_udp = ip4_local_csum_is_valid (b);
1268
1269   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1270   *error = (is_tcp_udp && !good_tcp_udp
1271             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1272 }
1273
1274 static inline void
1275 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1276                             ip4_header_t ** ih, u8 * error)
1277 {
1278   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1279
1280   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1281   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1282
1283   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1284   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1285
1286   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1287   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1288
1289   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1290                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1291     {
1292       if (is_tcp_udp[0])
1293         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1294                                     &good_tcp_udp[0]);
1295       if (is_tcp_udp[1])
1296         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1297                                     &good_tcp_udp[1]);
1298     }
1299
1300   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1301               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1302   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1303               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1304 }
1305
1306 static inline void
1307 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1308                               vlib_buffer_t * b, u16 * next, u8 error,
1309                               u8 head_of_feature_arc)
1310 {
1311   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1312   u32 next_index;
1313
1314   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1315   b->error = error ? error_node->errors[error] : 0;
1316   if (head_of_feature_arc)
1317     {
1318       next_index = *next;
1319       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1320         {
1321           vnet_feature_arc_start (arc_index,
1322                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1323                                   &next_index, b);
1324           *next = next_index;
1325         }
1326     }
1327 }
1328
/*
 * Result of the most recent source-address check, carried across the
 * packets of one frame by ip4_local_inline() so that
 * ip4_local_check_src() / ip4_local_check_src_x2() can compare the next
 * packet's source address against it.
 */
1329 typedef struct
1330 {
     /* Source address the fields below were computed for. */
1331   ip4_address_t src;
     /* Load-balance index found by the source lookup. */
1332   u32 lbi;
     /* Error value recorded after that lookup. */
1333   u8 error;
     /* Non-zero until a result has been recorded; src may legitimately be 0. */
1334   u8 first;
1335 } ip4_local_last_check_t;
1336
1337 static inline void
1338 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1339                      ip4_local_last_check_t * last_check, u8 * error0)
1340 {
1341   ip4_fib_mtrie_leaf_t leaf0;
1342   ip4_fib_mtrie_t *mtrie0;
1343   const dpo_id_t *dpo0;
1344   load_balance_t *lb0;
1345   u32 lbi0;
1346
1347   vnet_buffer (b)->ip.fib_index =
1348     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1349     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1350
1351   if (PREDICT_FALSE (last_check->first ||
1352                      (last_check->src.as_u32 != ip0->src_address.as_u32)))
1353     {
1354       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1355       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1356       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1357       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1358       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1359
1360       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1361       vnet_buffer (b)->ip.adj_index[VLIB_RX] = lbi0;
1362
1363       lb0 = load_balance_get (lbi0);
1364       dpo0 = load_balance_get_bucket_i (lb0, 0);
1365
1366       /*
1367        * Must have a route to source otherwise we drop the packet.
1368        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1369        *
1370        * The checks are:
1371        *  - the source is a recieve => it's from us => bogus, do this
1372        *    first since it sets a different error code.
1373        *  - uRPF check for any route to source - accept if passes.
1374        *  - allow packets destined to the broadcast address from unknown sources
1375        */
1376
1377       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1378                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1379                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1380       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1381                   && !fib_urpf_check_size (lb0->lb_urpf)
1382                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1383                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1384
1385       last_check->src.as_u32 = ip0->src_address.as_u32;
1386       last_check->lbi = lbi0;
1387       last_check->error = *error0;
1388     }
1389   else
1390     {
1391       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1392       vnet_buffer (b)->ip.adj_index[VLIB_RX] = last_check->lbi;
1393       *error0 = last_check->error;
1394       last_check->first = 0;
1395     }
1396 }
1397
1398 static inline void
1399 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1400                         ip4_local_last_check_t * last_check, u8 * error)
1401 {
1402   ip4_fib_mtrie_leaf_t leaf[2];
1403   ip4_fib_mtrie_t *mtrie[2];
1404   const dpo_id_t *dpo[2];
1405   load_balance_t *lb[2];
1406   u32 not_last_hit;
1407   u32 lbi[2];
1408
1409   not_last_hit = last_check->first;
1410   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1411   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1412
1413   vnet_buffer (b[0])->ip.fib_index =
1414     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1415     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1416     vnet_buffer (b[0])->ip.fib_index;
1417
1418   vnet_buffer (b[1])->ip.fib_index =
1419     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1420     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1421     vnet_buffer (b[1])->ip.fib_index;
1422
1423   if (PREDICT_FALSE (not_last_hit))
1424     {
1425       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1426       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1427
1428       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1429       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1430
1431       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1432                                            &ip[0]->src_address, 2);
1433       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1434                                            &ip[1]->src_address, 2);
1435
1436       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1437                                            &ip[0]->src_address, 3);
1438       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1439                                            &ip[1]->src_address, 3);
1440
1441       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1442       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1443
1444       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1445       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = lbi[0];
1446
1447       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1448       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = lbi[1];
1449
1450       lb[0] = load_balance_get (lbi[0]);
1451       lb[1] = load_balance_get (lbi[1]);
1452
1453       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1454       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1455
1456       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1457                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1458                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1459       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1460                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1461                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1462                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1463
1464       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1465                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1466                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1467       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1468                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1469                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1470                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1471
1472       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1473       last_check->lbi = lbi[1];
1474       last_check->error = error[1];
1475     }
1476   else
1477     {
1478       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1479       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = last_check->lbi;
1480
1481       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1482       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = last_check->lbi;
1483
1484       error[0] = last_check->error;
1485       error[1] = last_check->error;
1486       last_check->first = 0;
1487     }
1488 }
1489
/*
 * Packet classes distinguished by ip4-local.  The values are spelled out
 * because ip4_local_inline() relies on L4 being zero: it tests "!pt" for
 * the L4 case and XORs two types to detect a mixed pair.
 */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4 = 0,
  IP_LOCAL_PACKET_TYPE_NAT = 1,
  IP_LOCAL_PACKET_TYPE_FRAG = 2,
};
1496
1497 /**
1498  * Determine packet type and next node.
1499  *
1500  * The expectation is that all packets that are not L4 will skip
1501  * checksums and source checks.
1502  */
1503 always_inline u8
1504 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1505 {
1506   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1507
1508   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1509     {
1510       *next = IP_LOCAL_NEXT_REASSEMBLY;
1511       return IP_LOCAL_PACKET_TYPE_FRAG;
1512     }
1513   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1514     {
1515       *next = lm->local_next_by_ip_protocol[ip->protocol];
1516       return IP_LOCAL_PACKET_TYPE_NAT;
1517     }
1518
1519   *next = lm->local_next_by_ip_protocol[ip->protocol];
1520   return IP_LOCAL_PACKET_TYPE_L4;
1521 }
1522
/*
 * Shared body of the "ip4-local" and "ip4-local-end-of-arc" nodes.
 *
 * For every buffer in the frame: record the L3 header offset, classify
 * the packet (ip4_local_classify) to pick its next node, and -- only when
 * head_of_feature_arc is non-zero and the packet is plain L4 -- run the
 * L4 checksum check followed by the source-address check.  The outcome is
 * turned into a next index / buffer error by
 * ip4_local_set_next_and_error().
 *
 * Buffers are handled two at a time while at least six remain (so that
 * b[4] and b[5] can be prefetched), then one at a time.  All buffers are
 * finally enqueued according to nexts[].
 *
 * Returns the number of vectors in the frame.
 */
1523 static inline uword
1524 ip4_local_inline (vlib_main_t * vm,
1525                   vlib_node_runtime_t * node,
1526                   vlib_frame_t * frame, int head_of_feature_arc)
1527 {
1528   u32 *from, n_left_from;
     /* Buffer errors are taken from the ip4-input node's error table. */
1529   vlib_node_runtime_t *error_node =
1530     vlib_node_get_runtime (vm, ip4_input_node.index);
1531   u16 nexts[VLIB_FRAME_SIZE], *next;
1532   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1533   ip4_header_t *ip[2];
1534   u8 error[2], pt[2];
1535
1536   ip4_local_last_check_t last_check = {
1537     /*
1538      * 0.0.0.0 can appear as the source address of an IP packet,
1539      * as can any other address, hence the need to use the 'first'
1540      * member to make sure the .lbi is initialised for the first
1541      * packet.
1542      */
1543     .src = {.as_u32 = 0},
1544     .lbi = ~0,
1545     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1546     .first = 1,
1547   };
1548
1549   from = vlib_frame_vector_args (frame);
1550   n_left_from = frame->n_vectors;
1551
1552   if (node->flags & VLIB_NODE_FLAG_TRACE)
1553     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1554
     /* Translate all buffer indices to pointers up front. */
1555   vlib_get_buffers (vm, from, bufs, n_left_from);
1556   b = bufs;
1557   next = nexts;
1558
1559   while (n_left_from >= 6)
1560     {
1561       u8 not_batch = 0;
1562
1563       /* Prefetch next iteration. */
1564       {
1565         vlib_prefetch_buffer_header (b[4], LOAD);
1566         vlib_prefetch_buffer_header (b[5], LOAD);
1567
1568         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1569         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1570       }
1571
       /* IP4_ERROR_UNKNOWN_PROTOCOL doubles as "no error found so far". */
1572       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1573
1574       ip[0] = vlib_buffer_get_current (b[0]);
1575       ip[1] = vlib_buffer_get_current (b[1]);
1576
1577       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1578       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1579
1580       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1581       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1582
       /* Non-zero when the two packets are of different types. */
1583       not_batch = pt[0] ^ pt[1];
1584
       /* No checks off the arc head, or when both packets share a non-L4 type. */
1585       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1586         goto skip_checks;
1587
1588       if (PREDICT_TRUE (not_batch == 0))
1589         {
1590           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1591           ip4_local_check_src_x2 (b, ip, &last_check, error);
1592         }
1593       else
1594         {
           /* Mixed pair: check only the packet(s) of type L4 (pt == 0). */
1595           if (!pt[0])
1596             {
1597               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1598               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1599             }
1600           if (!pt[1])
1601             {
1602               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1603               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1604             }
1605         }
1606
1607     skip_checks:
1608
1609       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1610                                     head_of_feature_arc);
1611       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1612                                     head_of_feature_arc);
1613
1614       b += 2;
1615       next += 2;
1616       n_left_from -= 2;
1617     }
1618
     /* Remaining buffers, one at a time, same steps as above. */
1619   while (n_left_from > 0)
1620     {
1621       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1622
1623       ip[0] = vlib_buffer_get_current (b[0]);
1624       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1625       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1626
1627       if (head_of_feature_arc == 0 || pt[0])
1628         goto skip_check;
1629
1630       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1631       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1632
1633     skip_check:
1634
1635       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1636                                     head_of_feature_arc);
1637
1638       b += 1;
1639       next += 1;
1640       n_left_from -= 1;
1641     }
1642
1643   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1644   return frame->n_vectors;
1645 }
1646
1647 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1648                                vlib_frame_t * frame)
1649 {
1650   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1651 }
1652
1653 /* *INDENT-OFF* */
1654 VLIB_REGISTER_NODE (ip4_local_node) =
1655 {
1656   .name = "ip4-local",
1657   .vector_size = sizeof (u32),
1658   .format_trace = format_ip4_forward_next_trace,
1659   .n_next_nodes = IP_LOCAL_N_NEXT,
1660   .next_nodes =
1661   {
1662     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1663     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1664     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1665     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1666     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
1667   },
1668 };
1669 /* *INDENT-ON* */
1670
1671
1672 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1673                                           vlib_node_runtime_t * node,
1674                                           vlib_frame_t * frame)
1675 {
1676   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1677 }
1678
1679 /* *INDENT-OFF* */
1680 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1681   .name = "ip4-local-end-of-arc",
1682   .vector_size = sizeof (u32),
1683
1684   .format_trace = format_ip4_forward_next_trace,
1685   .sibling_of = "ip4-local",
1686 };
1687
1688 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1689   .arc_name = "ip4-local",
1690   .node_name = "ip4-local-end-of-arc",
1691   .runs_before = 0, /* not before any other features */
1692 };
1693 /* *INDENT-ON* */
1694
1695 #ifndef CLIB_MARCH_VARIANT
1696 void
1697 ip4_register_protocol (u32 protocol, u32 node_index)
1698 {
1699   vlib_main_t *vm = vlib_get_main ();
1700   ip4_main_t *im = &ip4_main;
1701   ip_lookup_main_t *lm = &im->lookup_main;
1702
1703   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1704   lm->local_next_by_ip_protocol[protocol] =
1705     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1706 }
1707 #endif
1708
1709 static clib_error_t *
1710 show_ip_local_command_fn (vlib_main_t * vm,
1711                           unformat_input_t * input, vlib_cli_command_t * cmd)
1712 {
1713   ip4_main_t *im = &ip4_main;
1714   ip_lookup_main_t *lm = &im->lookup_main;
1715   int i;
1716
1717   vlib_cli_output (vm, "Protocols handled by ip4_local");
1718   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1719     {
1720       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1721         {
1722           u32 node_index = vlib_get_node (vm,
1723                                           ip4_local_node.index)->
1724             next_nodes[lm->local_next_by_ip_protocol[i]];
1725           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1726                            node_index);
1727         }
1728     }
1729   return 0;
1730 }
1731
1732
1733
1734 /*?
1735  * Display the set of protocols handled by the local IPv4 stack.
1736  *
1737  * @cliexpar
1738  * Example of how to display local protocol table:
1739  * @cliexstart{show ip local}
1740  * Protocols handled by ip4_local
1741  * 1
1742  * 17
1743  * 47
1744  * @cliexend
1745 ?*/
1746 /* *INDENT-OFF* */
/*
 * Binds the "show ip local" CLI path to show_ip_local_command_fn.
 * NOTE(review): the handler prints each entry as "<protocol>: <node name>",
 * while the sample output in the doc comment above lists protocol numbers
 * only - confirm which one is intended.
 */
1747 VLIB_CLI_COMMAND (show_ip_local, static) =
1748 {
1749   .path = "show ip local",
1750   .function = show_ip_local_command_fn,
1751   .short_help = "show ip local",
1752 };
1753 /* *INDENT-ON* */
1754
1755 always_inline uword
1756 ip4_arp_inline (vlib_main_t * vm,
1757                 vlib_node_runtime_t * node,
1758                 vlib_frame_t * frame, int is_glean)
1759 {
1760   vnet_main_t *vnm = vnet_get_main ();
1761   ip4_main_t *im = &ip4_main;
1762   ip_lookup_main_t *lm = &im->lookup_main;
1763   u32 *from, *to_next_drop;
1764   uword n_left_from, n_left_to_next_drop, next_index;
1765   u32 thread_index = vm->thread_index;
1766   u64 seed;
1767
1768   if (node->flags & VLIB_NODE_FLAG_TRACE)
1769     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1770
1771   seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1772
1773   from = vlib_frame_vector_args (frame);
1774   n_left_from = frame->n_vectors;
1775   next_index = node->cached_next_index;
1776   if (next_index == IP4_ARP_NEXT_DROP)
1777     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1778
1779   while (n_left_from > 0)
1780     {
1781       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1782                            to_next_drop, n_left_to_next_drop);
1783
1784       while (n_left_from > 0 && n_left_to_next_drop > 0)
1785         {
1786           u32 pi0, bi0, adj_index0, sw_if_index0;
1787           ip_adjacency_t *adj0;
1788           vlib_buffer_t *p0, *b0;
1789           ip4_address_t resolve0;
1790           ethernet_arp_header_t *h0;
1791           vnet_hw_interface_t *hw_if0;
1792           u64 r0;
1793
1794           pi0 = from[0];
1795           p0 = vlib_get_buffer (vm, pi0);
1796
1797           from += 1;
1798           n_left_from -= 1;
1799           to_next_drop[0] = pi0;
1800           to_next_drop += 1;
1801           n_left_to_next_drop -= 1;
1802
1803           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1804           adj0 = adj_get (adj_index0);
1805
1806           if (is_glean)
1807             {
1808               /* resolve the packet's destination */
1809               ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1810               resolve0 = ip0->dst_address;
1811             }
1812           else
1813             {
1814               /* resolve the incomplete adj */
1815               resolve0 = adj0->sub_type.nbr.next_hop.ip4;
1816             }
1817
1818           /* combine the address and interface for the hash key */
1819           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1820           r0 = (u64) resolve0.data_u32 << 32;
1821           r0 |= sw_if_index0;
1822
1823           if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
1824             {
1825               p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
1826               continue;
1827             }
1828
1829           /*
1830            * the adj has been updated to a rewrite but the node the DPO that got
1831            * us here hasn't - yet. no big deal. we'll drop while we wait.
1832            */
1833           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1834             {
1835               p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
1836               continue;
1837             }
1838
1839           /*
1840            * Can happen if the control-plane is programming tables
1841            * with traffic flowing; at least that's today's lame excuse.
1842            */
1843           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1844               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1845             {
1846               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1847               continue;
1848             }
1849           /* Send ARP request. */
1850           h0 =
1851             vlib_packet_template_get_packet (vm,
1852                                              &im->ip4_arp_request_packet_template,
1853                                              &bi0);
1854           b0 = vlib_get_buffer (vm, bi0);
1855
1856           /* copy the persistent fields from the original */
1857           clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
1858
1859           /* Seems we're out of buffers */
1860           if (PREDICT_FALSE (!h0))
1861             {
1862               p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
1863               continue;
1864             }
1865
1866           /* Add rewrite/encap string for ARP packet. */
1867           vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1868
1869           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1870
1871           /* Src ethernet address in ARP header. */
1872           mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
1873                                   hw_if0->hw_address);
1874           if (is_glean)
1875             {
1876               /* The interface's source address is stashed in the Glean Adj */
1877               h0->ip4_over_ethernet[0].ip4 =
1878                 adj0->sub_type.glean.receive_addr.ip4;
1879             }
1880           else
1881             {
1882               /* Src IP address in ARP header. */
1883               if (ip4_src_address_for_packet (lm, sw_if_index0,
1884                                               &h0->ip4_over_ethernet[0].ip4))
1885                 {
1886                   /* No source address available */
1887                   p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1888                   vlib_buffer_free (vm, &bi0, 1);
1889                   continue;
1890                 }
1891             }
1892           h0->ip4_over_ethernet[1].ip4 = resolve0;
1893
1894           p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
1895
1896           vlib_buffer_copy_trace_flag (vm, p0, bi0);
1897           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1898           vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1899
1900           vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1901
1902           vlib_set_next_frame_buffer (vm, node,
1903                                       adj0->rewrite_header.next_index, bi0);
1904         }
1905
1906       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1907     }
1908
1909   return frame->n_vectors;
1910 }
1911
1912 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1913                              vlib_frame_t * frame)
1914 {
1915   return (ip4_arp_inline (vm, node, frame, 0));
1916 }
1917
1918 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1919                                vlib_frame_t * frame)
1920 {
1921   return (ip4_arp_inline (vm, node, frame, 1));
1922 }
1923
/*
 * Human-readable text for each IP4_ARP_ERROR_* code, indexed by the code.
 * Used as the error_strings table of both the ip4-arp and ip4-glean nodes.
 */
1924 static char *ip4_arp_error_strings[] = {
1925   [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
1926   [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
1927   [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
1928   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1929   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1930   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1931 };
1932
1933 /* *INDENT-OFF* */
/*
 * Registration of the "ip4-arp" graph node.  Its only statically declared
 * next is "error-drop" (IP4_ARP_NEXT_DROP); errors come from
 * ip4_arp_error_strings.
 */
1934 VLIB_REGISTER_NODE (ip4_arp_node) =
1935 {
1936   .name = "ip4-arp",
1937   .vector_size = sizeof (u32),
1938   .format_trace = format_ip4_forward_next_trace,
1939   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1940   .error_strings = ip4_arp_error_strings,
1941   .n_next_nodes = IP4_ARP_N_NEXT,
1942   .next_nodes =
1943   {
1944     [IP4_ARP_NEXT_DROP] = "error-drop",
1945   },
1946 };
1947
/*
 * Registration of the "ip4-glean" graph node; same error table and next
 * nodes as "ip4-arp".
 */
1948 VLIB_REGISTER_NODE (ip4_glean_node) =
1949 {
1950   .name = "ip4-glean",
1951   .vector_size = sizeof (u32),
1952   .format_trace = format_ip4_forward_next_trace,
1953   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1954   .error_strings = ip4_arp_error_strings,
1955   .n_next_nodes = IP4_ARP_N_NEXT,
1956   .next_nodes = {
1957   [IP4_ARP_NEXT_DROP] = "error-drop",
1958   },
1959 };
1960 /* *INDENT-ON* */
1961
1962 #define foreach_notrace_ip4_arp_error           \
1963 _(THROTTLED)                                    \
1964 _(RESOLVED)                                     \
1965 _(NO_BUFFERS)                                   \
1966 _(REQUEST_SENT)                                 \
1967 _(NON_ARP_ADJ)                                  \
1968 _(NO_SOURCE_ADDRESS)
1969
1970 static clib_error_t *
1971 arp_notrace_init (vlib_main_t * vm)
1972 {
1973   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
1974
1975   /* don't trace ARP request packets */
1976 #define _(a)                                    \
1977     vnet_pcap_drop_trace_filter_add_del         \
1978         (rt->errors[IP4_ARP_ERROR_##a],         \
1979          1 /* is_add */);
1980   foreach_notrace_ip4_arp_error;
1981 #undef _
1982   return 0;
1983 }
1984
1985 VLIB_INIT_FUNCTION (arp_notrace_init);
1986
1987
1988 #ifndef CLIB_MARCH_VARIANT
1989 /* Send an ARP request to see if given destination is reachable on given interface. */
1990 clib_error_t *
1991 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
1992                     u8 refresh)
1993 {
1994   vnet_main_t *vnm = vnet_get_main ();
1995   ip4_main_t *im = &ip4_main;
1996   ethernet_arp_header_t *h;
1997   ip4_address_t *src;
1998   ip_interface_address_t *ia;
1999   ip_adjacency_t *adj;
2000   vnet_hw_interface_t *hi;
2001   vnet_sw_interface_t *si;
2002   vlib_buffer_t *b;
2003   adj_index_t ai;
2004   u32 bi = 0;
2005   u8 unicast_rewrite = 0;
2006
2007   si = vnet_get_sw_interface (vnm, sw_if_index);
2008
2009   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2010     {
2011       return clib_error_return (0, "%U: interface %U down",
2012                                 format_ip4_address, dst,
2013                                 format_vnet_sw_if_index_name, vnm,
2014                                 sw_if_index);
2015     }
2016
2017   src =
2018     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2019   if (!src)
2020     {
2021       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2022       return clib_error_return
2023         (0,
2024          "no matching interface address for destination %U (interface %U)",
2025          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2026          sw_if_index);
2027     }
2028
2029   h = vlib_packet_template_get_packet (vm,
2030                                        &im->ip4_arp_request_packet_template,
2031                                        &bi);
2032
2033   if (!h)
2034     return clib_error_return (0, "ARP request packet allocation failed");
2035
2036   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2037   if (PREDICT_FALSE (!hi->hw_address))
2038     {
2039       return clib_error_return (0, "%U: interface %U do not support ip probe",
2040                                 format_ip4_address, dst,
2041                                 format_vnet_sw_if_index_name, vnm,
2042                                 sw_if_index);
2043     }
2044
2045   mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2046
2047   h->ip4_over_ethernet[0].ip4 = src[0];
2048   h->ip4_over_ethernet[1].ip4 = dst[0];
2049
2050   b = vlib_get_buffer (vm, bi);
2051   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2052     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2053
2054   ip46_address_t nh = {
2055     .ip4 = *dst,
2056   };
2057
2058   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2059                             VNET_LINK_IP4, &nh, sw_if_index);
2060   adj = adj_get (ai);
2061
2062   /* Peer has been previously resolved, retrieve glean adj instead */
2063   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2064     {
2065       if (refresh)
2066         unicast_rewrite = 1;
2067       else
2068         {
2069           adj_unlock (ai);
2070           ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2071                                       VNET_LINK_IP4, sw_if_index, &nh);
2072           adj = adj_get (ai);
2073         }
2074     }
2075
2076   /* Add encapsulation string for software interface (e.g. ethernet header). */
2077   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2078   if (unicast_rewrite)
2079     {
2080       u16 *etype = vlib_buffer_get_current (b) - 2;
2081       etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2082     }
2083   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2084
2085   {
2086     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2087     u32 *to_next = vlib_frame_vector_args (f);
2088     to_next[0] = bi;
2089     f->n_vectors = 1;
2090     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2091   }
2092
2093   adj_unlock (ai);
2094   return /* no error */ 0;
2095 }
2096 #endif
2097
/*
 * Statically declared next-node slots of the ip4-rewrite family of nodes;
 * the node registration maps them to "ip4-drop", "ip4-icmp-error" and
 * "ip4-frag" respectively.
 */
2098 typedef enum
2099 {
2100   IP4_REWRITE_NEXT_DROP,
2101   IP4_REWRITE_NEXT_ICMP_ERROR,
2102   IP4_REWRITE_NEXT_FRAGMENT,
2103   IP4_REWRITE_N_NEXT            /* Last */
2104 } ip4_rewrite_next_t;
2105
2106 /**
2107  * The bits of an IPv4 address to mask to construct a multicast
2108  * MAC address
2109  */
2110 #if CLIB_ARCH_IS_BIG_ENDIAN
2111 #define IP4_MCAST_ADDR_MASK 0x007fffff
2112 #else
2113 #define IP4_MCAST_ADDR_MASK 0xffff7f00
2114 #endif
2115
2116 always_inline void
2117 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2118                u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2119 {
2120   if (packet_len > adj_packet_bytes)
2121     {
2122       *error = IP4_ERROR_MTU_EXCEEDED;
2123       if (df)
2124         {
2125           icmp4_error_set_vnet_buffer
2126             (b, ICMP4_destination_unreachable,
2127              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2128              adj_packet_bytes);
2129           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2130         }
2131       else
2132         {
2133           /* IP fragmentation */
2134           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2135                                    IP4_FRAG_NEXT_IP4_REWRITE, 0);
2136           *next = IP4_REWRITE_NEXT_FRAGMENT;
2137         }
2138     }
2139 }
2140
2141 /* Decrement TTL & update checksum.
2142    Works either endian, so no need for byte swap. */
2143 static_always_inline void
2144 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2145                             u32 * error)
2146 {
2147   i32 ttl;
2148   u32 checksum;
2149   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2150     {
2151       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2152       return;
2153     }
2154
2155   ttl = ip->ttl;
2156
2157   /* Input node should have reject packets with ttl 0. */
2158   ASSERT (ip->ttl > 0);
2159
2160   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2161   checksum += checksum >= 0xffff;
2162
2163   ip->checksum = checksum;
2164   ttl -= 1;
2165   ip->ttl = ttl;
2166
2167   /*
2168    * If the ttl drops below 1 when forwarding, generate
2169    * an ICMP response.
2170    */
2171   if (PREDICT_FALSE (ttl <= 0))
2172     {
2173       *error = IP4_ERROR_TIME_EXPIRED;
2174       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2175       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2176                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2177                                    0);
2178       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2179     }
2180
2181   /* Verify checksum. */
2182   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2183           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2184 }
2185
2186
/*
 * Core of the IPv4 rewrite nodes.
 *
 * For every buffer of the frame this reads the adjacency index from
 * vnet_buffer (b)->ip.adj_index[VLIB_TX], runs ip4_ttl_and_checksum_check()
 * and ip4_mtu_check(), and - when no error was raised - moves current_data
 * back by the adjacency's rewrite length, sets sw_if_index[VLIB_TX] and takes
 * the next index from the adjacency (optionally routed through the output
 * feature arc).  Buffers that raised an error keep the next index chosen by
 * the checks (initialised to IP4_REWRITE_NEXT_DROP) and get b->error from the
 * ip4-input node's error table.  All buffers are finally enqueued with
 * vlib_buffer_enqueue_to_next().
 *
 * do_counters  bump the per-adjacency combined counters
 * is_midchain  call calc_checksums() and the adjacency's midchain fixup_func
 * is_mcast     flag packets whose TX interface equals their RX interface as
 *              IP4_ERROR_SAME_INTERFACE, and call vnet_ip_mcast_fixup_header()
 * do_gso       for buffers flagged VNET_BUFFER_F_GSO, use gso_mtu_sz()
 *              instead of the IP header length in the MTU check
 *
 * Returns frame->n_vectors.
 */
2187 always_inline uword
2188 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2189                              vlib_node_runtime_t * node,
2190                              vlib_frame_t * frame,
2191                              int do_counters, int is_midchain, int is_mcast,
2192                              int do_gso)
2193 {
2194   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2195   u32 *from = vlib_frame_vector_args (frame);
2196   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2197   u16 nexts[VLIB_FRAME_SIZE], *next;
2198   u32 n_left_from;
/* Error codes raised here are looked up in the ip4-input node's table. */
2199   vlib_node_runtime_t *error_node =
2200     vlib_node_get_runtime (vm, ip4_input_node.index);
2201
2202   n_left_from = frame->n_vectors;
2203   u32 thread_index = vm->thread_index;
2204
/* Resolve all buffer pointers up front and default every next to drop. */
2205   vlib_get_buffers (vm, from, bufs, n_left_from);
2206   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2207
/* Prefetch the headers of buffers 2..5 before entering the dual loop. */
2208   if (n_left_from >= 6)
2209     {
2210       int i;
2211       for (i = 2; i < 6; i++)
2212         vlib_prefetch_buffer_header (bufs[i], LOAD);
2213     }
2214
2215   next = nexts;
2216   b = bufs;
/*
 * Dual loop: processes b[0] and b[1] per iteration.  It requires at least
 * 8 buffers left because it prefetches as far ahead as b[7].
 */
2217   while (n_left_from >= 8)
2218     {
2219       ip_adjacency_t *adj0, *adj1;
2220       ip4_header_t *ip0, *ip1;
2221       u32 rw_len0, error0, adj_index0;
2222       u32 rw_len1, error1, adj_index1;
2223       u32 tx_sw_if_index0, tx_sw_if_index1;
2224       u8 *p;
2225
2226       vlib_prefetch_buffer_header (b[6], LOAD);
2227       vlib_prefetch_buffer_header (b[7], LOAD);
2228
2229       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2230       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2231
2232       /*
2233        * pre-fetch the per-adjacency counters
2234        */
2235       if (do_counters)
2236         {
2237           vlib_prefetch_combined_counter (&adjacency_counters,
2238                                           thread_index, adj_index0);
2239           vlib_prefetch_combined_counter (&adjacency_counters,
2240                                           thread_index, adj_index1);
2241         }
2242
2243       ip0 = vlib_buffer_get_current (b[0]);
2244       ip1 = vlib_buffer_get_current (b[1]);
2245
2246       error0 = error1 = IP4_ERROR_NONE;
2247
2248       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2249       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2250
2251       /* Rewrite packet header and updates lengths. */
2252       adj0 = adj_get (adj_index0);
2253       adj1 = adj_get (adj_index1);
2254
2255       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2256       rw_len0 = adj0[0].rewrite_header.data_bytes;
2257       rw_len1 = adj1[0].rewrite_header.data_bytes;
2258       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2259       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2260
/*
 * Prefetch packet data of the next pair: the cache line before the
 * current data pointer for STORE and the current line for LOAD.
 */
2261       p = vlib_buffer_get_current (b[2]);
2262       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2263       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2264
2265       p = vlib_buffer_get_current (b[3]);
2266       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2267       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2268
2269       /* Check MTU of outgoing interface. */
2270       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2271       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2272
2273       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2274         ip0_len = gso_mtu_sz (b[0]);
2275       if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
2276         ip1_len = gso_mtu_sz (b[1]);
2277
2278       ip4_mtu_check (b[0], ip0_len,
2279                      adj0[0].rewrite_header.max_l3_packet_bytes,
2280                      ip0->flags_and_fragment_offset &
2281                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2282                      next + 0, &error0);
2283       ip4_mtu_check (b[1], ip1_len,
2284                      adj1[0].rewrite_header.max_l3_packet_bytes,
2285                      ip1->flags_and_fragment_offset &
2286                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2287                      next + 1, &error1);
2288
/* Multicast: a packet leaving on its RX interface is an error. */
2289       if (is_mcast)
2290         {
2291           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2292                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2293                     IP4_ERROR_SAME_INTERFACE : error0);
2294           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2295                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2296                     IP4_ERROR_SAME_INTERFACE : error1);
2297         }
2298
2299       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2300        * to see the IP header */
2301       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2302         {
2303           u32 next_index = adj0[0].rewrite_header.next_index;
2304           b[0]->current_data -= rw_len0;
2305           b[0]->current_length += rw_len0;
2306           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2307           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2308
2309           if (PREDICT_FALSE
2310               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2311             vnet_feature_arc_start (lm->output_feature_arc_index,
2312                                     tx_sw_if_index0, &next_index, b[0]);
2313           next[0] = next_index;
2314         }
2315       else
2316         {
2317           b[0]->error = error_node->errors[error0];
2318         }
2319       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2320         {
2321           u32 next_index = adj1[0].rewrite_header.next_index;
2322           b[1]->current_data -= rw_len1;
2323           b[1]->current_length += rw_len1;
2324
2325           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2326           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2327
2328           if (PREDICT_FALSE
2329               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2330             vnet_feature_arc_start (lm->output_feature_arc_index,
2331                                     tx_sw_if_index1, &next_index, b[1]);
2332           next[1] = next_index;
2333         }
2334       else
2335         {
2336           b[1]->error = error_node->errors[error1];
2337         }
2338       if (is_midchain)
2339         {
2340           calc_checksums (vm, b[0]);
2341           calc_checksums (vm, b[1]);
2342         }
/* Note: the rewrite is applied whether or not an error was raised. */
2343       /* Guess we are only writing on simple Ethernet header. */
2344       vnet_rewrite_two_headers (adj0[0], adj1[0],
2345                                 ip0, ip1, sizeof (ethernet_header_t));
2346
2347       /*
2348        * Bump the per-adjacency counters
2349        */
/*
 * NOTE(review): on the success path current_length was already grown by
 * rw_len above; if vlib_buffer_length_in_chain() includes current_length
 * the rewrite length is counted twice here - confirm.
 */
2350       if (do_counters)
2351         {
2352           vlib_increment_combined_counter
2353             (&adjacency_counters,
2354              thread_index,
2355              adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2356
2357           vlib_increment_combined_counter
2358             (&adjacency_counters,
2359              thread_index,
2360              adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2361         }
2362
2363       if (is_midchain)
2364         {
2365           if (adj0->sub_type.midchain.fixup_func)
2366             adj0->sub_type.midchain.fixup_func
2367               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2368           if (adj1->sub_type.midchain.fixup_func)
2369             adj1->sub_type.midchain.fixup_func
2370               (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2371         }
2372
2373       if (is_mcast)
2374         {
2375           /*
2376            * copy bytes from the IP address into the MAC rewrite
2377            */
2378           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2379                                       adj0->rewrite_header.dst_mcast_offset,
2380                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2381           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2382                                       adj1->rewrite_header.dst_mcast_offset,
2383                                       &ip1->dst_address.as_u32, (u8 *) ip1);
2384         }
2385
2386       next += 2;
2387       b += 2;
2388       n_left_from -= 2;
2389     }
2390
/* Single loop: same steps as above for the remaining buffers, one at a time. */
2391   while (n_left_from > 0)
2392     {
2393       ip_adjacency_t *adj0;
2394       ip4_header_t *ip0;
2395       u32 rw_len0, adj_index0, error0;
2396       u32 tx_sw_if_index0;
2397
2398       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2399
2400       adj0 = adj_get (adj_index0);
2401
2402       if (do_counters)
2403         vlib_prefetch_combined_counter (&adjacency_counters,
2404                                         thread_index, adj_index0);
2405
2406       ip0 = vlib_buffer_get_current (b[0]);
2407
2408       error0 = IP4_ERROR_NONE;
2409
2410       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2411
2412
2413       /* Update packet buffer attributes/set output interface. */
2414       rw_len0 = adj0[0].rewrite_header.data_bytes;
2415       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2416
2417       /* Check MTU of outgoing interface. */
2418       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2419       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2420         ip0_len = gso_mtu_sz (b[0]);
2421
2422       ip4_mtu_check (b[0], ip0_len,
2423                      adj0[0].rewrite_header.max_l3_packet_bytes,
2424                      ip0->flags_and_fragment_offset &
2425                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2426                      next + 0, &error0);
2427
2428       if (is_mcast)
2429         {
2430           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2431                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2432                     IP4_ERROR_SAME_INTERFACE : error0);
2433         }
2434
2435       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2436        * to see the IP header */
2437       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2438         {
2439           u32 next_index = adj0[0].rewrite_header.next_index;
2440           b[0]->current_data -= rw_len0;
2441           b[0]->current_length += rw_len0;
2442           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2443           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2444
2445           if (PREDICT_FALSE
2446               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2447             vnet_feature_arc_start (lm->output_feature_arc_index,
2448                                     tx_sw_if_index0, &next_index, b[0]);
2449           next[0] = next_index;
2450         }
2451       else
2452         {
2453           b[0]->error = error_node->errors[error0];
2454         }
2455       if (is_midchain)
2456         {
2457           calc_checksums (vm, b[0]);
2458         }
2459       /* Guess we are only writing on simple Ethernet header. */
2460       vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2461
/* NOTE(review): same possible double count of rw_len0 as in the dual loop. */
2462       if (do_counters)
2463         vlib_increment_combined_counter
2464           (&adjacency_counters,
2465            thread_index, adj_index0, 1,
2466            vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2467
2468       if (is_midchain)
2469         {
2470           if (adj0->sub_type.midchain.fixup_func)
2471             adj0->sub_type.midchain.fixup_func
2472               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2473         }
2474
2475       if (is_mcast)
2476         {
2477           /*
2478            * copy bytes from the IP address into the MAC rewrite
2479            */
2480           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2481                                       adj0->rewrite_header.dst_mcast_offset,
2482                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2483         }
2484
2485       next += 1;
2486       b += 1;
2487       n_left_from -= 1;
2488     }
2489
2490
2491   /* Need to do trace after rewrites to pick up new packet data. */
2492   if (node->flags & VLIB_NODE_FLAG_TRACE)
2493     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2494
2495   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2496   return frame->n_vectors;
2497 }
2498
2499 always_inline uword
2500 ip4_rewrite_inline (vlib_main_t * vm,
2501                     vlib_node_runtime_t * node,
2502                     vlib_frame_t * frame,
2503                     int do_counters, int is_midchain, int is_mcast)
2504 {
2505   vnet_main_t *vnm = vnet_get_main ();
2506   if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2507     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2508                                         is_midchain, is_mcast,
2509                                         1 /* do_gso */ );
2510   else
2511     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2512                                         is_midchain, is_mcast,
2513                                         0 /* no do_gso */ );
2514 }
2515
2516
2517 /** @brief IPv4 rewrite node.
2518     @node ip4-rewrite
2519
2520     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2521     header checksum, fetch the ip adjacency, check the outbound mtu,
2522     apply the adjacency rewrite, and send pkts to the adjacency
2523     rewrite header's rewrite_next_index.
2524
2525     @param vm vlib_main_t corresponding to the current thread
2526     @param node vlib_node_runtime_t
2527     @param frame vlib_frame_t whose contents should be dispatched
2528
2529     @par Graph mechanics: buffer metadata, next index usage
2530
2531     @em Uses:
2532     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2533         - the rewrite adjacency index
2534     - <code>adj->lookup_next_index</code>
2535         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2536           the packet will be dropped.
2537     - <code>adj->rewrite_header</code>
2538         - Rewrite string length, rewrite string, next_index
2539
2540     @em Sets:
2541     - <code>b->current_data, b->current_length</code>
2542         - Updated net of applying the rewrite string
2543
2544     <em>Next Indices:</em>
2545     - <code> adj->rewrite_header.next_index </code>
2546       or @c ip4-drop
2547 */
2548
2549 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2550                                  vlib_frame_t * frame)
2551 {
2552   if (adj_are_counters_enabled ())
2553     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2554   else
2555     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2556 }
2557
2558 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2559                                        vlib_node_runtime_t * node,
2560                                        vlib_frame_t * frame)
2561 {
2562   if (adj_are_counters_enabled ())
2563     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2564   else
2565     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2566 }
2567
2568 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2569                                   vlib_node_runtime_t * node,
2570                                   vlib_frame_t * frame)
2571 {
2572   if (adj_are_counters_enabled ())
2573     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2574   else
2575     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2576 }
2577
2578 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2579                                        vlib_node_runtime_t * node,
2580                                        vlib_frame_t * frame)
2581 {
2582   if (adj_are_counters_enabled ())
2583     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2584   else
2585     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2586 }
2587
2588 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2589                                         vlib_node_runtime_t * node,
2590                                         vlib_frame_t * frame)
2591 {
2592   if (adj_are_counters_enabled ())
2593     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2594   else
2595     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2596 }
2597
2598 /* *INDENT-OFF* */
2599 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2600   .name = "ip4-rewrite",
2601   .vector_size = sizeof (u32),
2602
2603   .format_trace = format_ip4_rewrite_trace,
2604
2605   .n_next_nodes = IP4_REWRITE_N_NEXT,
2606   .next_nodes = {
2607     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2608     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2609     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2610   },
2611 };
2612
2613 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2614   .name = "ip4-rewrite-bcast",
2615   .vector_size = sizeof (u32),
2616
2617   .format_trace = format_ip4_rewrite_trace,
2618   .sibling_of = "ip4-rewrite",
2619 };
2620
2621 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2622   .name = "ip4-rewrite-mcast",
2623   .vector_size = sizeof (u32),
2624
2625   .format_trace = format_ip4_rewrite_trace,
2626   .sibling_of = "ip4-rewrite",
2627 };
2628
2629 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2630   .name = "ip4-mcast-midchain",
2631   .vector_size = sizeof (u32),
2632
2633   .format_trace = format_ip4_rewrite_trace,
2634   .sibling_of = "ip4-rewrite",
2635 };
2636
2637 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2638   .name = "ip4-midchain",
2639   .vector_size = sizeof (u32),
2640   .format_trace = format_ip4_forward_next_trace,
2641   .sibling_of =  "ip4-rewrite",
2642 };
/* *INDENT-ON* */
2644
2645 static int
2646 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2647 {
2648   ip4_fib_mtrie_t *mtrie0;
2649   ip4_fib_mtrie_leaf_t leaf0;
2650   u32 lbi0;
2651
2652   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2653
2654   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2655   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2656   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2657
2658   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2659
2660   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2661 }
2662
2663 static clib_error_t *
2664 test_lookup_command_fn (vlib_main_t * vm,
2665                         unformat_input_t * input, vlib_cli_command_t * cmd)
2666 {
2667   ip4_fib_t *fib;
2668   u32 table_id = 0;
2669   f64 count = 1;
2670   u32 n;
2671   int i;
2672   ip4_address_t ip4_base_address;
2673   u64 errors = 0;
2674
2675   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2676     {
2677       if (unformat (input, "table %d", &table_id))
2678         {
2679           /* Make sure the entry exists. */
2680           fib = ip4_fib_get (table_id);
2681           if ((fib) && (fib->index != table_id))
2682             return clib_error_return (0, "<fib-index> %d does not exist",
2683                                       table_id);
2684         }
2685       else if (unformat (input, "count %f", &count))
2686         ;
2687
2688       else if (unformat (input, "%U",
2689                          unformat_ip4_address, &ip4_base_address))
2690         ;
2691       else
2692         return clib_error_return (0, "unknown input `%U'",
2693                                   format_unformat_error, input);
2694     }
2695
2696   n = count;
2697
2698   for (i = 0; i < n; i++)
2699     {
2700       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2701         errors++;
2702
2703       ip4_base_address.as_u32 =
2704         clib_host_to_net_u32 (1 +
2705                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2706     }
2707
2708   if (errors)
2709     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2710   else
2711     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2712
2713   return 0;
2714 }
2715
2716 /*?
2717  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2718  * given FIB table to determine if there is a conflict with the
2719  * adjacency table. The fib-id can be determined by using the
2720  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2721  * of 0 is used.
2722  *
2723  * @todo This command uses fib-id, other commands use table-id (not
2724  * just a name, they are different indexes). Would like to change this
2725  * to table-id for consistency.
2726  *
2727  * @cliexpar
2728  * Example of how to run the test lookup command:
2729  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2730  * No errors in 2 lookups
2731  * @cliexend
2732 ?*/
2733 /* *INDENT-OFF* */
2734 VLIB_CLI_COMMAND (lookup_test_command, static) =
2735 {
2736   .path = "test lookup",
2737   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2738   .function = test_lookup_command_fn,
2739 };
2740 /* *INDENT-ON* */
2741
2742 #ifndef CLIB_MARCH_VARIANT
2743 int
2744 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2745 {
2746   u32 fib_index;
2747
2748   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2749
2750   if (~0 == fib_index)
2751     return VNET_API_ERROR_NO_SUCH_FIB;
2752
2753   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2754                                   flow_hash_config);
2755
2756   return 0;
2757 }
2758 #endif
2759
2760 static clib_error_t *
2761 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2762                              unformat_input_t * input,
2763                              vlib_cli_command_t * cmd)
2764 {
2765   int matched = 0;
2766   u32 table_id = 0;
2767   u32 flow_hash_config = 0;
2768   int rv;
2769
2770   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2771     {
2772       if (unformat (input, "table %d", &table_id))
2773         matched = 1;
2774 #define _(a,v) \
2775     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2776       foreach_flow_hash_bit
2777 #undef _
2778         else
2779         break;
2780     }
2781
2782   if (matched == 0)
2783     return clib_error_return (0, "unknown input `%U'",
2784                               format_unformat_error, input);
2785
2786   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2787   switch (rv)
2788     {
2789     case 0:
2790       break;
2791
2792     case VNET_API_ERROR_NO_SUCH_FIB:
2793       return clib_error_return (0, "no such FIB table %d", table_id);
2794
2795     default:
2796       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2797       break;
2798     }
2799
2800   return 0;
2801 }
2802
2803 /*?
2804  * Configure the set of IPv4 fields used by the flow hash.
2805  *
2806  * @cliexpar
2807  * Example of how to set the flow hash on a given table:
2808  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2810  * @cliexstart{show ip fib}
2811  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2812  * 0.0.0.0/0
2813  *   unicast-ip4-chain
2814  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2815  *     [0] [@0]: dpo-drop ip6
2816  * 0.0.0.0/32
2817  *   unicast-ip4-chain
2818  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2819  *     [0] [@0]: dpo-drop ip6
2820  * 224.0.0.0/8
2821  *   unicast-ip4-chain
2822  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2823  *     [0] [@0]: dpo-drop ip6
2824  * 6.0.1.2/32
2825  *   unicast-ip4-chain
2826  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2827  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2828  * 7.0.0.1/32
2829  *   unicast-ip4-chain
2830  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2831  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2832  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2833  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2834  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2835  * 240.0.0.0/8
2836  *   unicast-ip4-chain
2837  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2838  *     [0] [@0]: dpo-drop ip6
2839  * 255.255.255.255/32
2840  *   unicast-ip4-chain
2841  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2842  *     [0] [@0]: dpo-drop ip6
2843  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2844  * 0.0.0.0/0
2845  *   unicast-ip4-chain
2846  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2847  *     [0] [@0]: dpo-drop ip6
2848  * 0.0.0.0/32
2849  *   unicast-ip4-chain
2850  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2851  *     [0] [@0]: dpo-drop ip6
2852  * 172.16.1.0/24
2853  *   unicast-ip4-chain
2854  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2855  *     [0] [@4]: ipv4-glean: af_packet0
2856  * 172.16.1.1/32
2857  *   unicast-ip4-chain
2858  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2859  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2860  * 172.16.1.2/32
2861  *   unicast-ip4-chain
2862  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2863  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2864  * 172.16.2.0/24
2865  *   unicast-ip4-chain
2866  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2867  *     [0] [@4]: ipv4-glean: af_packet1
2868  * 172.16.2.1/32
2869  *   unicast-ip4-chain
2870  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2871  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2872  * 224.0.0.0/8
2873  *   unicast-ip4-chain
2874  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2875  *     [0] [@0]: dpo-drop ip6
2876  * 240.0.0.0/8
2877  *   unicast-ip4-chain
2878  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2879  *     [0] [@0]: dpo-drop ip6
2880  * 255.255.255.255/32
2881  *   unicast-ip4-chain
2882  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2883  *     [0] [@0]: dpo-drop ip6
2884  * @cliexend
2885 ?*/
2886 /* *INDENT-OFF* */
2887 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2888 {
2889   .path = "set ip flow-hash",
2890   .short_help =
2891   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2892   .function = set_ip_flow_hash_command_fn,
2893 };
2894 /* *INDENT-ON* */
2895
2896 #ifndef CLIB_MARCH_VARIANT
2897 int
2898 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2899                              u32 table_index)
2900 {
2901   vnet_main_t *vnm = vnet_get_main ();
2902   vnet_interface_main_t *im = &vnm->interface_main;
2903   ip4_main_t *ipm = &ip4_main;
2904   ip_lookup_main_t *lm = &ipm->lookup_main;
2905   vnet_classify_main_t *cm = &vnet_classify_main;
2906   ip4_address_t *if_addr;
2907
2908   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2909     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2910
2911   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2912     return VNET_API_ERROR_NO_SUCH_ENTRY;
2913
2914   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2915   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2916
2917   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2918
2919   if (NULL != if_addr)
2920     {
2921       fib_prefix_t pfx = {
2922         .fp_len = 32,
2923         .fp_proto = FIB_PROTOCOL_IP4,
2924         .fp_addr.ip4 = *if_addr,
2925       };
2926       u32 fib_index;
2927
2928       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2929                                                        sw_if_index);
2930
2931
2932       if (table_index != (u32) ~ 0)
2933         {
2934           dpo_id_t dpo = DPO_INVALID;
2935
2936           dpo_set (&dpo,
2937                    DPO_CLASSIFY,
2938                    DPO_PROTO_IP4,
2939                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2940
2941           fib_table_entry_special_dpo_add (fib_index,
2942                                            &pfx,
2943                                            FIB_SOURCE_CLASSIFY,
2944                                            FIB_ENTRY_FLAG_NONE, &dpo);
2945           dpo_reset (&dpo);
2946         }
2947       else
2948         {
2949           fib_table_entry_special_remove (fib_index,
2950                                           &pfx, FIB_SOURCE_CLASSIFY);
2951         }
2952     }
2953
2954   return 0;
2955 }
2956 #endif
2957
2958 static clib_error_t *
2959 set_ip_classify_command_fn (vlib_main_t * vm,
2960                             unformat_input_t * input,
2961                             vlib_cli_command_t * cmd)
2962 {
2963   u32 table_index = ~0;
2964   int table_index_set = 0;
2965   u32 sw_if_index = ~0;
2966   int rv;
2967
2968   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2969     {
2970       if (unformat (input, "table-index %d", &table_index))
2971         table_index_set = 1;
2972       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2973                          vnet_get_main (), &sw_if_index))
2974         ;
2975       else
2976         break;
2977     }
2978
2979   if (table_index_set == 0)
2980     return clib_error_return (0, "classify table-index must be specified");
2981
2982   if (sw_if_index == ~0)
2983     return clib_error_return (0, "interface / subif must be specified");
2984
2985   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2986
2987   switch (rv)
2988     {
2989     case 0:
2990       break;
2991
2992     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2993       return clib_error_return (0, "No such interface");
2994
2995     case VNET_API_ERROR_NO_SUCH_ENTRY:
2996       return clib_error_return (0, "No such classifier table");
2997     }
2998   return 0;
2999 }
3000
3001 /*?
3002  * Assign a classification table to an interface. The classification
3003  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
3005  * on an interface.
3006  *
3007  * @cliexpar
3008  * Example of how to assign a classification table to an interface:
3009  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3010 ?*/
3011 /* *INDENT-OFF* */
3012 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3013 {
3014     .path = "set ip classify",
3015     .short_help =
3016     "set ip classify intfc <interface> table-index <classify-idx>",
3017     .function = set_ip_classify_command_fn,
3018 };
3019 /* *INDENT-ON* */
3020
3021 static clib_error_t *
3022 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3023 {
3024   ip4_main_t *im = &ip4_main;
3025   uword heapsize = 0;
3026
3027   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3028     {
3029       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3030         ;
3031       else
3032         return clib_error_return (0,
3033                                   "invalid heap-size parameter `%U'",
3034                                   format_unformat_error, input);
3035     }
3036
3037   im->mtrie_heap_size = heapsize;
3038
3039   return 0;
3040 }
3041
3042 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3043
3044 /*
3045  * fd.io coding-style-patch-verification: ON
3046  *
3047  * Local Variables:
3048  * eval: (c-set-style "gnu")
3049  * End:
3050  */